e.key === "Enter" && handleCardClick()}>
@@ -229,7 +229,7 @@ export default function MembersPage() {
-
发起会话
+
打开线程
diff --git a/frontend/app/src/pages/NewChatPage.test.tsx b/frontend/app/src/pages/NewChatPage.test.tsx
new file mode 100644
index 000000000..cb07bdfd6
--- /dev/null
+++ b/frontend/app/src/pages/NewChatPage.test.tsx
@@ -0,0 +1,179 @@
+// @vitest-environment jsdom
+
+import { render, screen, waitFor } from "@testing-library/react";
+import { MemoryRouter, Outlet, Route, Routes } from "react-router-dom";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import NewChatPage from "./NewChatPage";
+import { useAuthStore } from "../store/auth-store";
+import { useAppStore } from "../store/app-store";
+
+const handleGetMainThread = vi.fn();
+
+vi.mock("zustand/middleware", async () => {
+ const actual = await vi.importActual("zustand/middleware");
+ return {
+ ...actual,
+ persist: ((initializer: unknown) => initializer) as typeof actual.persist,
+ };
+});
+
+vi.mock("../components/CenteredInputBox", () => ({
+ default: () => centered-input-box
,
+}));
+
+vi.mock("../components/WorkspaceSetupModal", () => ({
+ default: () => null,
+}));
+
+vi.mock("../components/FilesystemBrowser", () => ({
+ default: () => null,
+}));
+
+vi.mock("../components/MemberAvatar", () => ({
+ default: ({ name }: { name: string }) => {name}
,
+}));
+
+vi.mock("../hooks/use-workspace-settings", () => ({
+ useWorkspaceSettings: () => ({
+ settings: { default_workspace: null, recent_workspaces: [], default_model: "leon:large", enabled_models: ["leon:large"] },
+ loading: false,
+ hasWorkspace: false,
+ refreshSettings: vi.fn(),
+ setDefaultWorkspace: vi.fn(),
+ }),
+}));
+
+vi.mock("../api", () => ({
+ postRun: vi.fn(),
+}));
+
+vi.mock("../api/client", () => ({
+ getDefaultThreadConfig: vi.fn(() => new Promise(() => {})),
+ listMyLeases: vi.fn(async () => []),
+ saveDefaultThreadConfig: vi.fn(async () => undefined),
+}));
+
+function ContextOutlet() {
+ return (
+
+ );
+}
+
+describe("NewChatPage", () => {
+ beforeEach(() => {
+ handleGetMainThread.mockReset();
+ handleGetMainThread.mockResolvedValue(null);
+
+ useAuthStore.setState({
+ token: "token",
+ user: { id: "u-1", name: "tester", type: "human", avatar: null },
+ agent: null,
+ entityId: "u-1",
+ setupInfo: null,
+ login: vi.fn(),
+ sendOtp: vi.fn(),
+ verifyOtp: vi.fn(),
+ completeRegister: vi.fn(),
+ clearSetupInfo: vi.fn(),
+ logout: vi.fn(),
+ });
+
+ useAppStore.setState({
+ memberList: [{
+ id: "m_xVuNpKJNxblZ",
+ name: "Morel",
+ description: "",
+ status: "active",
+ version: "1.0.0",
+ avatar_url: "/avatars/morel.png",
+ config: {
+ prompt: "",
+ rules: [],
+ tools: [],
+ mcps: [],
+ skills: [],
+ subAgents: [],
+ },
+ created_at: 0,
+ updated_at: 0,
+ }],
+ taskList: [],
+ cronJobs: [],
+ librarySkills: [],
+ libraryMcps: [],
+ libraryAgents: [],
+ libraryRecipes: [],
+ userProfile: { name: "User", initials: "U", email: "" },
+ loaded: true,
+ error: null,
+ loadAll: vi.fn(),
+ retry: vi.fn(),
+ resetSessionData: vi.fn(),
+ fetchMembers: vi.fn(),
+ addMember: vi.fn(),
+ updateMember: vi.fn(),
+ updateMemberConfig: vi.fn(),
+ publishMember: vi.fn(),
+ deleteMember: vi.fn(),
+ getMemberById: vi.fn(),
+ fetchTasks: vi.fn(),
+ addTask: vi.fn(),
+ updateTask: vi.fn(),
+ deleteTask: vi.fn(),
+ bulkUpdateTaskStatus: vi.fn(),
+ bulkDeleteTasks: vi.fn(),
+ fetchCronJobs: vi.fn(),
+ addCronJob: vi.fn(),
+ updateCronJob: vi.fn(),
+ deleteCronJob: vi.fn(),
+ triggerCronJob: vi.fn(),
+ fetchLibrary: vi.fn(),
+ fetchLibraryNames: vi.fn(),
+ addResource: vi.fn(),
+ updateResource: vi.fn(),
+ deleteResource: vi.fn(),
+ fetchResourceContent: vi.fn(),
+ updateResourceContent: vi.fn(),
+ fetchProfile: vi.fn(),
+ updateProfile: vi.fn(),
+ getMemberNames: vi.fn(),
+ getResourceUsedBy: vi.fn(),
+ });
+ });
+
+ it("does not block the create-chat UI on a pending default-config fetch once main thread resolves null", async () => {
+ render(
+
+
+ }>
+ } />
+
+
+ ,
+ );
+
+ await waitFor(() => {
+ expect(screen.getByText("开始与 Morel 对话")).toBeTruthy();
+ });
+ expect(screen.queryByText("正在检查 Morel 的主对话")).toBeNull();
+ expect(screen.getByText("centered-input-box")).toBeTruthy();
+ });
+});
diff --git a/frontend/app/src/pages/NewChatPage.tsx b/frontend/app/src/pages/NewChatPage.tsx
index 235ca48f4..4e1c739be 100644
--- a/frontend/app/src/pages/NewChatPage.tsx
+++ b/frontend/app/src/pages/NewChatPage.tsx
@@ -22,6 +22,34 @@ interface OutletContext {
setSessionsOpen: (value: boolean) => void;
}
+function ResolveStateCard({
+ memberName,
+ memberAvatarUrl,
+ title,
+ description,
+ destructive = false,
+}: {
+ memberName: string;
+ memberAvatarUrl?: string;
+ title: string;
+ description: string;
+ destructive?: boolean;
+}) {
+ return (
+
+
+
+
+
+
{title}
+
+ {description}
+
+
+
+ );
+}
+
const PROVIDER_TYPE_LABELS: Record = {
local: "Local",
daytona: "Daytona",
@@ -472,39 +500,29 @@ export default function NewChatPage({ mode = "member" }: { mode?: "member" | "ne
? `复用 ${providerSummaryLabel} 的现有 sandbox`
: `新建 ${providerSummaryLabel} sandbox · ${recipeSummaryLabel}`;
- if (loading || resolveState === "resolving" || configDefaultsLoading) {
+ // @@@defer-default-config - default config should refine the create form, not block
+ // entry into the no-main-thread UI. If the config fetch stalls, users still need the
+ // create-chat surface with sane local defaults.
+ if (loading || resolveState === "resolving") {
return (
-
-
-
-
-
-
- 正在检查 {memberName} 的主对话
-
-
- 如果没有主对话,这里会进入创建界面。
-
-
-
+
);
}
if (resolveState === "error") {
return (
-
-
-
-
-
-
- 无法检查 {memberName} 的主对话
-
-
- {error ?? "未知错误"}
-
-
-
+
);
}
diff --git a/frontend/app/src/pages/RootLayout.test.tsx b/frontend/app/src/pages/RootLayout.test.tsx
new file mode 100644
index 000000000..cb1a1090a
--- /dev/null
+++ b/frontend/app/src/pages/RootLayout.test.tsx
@@ -0,0 +1,72 @@
+// @vitest-environment jsdom
+
+import { fireEvent, render, screen, waitFor } from "@testing-library/react";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import { MemoryRouter, Route, Routes } from "react-router-dom";
+import { LoginForm } from "./RootLayout";
+import { useAuthStore } from "../store/auth-store";
+
+vi.mock("zustand/middleware", async () => {
+ const actual = await vi.importActual("zustand/middleware");
+ return {
+ ...actual,
+ persist: ((initializer: unknown) => initializer) as typeof actual.persist,
+ };
+});
+
+describe("LoginForm", () => {
+ beforeEach(() => {
+ useAuthStore.setState({
+ token: null,
+ user: null,
+ agent: null,
+ entityId: null,
+ setupInfo: null,
+ login: vi.fn(async () => {
+ useAuthStore.setState({
+ token: "token",
+ user: { id: "u-1", name: "tester", type: "human", avatar: null },
+ agent: null,
+ entityId: null,
+ setupInfo: null,
+ });
+ }),
+ sendOtp: vi.fn(async () => undefined),
+ verifyOtp: vi.fn(async () => ({ tempToken: "temp" })),
+ completeRegister: vi.fn(async () => undefined),
+ clearSetupInfo: vi.fn(),
+ logout: vi.fn(),
+ });
+ });
+
+ it("redirects to /threads after a successful login", async () => {
+ render(
+
+
+
+
+ login-page
+ >
+ }
+ />
+ threads-page} />
+
+ ,
+ );
+
+ fireEvent.change(screen.getByPlaceholderText("邮箱或 Mycel ID"), {
+ target: { value: "otpfull_1775371370@example.com" },
+ });
+ fireEvent.change(screen.getByPlaceholderText("密码"), {
+ target: { value: "LeonFull123!" },
+ });
+ fireEvent.click(screen.getByRole("button", { name: "登录" }));
+
+ await waitFor(() => {
+ expect(screen.getByText("threads-page")).toBeTruthy();
+ });
+ });
+});
diff --git a/frontend/app/src/pages/RootLayout.tsx b/frontend/app/src/pages/RootLayout.tsx
index d285056f0..db8c4496b 100644
--- a/frontend/app/src/pages/RootLayout.tsx
+++ b/frontend/app/src/pages/RootLayout.tsx
@@ -1,5 +1,5 @@
import { NavLink, Outlet, useLocation, useNavigate } from "react-router-dom";
-import { MessageSquare, MessagesSquare, Users, ListTodo, Store, Layers, Plug, Settings, Plus, ChevronLeft, ChevronRight, LogOut, Camera, Eye, EyeOff } from "lucide-react";
+import { MessageSquare, MessagesSquare, Users, ListTodo, Store, Layers, Settings, Plus, ChevronLeft, ChevronRight, LogOut, Camera, Eye, EyeOff } from "lucide-react";
import { useState, useEffect, useCallback, useRef } from "react";
import { uploadMemberAvatar } from "@/api/client";
import MemberAvatar from "@/components/MemberAvatar";
@@ -18,7 +18,6 @@ const navItems = [
{ to: "/tasks", icon: ListTodo, label: "Tasks" },
{ to: "/resources", icon: Layers, label: "Resources" },
{ to: "/marketplace", icon: Store, label: "Marketplace" },
- { to: "/connections", icon: Plug, label: "Connections" },
];
const mobileNavItems = [
@@ -65,9 +64,21 @@ function AuthenticatedLayout() {
}, [authUser]);
const loadAll = useAppStore((s) => s.loadAll);
+ const resetSessionData = useAppStore((s) => s.resetSessionData);
const storeAddTask = useAppStore((s) => s.addTask);
+ const lastLoadedUserIdRef = useRef
(null);
- useEffect(() => { loadAll(); }, [loadAll]);
+ useEffect(() => {
+ const userId = authUser?.id ?? null;
+ if (!userId) return;
+ if (lastLoadedUserIdRef.current === userId) return;
+ // @@@auth-session-reset - switching users in the same SPA process must discard
+ // panel caches before reloading, otherwise the next account inherits old
+ // members/tasks and the sidebar mixes identities.
+ lastLoadedUserIdRef.current = userId;
+ resetSessionData();
+ void loadAll();
+ }, [authUser?.id, loadAll, resetSessionData]);
const [expanded, setExpanded] = useState(() => {
const saved = localStorage.getItem("sidebar-expanded");
@@ -187,7 +198,10 @@ function AuthenticatedLayout() {
{/* Main content - no top bar, pages have their own headers */}
-
+ {/* @@@outlet-no-route-key - thread switches should not remount the entire
+ outlet tree; RootLayout route keys were re-triggering AppLayout
+ bootstrap fetches on every /threads/:memberId/:threadId hop. */}
+
{/* Bottom tab bar */}
@@ -311,7 +325,7 @@ function AuthenticatedLayout() {
-
+
@@ -354,7 +368,7 @@ function CreateDropdown({
新建成员
{cache?.loading ? (
@@ -922,4 +924,3 @@ export default function Tasks() {
-
diff --git a/frontend/app/src/pages/ThreadsIndexRedirect.tsx b/frontend/app/src/pages/ThreadsIndexRedirect.tsx
index 2fb79079c..025511dfe 100644
--- a/frontend/app/src/pages/ThreadsIndexRedirect.tsx
+++ b/frontend/app/src/pages/ThreadsIndexRedirect.tsx
@@ -1,14 +1,58 @@
import { useEffect } from "react";
import { useNavigate } from "react-router-dom";
+import { getMainThread } from "../api/client";
import { useAuthStore } from "../store/auth-store";
+const mainThreadInflight = new Map>>>();
+
+function loadMainThread(memberId: string) {
+ const existing = mainThreadInflight.get(memberId);
+ if (existing) return existing;
+ const pending = getMainThread(memberId).finally(() => {
+ mainThreadInflight.delete(memberId);
+ });
+ mainThreadInflight.set(memberId, pending);
+ return pending;
+}
+
export default function ThreadsIndexRedirect() {
const agent = useAuthStore((s) => s.agent);
const navigate = useNavigate();
useEffect(() => {
if (!agent?.id) return;
- navigate(`/threads/${encodeURIComponent(agent.id)}`, { replace: true });
+ const agentId = agent.id;
+
+ let cancelled = false;
+
+ async function redirectToThread() {
+ const memberId = encodeURIComponent(agentId);
+ try {
+ // @@@threads-index-direct-main-route - /threads is a pure entrypoint; resolve the
+ // main thread here so login/setup flows do not bounce through NewChatPage first.
+ // @@@threads-index-inflight-dedup - React StrictMode remounts /threads in dev.
+ // Reuse the first main-thread request and ignore stale callbacks instead of
+ // aborting the first fetch and polluting network/devtools with ERR_ABORTED.
+ const thread = await loadMainThread(agentId);
+ if (cancelled) return;
+ navigate(
+ thread
+ ? `/threads/${memberId}/${encodeURIComponent(thread.thread_id)}`
+ : `/threads/${memberId}`,
+ { replace: true },
+ );
+ } catch (error) {
+ if (cancelled) return;
+ if (error instanceof DOMException && error.name === "AbortError") return;
+ console.error("[ThreadsIndexRedirect] resolve main thread failed:", error);
+ navigate(`/threads/${memberId}`, { replace: true });
+ }
+ }
+
+ void redirectToThread();
+ return () => {
+ cancelled = true;
+ };
}, [agent?.id, navigate]);
return null;
diff --git a/frontend/app/src/pages/ask-user-question.test.ts b/frontend/app/src/pages/ask-user-question.test.ts
new file mode 100644
index 000000000..899c58006
--- /dev/null
+++ b/frontend/app/src/pages/ask-user-question.test.ts
@@ -0,0 +1,38 @@
+import { describe, expect, it } from "vitest";
+import { askUserQuestionSelectionKey, buildAskUserAnswers } from "./ask-user-question";
+import type { AskUserQuestionPrompt } from "../api";
+
+describe("ask-user-question helpers", () => {
+ it("keeps duplicate prompts independently addressable by position", () => {
+ const questions: AskUserQuestionPrompt[] = [
+ {
+ header: "Style",
+ question: "Choose a style",
+ options: [{ label: "Minimal", description: "Keep it simple" }],
+ },
+ {
+ header: "Style",
+ question: "Choose a style",
+ options: [{ label: "Bold", description: "Make it loud" }],
+ },
+ ];
+
+ const answers = buildAskUserAnswers(questions, {
+ [askUserQuestionSelectionKey(0)]: ["Minimal"],
+ [askUserQuestionSelectionKey(1)]: ["Bold"],
+ });
+
+ expect(answers).toEqual([
+ {
+ header: "Style",
+ question: "Choose a style",
+ selected_options: ["Minimal"],
+ },
+ {
+ header: "Style",
+ question: "Choose a style",
+ selected_options: ["Bold"],
+ },
+ ]);
+ });
+});
diff --git a/frontend/app/src/pages/ask-user-question.ts b/frontend/app/src/pages/ask-user-question.ts
new file mode 100644
index 000000000..a1ce5faad
--- /dev/null
+++ b/frontend/app/src/pages/ask-user-question.ts
@@ -0,0 +1,16 @@
+import type { AskUserAnswer, AskUserQuestionPrompt } from "../api";
+
+export function askUserQuestionSelectionKey(questionIndex: number): string {
+ return String(questionIndex);
+}
+
+export function buildAskUserAnswers(
+ questions: AskUserQuestionPrompt[],
+ selections: Record,
+): AskUserAnswer[] {
+ return questions.map((question, index) => ({
+ header: question.header,
+ question: question.question,
+ selected_options: selections[askUserQuestionSelectionKey(index)] ?? [],
+ }));
+}
diff --git a/frontend/app/src/pages/resources/CapabilityIcons.tsx b/frontend/app/src/pages/resources/CapabilityIcons.tsx
index 886ef02aa..c3c32cbc0 100644
--- a/frontend/app/src/pages/resources/CapabilityIcons.tsx
+++ b/frontend/app/src/pages/resources/CapabilityIcons.tsx
@@ -52,38 +52,3 @@ export function CapabilityStrip({ capabilities }: { capabilities: ProviderCapabi
);
}
-
-/** Detailed capability tiles for ProviderDetail */
-export function CapabilityGrid({ capabilities }: { capabilities: ProviderCapabilities }) {
- return (
-
- {CAPABILITY_KEYS.map((key) => {
- const Icon = CAPABILITY_ICON_MAP[key];
- const has = capabilities[key];
- return (
-
-
-
-
-
- {CAPABILITY_LABELS[key]}
-
-
- );
- })}
-
- );
-}
diff --git a/frontend/app/src/router.tsx b/frontend/app/src/router.tsx
index 024478143..b45f6193f 100644
--- a/frontend/app/src/router.tsx
+++ b/frontend/app/src/router.tsx
@@ -15,7 +15,6 @@ import MarketplacePage from './pages/MarketplacePage';
import MarketplaceDetailPage from './pages/MarketplaceDetailPage';
import LibraryItemDetailPage from './pages/LibraryItemDetailPage';
import ResourcesPage from './pages/ResourcesPage';
-import ConnectionsPage from './pages/ConnectionsPage';
import InviteCodesPage from './pages/InviteCodesPage';
export const router = createBrowserRouter([
@@ -34,23 +33,27 @@ export const router = createBrowserRouter([
},
{
path: 'threads',
- element:
,
children: [
{
index: true,
element:
,
},
{
- path: ':memberId',
- element:
,
- },
- {
- path: ':memberId/new',
- element:
,
- },
- {
- path: ':memberId/:threadId',
- element:
,
+ element:
,
+ children: [
+ {
+ path: ':memberId',
+ element:
,
+ },
+ {
+ path: ':memberId/new',
+ element:
,
+ },
+ {
+ path: ':memberId/:threadId',
+ element:
,
+ },
+ ],
},
],
},
@@ -100,10 +103,6 @@ export const router = createBrowserRouter([
path: 'library',
element:
,
},
- {
- path: 'connections',
- element:
,
- },
{
path: 'invite-codes',
element:
,
diff --git a/frontend/app/src/store/app-store.test.ts b/frontend/app/src/store/app-store.test.ts
new file mode 100644
index 000000000..350c25ba7
--- /dev/null
+++ b/frontend/app/src/store/app-store.test.ts
@@ -0,0 +1,36 @@
+// @vitest-environment jsdom
+
+import { beforeEach, describe, expect, it } from "vitest";
+import { useAppStore } from "./app-store";
+
+describe("useAppStore", () => {
+ beforeEach(() => {
+ useAppStore.setState({
+ memberList: [],
+ taskList: [],
+ cronJobs: [],
+ librarySkills: [],
+ libraryMcps: [],
+ libraryAgents: [],
+ libraryRecipes: [],
+ userProfile: { name: "User", initials: "U", email: "" },
+ loaded: false,
+ error: null,
+ });
+ });
+
+ it("resets loaded member state when auth identity changes", () => {
+ useAppStore.setState({
+ memberList: [{ id: "m-old", name: "Old", status: "active" } as never],
+ loaded: true,
+ error: "stale",
+ });
+
+ useAppStore.getState().resetSessionData();
+
+ const state = useAppStore.getState();
+ expect(state.memberList).toEqual([]);
+ expect(state.loaded).toBe(false);
+ expect(state.error).toBeNull();
+ });
+});
diff --git a/frontend/app/src/store/app-store.ts b/frontend/app/src/store/app-store.ts
index e54bd1ef5..3cbab9423 100644
--- a/frontend/app/src/store/app-store.ts
+++ b/frontend/app/src/store/app-store.ts
@@ -6,6 +6,7 @@ import type {
import { useAuthStore } from "./auth-store";
const API = "/api/panel";
+let loadAllInflight: Promise
| null = null;
interface AppState {
// ── Data ──
@@ -23,6 +24,7 @@ interface AppState {
// ── Init ──
loadAll: () => Promise;
retry: () => Promise;
+ resetSessionData: () => void;
// ── Members ──
fetchMembers: () => Promise;
@@ -71,6 +73,38 @@ interface AppState {
getResourceUsedBy: (type: string, name: string) => string[];
}
+type LibraryType = "skill" | "mcp" | "agent" | "recipe";
+type LibraryStateKey = "librarySkills" | "libraryMcps" | "libraryAgents" | "libraryRecipes";
+
+const DEFAULT_PROFILE: UserProfile = { name: "User", initials: "U", email: "" };
+const LIBRARY_STATE_KEYS: Record = {
+ skill: "librarySkills",
+ mcp: "libraryMcps",
+ agent: "libraryAgents",
+ recipe: "libraryRecipes",
+};
+
+function getLibraryStateKey(type: string): LibraryStateKey {
+ const key = LIBRARY_STATE_KEYS[type as LibraryType];
+ if (!key) throw new Error(`Unsupported library type: ${type}`);
+ return key;
+}
+
+function emptySessionState() {
+ return {
+ memberList: [],
+ taskList: [],
+ cronJobs: [],
+ librarySkills: [],
+ libraryMcps: [],
+ libraryAgents: [],
+ libraryRecipes: [],
+ userProfile: DEFAULT_PROFILE,
+ loaded: false,
+ error: null,
+ };
+}
+
async function api(path: string, opts?: RequestInit): Promise {
const token = useAuthStore.getState().token;
const headers: Record = { "Content-Type": "application/json" };
@@ -81,35 +115,42 @@ async function api(path: string, opts?: RequestInit): Promise {
}
export const useAppStore = create()((set, get) => ({
- memberList: [],
- taskList: [],
- cronJobs: [],
- librarySkills: [],
- libraryMcps: [],
- libraryAgents: [],
- libraryRecipes: [],
- userProfile: { name: "User", initials: "U", email: "" },
- loaded: false,
- error: null,
+ ...emptySessionState(),
loadAll: async () => {
if (get().loaded) return;
- set({ error: null });
+ if (loadAllInflight) return loadAllInflight;
+
+ const pending = (async () => {
+ set({ error: null });
+ try {
+ // @@@load-all-singleflight - RootLayout can mount twice in dev StrictMode and /threads
+ // index redirect now avoids AppLayout, so keep the global panel bootstrap idempotent
+ // instead of firing duplicate members/tasks/library/profile bursts.
+ await Promise.all([
+ get().fetchMembers(),
+ get().fetchTasks(),
+ get().fetchCronJobs(),
+ get().fetchLibrary("skill"),
+ get().fetchLibrary("mcp"),
+ get().fetchLibrary("agent"),
+ get().fetchLibrary("recipe"),
+ get().fetchProfile(),
+ ]);
+ set({ loaded: true });
+ } catch (e) {
+ const msg = e instanceof Error ? e.message : String(e);
+ set({ error: `数据加载失败: ${msg}`, loaded: true });
+ }
+ })();
+
+ loadAllInflight = pending;
try {
- await Promise.all([
- get().fetchMembers(),
- get().fetchTasks(),
- get().fetchCronJobs(),
- get().fetchLibrary("skill"),
- get().fetchLibrary("mcp"),
- get().fetchLibrary("agent"),
- get().fetchLibrary("recipe"),
- get().fetchProfile(),
- ]);
- set({ loaded: true });
- } catch (e) {
- const msg = e instanceof Error ? e.message : String(e);
- set({ error: `数据加载失败: ${msg}`, loaded: true });
+ await pending;
+ } finally {
+ if (loadAllInflight === pending) {
+ loadAllInflight = null;
+ }
}
},
@@ -118,6 +159,11 @@ export const useAppStore = create()((set, get) => ({
await get().loadAll();
},
+ resetSessionData: () => {
+ loadAllInflight = null;
+ set(emptySessionState());
+ },
+
// ── Members ──
fetchMembers: async () => {
const data = await api<{ items: Member[] }>("/members");
@@ -254,10 +300,8 @@ export const useAppStore = create()((set, get) => ({
// ── Library ──
fetchLibrary: async (type) => {
const data = await api<{ items: ResourceItem[] }>(`/library/${type}`);
- if (type === "skill") set({ librarySkills: data.items });
- else if (type === "mcp") set({ libraryMcps: data.items });
- else if (type === "agent") set({ libraryAgents: data.items });
- else if (type === "recipe") set({ libraryRecipes: data.items });
+ const key = getLibraryStateKey(type);
+ set({ [key]: data.items } as Pick);
},
fetchLibraryNames: async (type) => {
@@ -270,10 +314,8 @@ export const useAppStore = create()((set, get) => ({
method: "POST",
body: JSON.stringify({ name, desc, ...extra }),
});
- if (type === "skill") set((s) => ({ librarySkills: [...s.librarySkills, item] }));
- else if (type === "mcp") set((s) => ({ libraryMcps: [...s.libraryMcps, item] }));
- else if (type === "agent") set((s) => ({ libraryAgents: [...s.libraryAgents, item] }));
- else set((s) => ({ libraryRecipes: [...s.libraryRecipes, item] }));
+ const key = getLibraryStateKey(type);
+ set((s) => ({ [key]: [...s[key], item] }) as Pick);
return item;
},
@@ -282,23 +324,23 @@ export const useAppStore = create()((set, get) => ({
method: "PUT",
body: JSON.stringify(fields),
});
- const updater = (list: ResourceItem[]) => list.map((x) => (x.id === id ? updated : x));
- if (type === "skill") set((s) => ({ librarySkills: updater(s.librarySkills) }));
- else if (type === "mcp") set((s) => ({ libraryMcps: updater(s.libraryMcps) }));
- else if (type === "agent") set((s) => ({ libraryAgents: updater(s.libraryAgents) }));
- else set((s) => ({ libraryRecipes: updater(s.libraryRecipes) }));
+ const key = getLibraryStateKey(type);
+ set((s) => ({
+ [key]: s[key].map((item) => (item.id === id ? updated : item)),
+ }) as Pick);
},
deleteResource: async (type, id) => {
await api(`/library/${type}/${id}`, { method: "DELETE" });
- const filter = (list: ResourceItem[]) => list.filter((x) => x.id !== id);
- if (type === "skill") set((s) => ({ librarySkills: filter(s.librarySkills) }));
- else if (type === "mcp") set((s) => ({ libraryMcps: filter(s.libraryMcps) }));
- else if (type === "agent") set((s) => ({ libraryAgents: filter(s.libraryAgents) }));
- else {
+ if (type === "recipe") {
const data = await api<{ items: ResourceItem[] }>(`/library/${type}`);
set({ libraryRecipes: data.items });
+ return;
}
+ const key = getLibraryStateKey(type);
+ set((s) => ({
+ [key]: s[key].filter((item) => item.id !== id),
+ }) as Pick);
},
fetchResourceContent: async (type, id) => {
diff --git a/frontend/app/src/store/auth-store.ts b/frontend/app/src/store/auth-store.ts
index fb0d7b1d8..f782dac72 100644
--- a/frontend/app/src/store/auth-store.ts
+++ b/frontend/app/src/store/auth-store.ts
@@ -1,15 +1,11 @@
/**
* Auth store — JWT token, user identity, login/register/logout.
* Persisted to localStorage via Zustand persist middleware.
- *
- * Set VITE_DEV_SKIP_AUTH=true in .env.development to bypass login during dev.
*/
import { create } from "zustand";
import { persist } from "zustand/middleware";
-const DEV_SKIP_AUTH = import.meta.env.VITE_DEV_SKIP_AUTH === "true";
-
// Allow overriding the API origin at runtime via window.__MYCEL_CONFIG__.apiBase
// (injected by docker-entrypoint.sh), falling back to the Vite build-time variable.
// Relative URLs are used when neither is set (same-origin / local dev).
@@ -18,7 +14,6 @@ const API_BASE = (
?? import.meta.env.VITE_API_BASE
?? ""
).replace(/\/$/, "");
-
export interface AuthIdentity {
id: string;
name: string;
@@ -62,15 +57,13 @@ async function apiPost(endpoint: string, body: Record) {
return res.json();
}
-const DEV_MOCK_USER: AuthIdentity = { id: "dev-user", name: "Dev", type: "human" };
-
export const useAuthStore = create()(
persist(
(set) => ({
- token: DEV_SKIP_AUTH ? "dev-skip-auth" : null,
- user: DEV_SKIP_AUTH ? DEV_MOCK_USER : null,
+ token: null,
+ user: null,
agent: null,
- entityId: DEV_SKIP_AUTH ? "dev-user" : null,
+ entityId: null,
setupInfo: null,
login: async (identifier, password) => {
@@ -81,7 +74,6 @@ export const useAuthStore = create()(
agent: data.agent,
entityId: data.user?.id ?? null,
});
- window.location.href = "/threads";
},
sendOtp: async (email, password, inviteCode) => {
@@ -117,10 +109,6 @@ export const useAuthStore = create()(
}),
{
name: "leon-auth",
- ...(DEV_SKIP_AUTH && {
- // In skip-auth mode, never let persisted null overwrite the mock identity
- merge: (_persisted: unknown, current: AuthState) => current,
- }),
},
),
);
@@ -140,7 +128,7 @@ export async function authFetch(url: string, init?: RequestInit): Promise=6.0.0",
"uvicorn>=0.30.0",
"sse-starlette>=1.6.0",
+ "multilspy>=0.0.15",
+ "pyright>=1.1.0",
"supabase>=2.28.3",
"fastapi>=0.118.0",
"langgraph-checkpoint-postgres>=3.0.5",
@@ -91,6 +93,7 @@ packages = [
"core.tools.filesystem",
"core.tools.filesystem.read",
"core.tools.filesystem.read.readers",
+ "core.tools.lsp",
"core.tools.search",
"core.tools.skills",
"core.tools.task",
diff --git a/sandbox/base.py b/sandbox/base.py
index 0a423f25a..2ae32a676 100644
--- a/sandbox/base.py
+++ b/sandbox/base.py
@@ -9,6 +9,7 @@
import asyncio
import logging
+import threading
from abc import ABC, abstractmethod
from pathlib import Path
from typing import TYPE_CHECKING
@@ -70,6 +71,49 @@ def __getattr__(self, name: str):
return getattr(self._remote._get_capability().command, name)
+def _cached_capability_is_stale(manager, thread_id: str, capability) -> bool:
+ session = getattr(capability, "_session", None)
+ if session is None:
+ return True
+ if getattr(session, "status", None) in {"closed", "failed", "paused"}:
+ return True
+ # @@@capability-cache-session-liveness - cached wrappers outlive session teardown;
+ # always confirm the cached session still exists as the current active session.
+ current = manager.session_manager.get(thread_id, session.terminal.terminal_id)
+ if current is None:
+ return True
+ return current.session_id != session.session_id
+
+
+def _run_coroutine_blocking(coro, *, timeout: float | None = None):
+ try:
+ asyncio.get_running_loop()
+ except RuntimeError:
+ return asyncio.run(coro)
+
+ result: dict[str, object] = {}
+ error: dict[str, BaseException] = {}
+ done = threading.Event()
+
+ # @@@same-loop-init-bridge - init commands can run while the web request event loop is already active;
+ # running run_coroutine_threadsafe(...).result() on that same loop deadlocks, so bridge through a helper thread.
+ def _runner() -> None:
+ try:
+ result["value"] = asyncio.run(coro)
+ except BaseException as exc: # pragma: no cover - defensive relay
+ error["value"] = exc
+ finally:
+ done.set()
+
+ thread = threading.Thread(target=_runner, daemon=True)
+ thread.start()
+ if not done.wait(timeout):
+ raise TimeoutError(f"Coroutine timed out after {timeout}s")
+ if "value" in error:
+ raise error["value"]
+ return result.get("value")
+
+
class RemoteSandbox(Sandbox):
"""Concrete sandbox for all provider-backed environments (AgentBay, Docker, E2B, Daytona)."""
@@ -103,26 +147,30 @@ def _get_capability(self) -> SandboxCapability:
thread_id = get_current_thread_id()
if not thread_id:
raise RuntimeError("No thread_id set. Call set_current_thread_id first.")
+ print(f"[RemoteSandbox._get_capability] provider={self._provider.name} thread_id={thread_id}", flush=True)
+ cached = self._capability_cache.get(thread_id)
+ if cached is not None and _cached_capability_is_stale(self._manager, thread_id, cached):
+ self._capability_cache.pop(thread_id, None)
if thread_id not in self._capability_cache:
+ print(
+ f"[RemoteSandbox._get_capability] provider={self._provider.name} thread_id={thread_id} cache=miss",
+ flush=True,
+ )
capability = self._manager.get_sandbox(thread_id)
if self._config.init_commands and thread_id not in self._init_commands_run:
self._run_init_commands(capability)
self._init_commands_run.add(thread_id)
self._capability_cache[thread_id] = capability
+ else:
+ print(
+ f"[RemoteSandbox._get_capability] provider={self._provider.name} thread_id={thread_id} cache=hit",
+ flush=True,
+ )
return self._capability_cache[thread_id]
def _run_init_commands(self, capability: SandboxCapability) -> None:
for i, cmd in enumerate(self._config.init_commands, 1):
- try:
- loop = asyncio.get_running_loop()
- except RuntimeError:
- loop = None
-
- if loop:
- future = asyncio.run_coroutine_threadsafe(capability.command.execute(cmd), loop)
- result = future.result(timeout=30)
- else:
- result = asyncio.run(capability.command.execute(cmd))
+ result = _run_coroutine_blocking(capability.command.execute(cmd), timeout=30)
if result.exit_code != 0:
raise RuntimeError(
@@ -229,6 +277,9 @@ def _get_capability(self) -> SandboxCapability:
thread_id = get_current_thread_id()
if not thread_id:
raise RuntimeError("No thread_id set. Call set_current_thread_id first.")
+ cached = self._capability_cache.get(thread_id)
+ if cached is not None and _cached_capability_is_stale(self._manager, thread_id, cached):
+ self._capability_cache.pop(thread_id, None)
if thread_id not in self._capability_cache:
self._capability_cache[thread_id] = self._manager.get_sandbox(thread_id)
return self._capability_cache[thread_id]
diff --git a/sandbox/capability.py b/sandbox/capability.py
index 4b278742a..b5269a30f 100644
--- a/sandbox/capability.py
+++ b/sandbox/capability.py
@@ -9,7 +9,7 @@
import shlex
import uuid
-from pathlib import Path
+from pathlib import Path, PurePosixPath
from typing import TYPE_CHECKING
from sandbox.interfaces.executor import BaseExecutor
@@ -36,7 +36,7 @@ class SandboxCapability:
def __init__(self, session: ChatSession, manager: SandboxManager | None = None):
self._session = session
self._command_wrapper = _CommandWrapper(session, manager=manager)
- self._fs_wrapper = _FileSystemWrapper(session)
+ self._fs_wrapper = _FileSystemWrapper(session, manager=manager)
@property
def command(self) -> BaseExecutor:
@@ -95,6 +95,14 @@ async def execute(self, command: str, cwd: str | None = None, timeout: float | N
self._session.touch()
# @@@command-context - CommandMiddleware passes Cwd/env; preserve that context for remote runtimes.
wrapped, _ = self._wrap_command(command, cwd, env)
+ print(
+ "[_CommandWrapper.execute] "
+ f"thread_id={self._session.thread_id} "
+ f"terminal_id={self._session.terminal.terminal_id} "
+ f"command={command[:200]!r} "
+ f"cwd={cwd!r} timeout={timeout}",
+ flush=True,
+ )
return await self._session.runtime.execute(wrapped, timeout)
async def execute_async(self, command: str, cwd: str | None = None, env: dict[str, str] | None = None):
@@ -178,8 +186,9 @@ class _FileSystemWrapper(FileSystemBackend):
is_remote = True
- def __init__(self, session: ChatSession):
+ def __init__(self, session: ChatSession, manager: SandboxManager | None = None):
self._session = session
+ self._manager = manager
def _get_provider(self):
"""Get provider from session's lease."""
@@ -193,7 +202,14 @@ def _get_instance_id(self) -> str:
# @@@lease-convergence - File operations can also wake paused instances; always converge through lease.
provider = getattr(self._session.runtime, "provider", None)
if provider is not None:
- instance = self._session.lease.ensure_active_instance(provider)
+ try:
+ instance = self._session.lease.ensure_active_instance(provider)
+ except RuntimeError:
+ if self._manager is None or getattr(self._session.lease, "observed_state", None) != "paused":
+ raise
+ if not self._manager.resume_session(self._session.thread_id, source="auto_resume"):
+ raise
+ instance = self._session.lease.ensure_active_instance(provider)
else:
instance = self._session.lease.get_instance()
if not instance:
@@ -242,7 +258,30 @@ def file_mtime(self, path: str) -> float | None:
return None
def file_size(self, path: str) -> int | None:
- """Not available for remote sandbox."""
+ """Best-effort size lookup via parent directory listing."""
+ self._session.touch()
+ provider = self._get_provider()
+ instance_id = self._get_instance_id()
+
+ target = PurePosixPath(path)
+ if not target.name:
+ return None
+
+ parent = str(target.parent) or "/"
+ try:
+ entries = provider.list_dir(instance_id, parent)
+ except Exception:
+ return None
+
+ for entry in entries or []:
+ if entry.get("name") != target.name:
+ continue
+ size = entry.get("size")
+ if isinstance(size, int):
+ return size
+ if isinstance(size, float):
+ return int(size)
+ return None
return None
def is_dir(self, path: str) -> bool:
diff --git a/sandbox/manager.py b/sandbox/manager.py
index 29f380b0a..54237f710 100644
--- a/sandbox/manager.py
+++ b/sandbox/manager.py
@@ -10,6 +10,7 @@
from pathlib import Path
from typing import Any
+from config.user_paths import user_home_path
from sandbox.capability import SandboxCapability
from sandbox.chat_session import ChatSessionManager, ChatSessionPolicy
from sandbox.lease import lease_from_row
@@ -20,7 +21,7 @@
from storage.providers.sqlite.kernel import SQLiteDBRole, resolve_role_db_path
from storage.providers.sqlite.lease_repo import SQLiteLeaseRepo
from storage.providers.sqlite.terminal_repo import SQLiteTerminalRepo
-from storage.providers.sqlite.thread_repo import SQLiteThreadRepo
+from storage.runtime import build_storage_container, build_thread_repo
logger = logging.getLogger(__name__)
@@ -53,6 +54,76 @@ def lookup_sandbox_for_thread(thread_id: str, db_path: Path | None = None) -> st
lease_repo.close()
+def resolve_existing_lease_cwd(
+ lease_id: str,
+ fallback_cwd: str | None = None,
+ db_path: Path | None = None,
+) -> str:
+ if fallback_cwd:
+ return fallback_cwd
+
+ target_db = db_path or resolve_role_db_path(SQLiteDBRole.SANDBOX)
+ terminal_repo = SQLiteTerminalRepo(db_path=target_db)
+ try:
+ row = terminal_repo.get_latest_by_lease(lease_id)
+ finally:
+ terminal_repo.close()
+ if row and row.get("cwd"):
+ return str(row["cwd"])
+ return str(Path.home())
+
+
+def bind_thread_to_existing_lease(
+ thread_id: str,
+ lease_id: str,
+ *,
+ cwd: str | None = None,
+ db_path: Path | None = None,
+) -> str:
+ target_db = db_path or resolve_role_db_path(SQLiteDBRole.SANDBOX)
+ terminal_repo = SQLiteTerminalRepo(db_path=target_db)
+ try:
+ existing = terminal_repo.get_active(thread_id)
+ if existing is not None:
+ return str(existing["cwd"])
+ initial_cwd = resolve_existing_lease_cwd(lease_id, cwd, db_path=target_db)
+ terminal_repo.create(
+ terminal_id=f"term-{uuid.uuid4().hex[:12]}",
+ thread_id=thread_id,
+ lease_id=lease_id,
+ initial_cwd=initial_cwd,
+ )
+ return initial_cwd
+ finally:
+ terminal_repo.close()
+
+
+def bind_thread_to_existing_thread_lease(
+ thread_id: str,
+ source_thread_id: str,
+ *,
+ cwd: str | None = None,
+ db_path: Path | None = None,
+) -> str | None:
+ target_db = db_path or resolve_role_db_path(SQLiteDBRole.SANDBOX)
+ terminal_repo = SQLiteTerminalRepo(db_path=target_db)
+ try:
+ source_terminal = terminal_repo.get_active(source_thread_id)
+ finally:
+ terminal_repo.close()
+ if source_terminal is None:
+ return None
+ # @@@subagent-lease-reuse
+ # Child threads need their own terminal/session state, but must attach
+ # to the parent's existing lease instead of silently provisioning a new one.
+ return bind_thread_to_existing_lease(
+ thread_id,
+ str(source_terminal["lease_id"]),
+ cwd=cwd,
+ db_path=target_db,
+ )
+
+
class SandboxManager:
def __init__(
self,
@@ -105,27 +176,82 @@ def get_lease(self, lease_id: str):
def _default_terminal_cwd(self) -> str:
return resolve_provider_cwd(self.provider)
+ def _sandbox_volume_repo(self):
+ # @@@volume-repo-align - thread creation persists volume metadata through the
+ # active storage container; sandbox startup must read the same repo instead
+        # of hardcoding SQLite, or Supabase-backed threads lose their volume row.
+ container = build_storage_container(main_db_path=resolve_role_db_path(SQLiteDBRole.MAIN))
+ return container.sandbox_volume_repo()
+
+ def _requires_volume_bootstrap(self) -> bool:
+ # @@@local-shell-no-volume-gate - local runtimes execute directly on the host
+ # and should not fail to start a shell just because file-channel volume
+ # metadata is absent or stored in a different backend.
+ return self.provider_capability.runtime_kind != "local"
+
+ def _ensure_thread_volume(self, thread_id: str, lease) -> None:
+ if not self._requires_volume_bootstrap() or lease.volume_id:
+ return
+
+ volume_id = str(uuid.uuid4())
+ self._create_volume_entry(thread_id, volume_id)
+
+ # @@@remote-volume-self-heal - legacy threads can lose their eager-created lease row
+ # and get rebound through manager recovery; persist a replacement volume_id before mount/sync.
+ self.lease_store.set_volume_id(lease.lease_id, volume_id)
+ lease.volume_id = volume_id
+
+ def _create_volume_entry(self, thread_id: str, volume_id: str) -> None:
+ import json
+ import os
+
+ from sandbox.volume_source import HostVolume
+
+ now_str = datetime.now().isoformat()
+ volume_root = Path(os.environ.get("LEON_SANDBOX_VOLUME_ROOT", str(user_home_path("volumes")))).expanduser().resolve()
+ volume_root.mkdir(parents=True, exist_ok=True)
+ source = HostVolume(volume_root / volume_id)
+
+ repo = self._sandbox_volume_repo()
+ try:
+ repo.create(volume_id, json.dumps(source.serialize()), f"vol-{thread_id}", now_str)
+ finally:
+ repo.close()
+
+ def _resolve_volume_entry(self, thread_id: str, lease) -> dict[str, Any]:
+ repo = self._sandbox_volume_repo()
+ try:
+ entry = repo.get(lease.volume_id)
+ finally:
+ repo.close()
+ if entry:
+ return entry
+ # @@@missing-volume-row-self-heal - old remote threads can retain a live lease.volume_id
+ # after the sandbox volume row was pruned; recreate the row in place before mount/sync.
+ self._create_volume_entry(thread_id, lease.volume_id)
+ repo = self._sandbox_volume_repo()
+ try:
+ entry = repo.get(lease.volume_id)
+ finally:
+ repo.close()
+ if not entry:
+ raise ValueError(f"Volume not found: {lease.volume_id}")
+ return entry
+
def _setup_mounts(self, thread_id: str) -> dict:
"""Mount the lease's volume into the sandbox. Pure sandbox-layer operation."""
import json
from sandbox.volume_source import DaytonaVolume, deserialize_volume_source
- from storage.providers.sqlite.sandbox_volume_repo import SQLiteSandboxVolumeRepo
terminal = self._get_active_terminal(thread_id)
if not terminal:
raise ValueError(f"No active terminal for thread {thread_id}")
lease = self._get_lease(terminal.lease_id)
- if not lease or not lease.volume_id:
+ if not lease:
raise ValueError(f"No volume for thread {thread_id}")
-
- repo = SQLiteSandboxVolumeRepo()
- try:
- entry = repo.get(lease.volume_id)
- finally:
- repo.close()
- if not entry:
- raise ValueError(f"Volume not found: {lease.volume_id}")
+ self._ensure_thread_volume(thread_id, lease)
+ entry = self._resolve_volume_entry(thread_id, lease)
source = deserialize_volume_source(json.loads(entry["source"]))
volume_id = lease.volume_id
@@ -152,11 +278,10 @@ def _upgrade_to_daytona_volume(self, thread_id: str, current_source, volume_id:
import json
from sandbox.volume_source import DaytonaVolume
- from storage.providers.sqlite.sandbox_volume_repo import SQLiteSandboxVolumeRepo
# @@@member-id-for-volume-naming - read from thread config in leon.db
member_id = "unknown"
- thread_repo = SQLiteThreadRepo(resolve_role_db_path(SQLiteDBRole.MAIN))
+ thread_repo = build_thread_repo(main_db_path=resolve_role_db_path(SQLiteDBRole.MAIN))
try:
row = thread_repo.get_by_id(thread_id)
if row:
@@ -172,6 +297,7 @@ def _upgrade_to_daytona_volume(self, thread_id: str, current_source, volume_id:
if "already exists" in str(e):
volume_name = f"leon-volume-{member_id}"
logger.info("Daytona volume already exists: %s, reusing", volume_name)
+ self.provider.wait_managed_volume_ready(volume_name)
else:
raise
@@ -180,7 +306,7 @@ def _upgrade_to_daytona_volume(self, thread_id: str, current_source, volume_id:
volume_name=volume_name,
)
- repo = SQLiteSandboxVolumeRepo()
+ repo = self._sandbox_volume_repo()
try:
repo.update_source(volume_id, json.dumps(new_source.serialize()))
finally:
@@ -251,30 +377,35 @@ def resolve_volume_source(self, thread_id: str):
import json
from sandbox.volume_source import deserialize_volume_source
- from storage.providers.sqlite.sandbox_volume_repo import SQLiteSandboxVolumeRepo
terminal = self._get_active_terminal(thread_id)
if not terminal:
raise ValueError(f"No active terminal for thread {thread_id}")
lease = self._get_lease(terminal.lease_id)
- if not lease or not lease.volume_id:
+ if not lease:
raise ValueError(f"No volume for thread {thread_id}")
- repo = SQLiteSandboxVolumeRepo()
- try:
- entry = repo.get(lease.volume_id)
- finally:
- repo.close()
- if not entry:
- raise ValueError(f"Volume not found: {lease.volume_id}")
+ self._ensure_thread_volume(thread_id, lease)
+ entry = self._resolve_volume_entry(thread_id, lease)
return deserialize_volume_source(json.loads(entry["source"]))
+ def _skip_volume_sync_for_local_lease(self, lease) -> bool:
+ # @@@local-no-volume-sync - local sessions may execute directly in host cwd with no sandbox volume row.
+        # In that shape there is nothing to upload/download, so sync paths must no-op instead of inventing a volume.
+ return lease is not None and not self._requires_volume_bootstrap() and not lease.volume_id
+
def _sync_to_sandbox(self, thread_id: str, instance_id: str, source=None, files: list[str] | None = None) -> None:
if source is None:
+ lease = self._get_thread_lease(thread_id)
+ if self._skip_volume_sync_for_local_lease(lease):
+ return
source = self.resolve_volume_source(thread_id)
self.volume.sync_upload(thread_id, instance_id, source, self.volume.resolve_mount_path(), files=files)
def _sync_from_sandbox(self, thread_id: str, instance_id: str, source=None) -> None:
if source is None:
+ lease = self._get_thread_lease(thread_id)
+ if self._skip_volume_sync_for_local_lease(lease):
+ return
source = self.resolve_volume_source(thread_id)
self.volume.sync_download(thread_id, instance_id, source, self.volume.resolve_mount_path())
@@ -307,7 +438,7 @@ def get_sandbox(self, thread_id: str, bind_mounts: list | None = None) -> Sandbo
if session:
self._assert_lease_provider(session.lease, thread_id)
# @@@activity-resume - Any new activity against a paused thread must resume before command execution.
- if session.status == "paused":
+ if session.status == "paused" or getattr(session.lease, "observed_state", None) == "paused":
if not self.resume_session(thread_id, source="auto_resume"):
raise RuntimeError(f"Failed to resume paused session for thread {thread_id}")
session = self.session_manager.get(thread_id, session.terminal.terminal_id)
@@ -339,13 +470,29 @@ def get_sandbox(self, thread_id: str, bind_mounts: list | None = None) -> Sandbo
if not lease:
lease = self._create_lease(terminal.lease_id, self.provider.name)
self._assert_lease_provider(lease, thread_id)
+ if lease.observed_state == "paused":
+ # @@@paused-lease-rehydrate - a persisted thread can lose its in-memory chat session
+ # while the lease stays paused in storage; resume before reconstructing capability.
+ if not self.resume_session(thread_id, source="auto_resume"):
+ raise RuntimeError(f"Failed to resume paused session for thread {thread_id}")
+ session = self.session_manager.get(thread_id, terminal.terminal_id)
+ if session:
+ self._assert_lease_provider(session.lease, thread_id)
+ self._ensure_bound_instance(session.lease)
+ return SandboxCapability(session, manager=self)
+ lease = self._get_lease(terminal.lease_id)
+ if not lease:
+ raise RuntimeError(f"Lease disappeared after resume for thread {thread_id}")
+ self._assert_lease_provider(lease, thread_id)
# Stamp bind_mounts on lease so lazy creation paths pick them up
if bind_mounts:
lease.bind_mounts = bind_mounts
- # @@@volume-strategy-gate - mount volume into sandbox
- storage = self._setup_mounts(thread_id)
+ storage = None
+ if self._requires_volume_bootstrap():
+ # @@@volume-strategy-gate - remote runtimes need volume mount/sync before first command.
+ storage = self._setup_mounts(thread_id)
self._ensure_bound_instance(lease)
@@ -375,7 +522,7 @@ def get_sandbox(self, thread_id: str, bind_mounts: list | None = None) -> Sandbo
lease=lease,
)
- if instance:
+ if instance and storage is not None:
# @@@workspace-upload - sync files to sandbox after creation
self._sync_to_sandbox(thread_id, instance.instance_id, source=storage["source"])
self._fire_session_ready(instance.instance_id, "create")
@@ -518,15 +665,26 @@ def enforce_idle_timeouts(self) -> int:
if self._lease_is_busy(lease.lease_id):
continue
status = lease.refresh_instance_status(self.provider)
- # Only pause remote providers (local sandbox doesn't need pause)
+ capability = self.provider.get_capability()
+ # @@@idle-reaper-reclaim-contract - idle timeout must reclaim remote resources; providers
+            # that cannot pause should destroy instead of repeatedly raising unsupported-operation errors.
if status == "running" and self.provider.name != "local":
try:
- paused = lease.pause_instance(self.provider, source="idle_reaper")
+ if capability.can_pause:
+ reclaimed = lease.pause_instance(self.provider, source="idle_reaper")
+ elif capability.can_destroy:
+ reclaimed = lease.destroy_instance(self.provider, source="idle_reaper") is None
+ else:
+ print(
+ f"[idle-reaper] provider {self.provider.name} cannot reclaim expired lease "
+ f"{lease.lease_id} for thread {thread_id}"
+ )
+ continue
except Exception as exc:
- print(f"[idle-reaper] failed to pause expired lease {lease.lease_id} for thread {thread_id}: {exc}")
+ print(f"[idle-reaper] failed to reclaim expired lease {lease.lease_id} for thread {thread_id}: {exc}")
continue
- if not paused:
- print(f"[idle-reaper] failed to pause expired lease {lease.lease_id} for thread {thread_id}")
+ if not reclaimed:
+ print(f"[idle-reaper] failed to reclaim expired lease {lease.lease_id} for thread {thread_id}")
continue
self.session_manager.delete(session_id, reason="idle_timeout")
@@ -596,6 +754,10 @@ def resume_session(self, thread_id: str, source: str = "user_resume") -> bool:
for terminal in terminals:
session = self.session_manager.get(thread_id, terminal.terminal_id)
if session:
+ session.lease = lease
+ runtime = getattr(session, "runtime", None)
+ if runtime is not None:
+ runtime.lease = lease
self.session_manager.resume(session.session_id)
resumed_any = True
diff --git a/sandbox/provider.py b/sandbox/provider.py
index fc298afed..d96524206 100644
--- a/sandbox/provider.py
+++ b/sandbox/provider.py
@@ -260,6 +260,10 @@ def delete_managed_volume(self, backend_ref: str) -> None:
"""Delete provider-managed persistent volume."""
raise NotImplementedError(f"{self.name} does not support managed volumes")
+ def wait_managed_volume_ready(self, backend_ref: str) -> None:
+ """Block until a previously created managed volume is reusable."""
+ return None
+
def set_thread_bind_mounts(self, thread_id: str, mounts: list) -> None:
"""Set per-thread bind mounts for next create_session(). No-op for providers without mount support."""
pass
diff --git a/sandbox/providers/agentbay.py b/sandbox/providers/agentbay.py
index 4f3e7c996..95e16da05 100644
--- a/sandbox/providers/agentbay.py
+++ b/sandbox/providers/agentbay.py
@@ -6,9 +6,14 @@
from __future__ import annotations
+import json
+import time
from dataclasses import replace
+from types import SimpleNamespace
from typing import TYPE_CHECKING, Any
+import requests
+
from sandbox.provider import (
Metrics,
ProviderCapability,
@@ -100,7 +105,7 @@ def create_session(self, context_id: str | None = None, thread_id: str | None =
if not result.success:
raise RuntimeError(f"Failed to create session: {result.error_message}")
- session = result.session
+ session = self._hydrate_direct_call_session(result.session)
self._sessions[session.session_id] = session
return SessionInfo(
@@ -111,7 +116,10 @@ def create_session(self, context_id: str | None = None, thread_id: str | None =
def destroy_session(self, session_id: str, sync: bool = True) -> bool:
session = self._get_session(session_id)
- result = session.delete(sync_context=sync)
+ # @@@agentbay-destroy-without-pause - some AgentBay account tiers wire delete(sync_context=True)
+ # through pause/sync first; when pause is unsupported, destroy must skip sync_context entirely.
+ effective_sync = sync and self.get_capability().can_pause
+ result = session.delete(sync_context=effective_sync)
if result.success:
self._sessions.pop(session_id, None)
return result.success
@@ -161,17 +169,65 @@ def execute(
) -> ProviderExecResult:
session = self._get_session(session_id)
timeout_ms = min(timeout_ms, 50000)
-
- result = session.command.execute_command(
- command=command,
- timeout_ms=timeout_ms,
- cwd=cwd or self.default_context_path,
+ exec_args = {
+ "command": command,
+ "timeout_ms": timeout_ms,
+ "cwd": cwd or self.default_context_path,
+ }
+ shell_server = self._resolve_shell_server(session)
+ session_tools = getattr(session, "mcpTools", None) or getattr(session, "mcp_tools", None) or []
+ print(
+ "[AgentBay.execute] "
+ f"session_id={session_id} "
+ f"has_link_url={bool(getattr(session, 'link_url', ''))} "
+ f"has_token={bool(getattr(session, 'token', ''))} "
+ f"shell_server={shell_server!r} "
+ f"tool_count={len(session_tools)} "
+ f"timeout_ms={timeout_ms}",
+ flush=True,
)
- if not result.success:
- return ProviderExecResult(output="", error=result.error_message)
+ if getattr(session, "link_url", "") and getattr(session, "token", "") and shell_server:
+ # @@@agentbay-shell-link-route - shared staging proved shell can degrade into the API path
+ # despite hydrated direct-call metadata; take the explicit LinkUrl route when shell server is known.
+ result = self._call_link_url_tool(session, "shell", exec_args, shell_server)
+ print(
+ "[AgentBay.execute] "
+ f"session_id={session_id} path=link_url exit_code={result.exit_code} "
+ f"error={result.error!r} output_len={len(result.output or '')}",
+ flush=True,
+ )
+ return result
- return ProviderExecResult(output=result.output or "")
+ print(f"[AgentBay.execute] session_id={session_id} path=sdk_command_execute", flush=True)
+ try:
+ result = session.command.execute_command(**exec_args)
+ except Exception as exc:
+ print(
+ f"[AgentBay.execute] session_id={session_id} path=sdk_command_execute raised={exc.__class__.__name__}: {exc}",
+ flush=True,
+ )
+ raise
+
+ if not result.success:
+ print(
+ "[AgentBay.execute] "
+ f"session_id={session_id} path=sdk_command_execute success=False "
+ f"exit_code={getattr(result, 'exit_code', None)} "
+ f"error={getattr(result, 'error_message', None)!r} "
+ f"output_len={len(getattr(result, 'output', '') or '')}",
+ flush=True,
+ )
+ return ProviderExecResult(output=result.output or "", exit_code=result.exit_code or 1, error=result.error_message)
+
+ print(
+ "[AgentBay.execute] "
+ f"session_id={session_id} path=sdk_command_execute success=True "
+ f"exit_code={getattr(result, 'exit_code', None)} "
+ f"output_len={len(getattr(result, 'output', '') or '')}",
+ flush=True,
+ )
+ return ProviderExecResult(output=result.output or "", exit_code=result.exit_code or 0)
def read_file(self, session_id: str, path: str) -> str:
session = self._get_session(session_id)
@@ -246,7 +302,168 @@ def _get_session(self, session_id: str):
if not result.success:
raise RuntimeError(f"Session not found: {session_id}")
self._sessions[session_id] = result.session
- return self._sessions[session_id]
+ cached = self._sessions[session_id]
+ hydrated = self._hydrate_direct_call_session(cached)
+ self._sessions[session_id] = hydrated
+ return hydrated
+
+ def _hydrate_direct_call_session(self, session: Any):
+ """Ensure cached session carries LinkUrl/token/tool metadata for direct shell calls."""
+ if not self._session_needs_direct_call_refresh(session):
+ return session
+ session_id = str(getattr(session, "session_id", "") or "")
+ if not session_id:
+ raise RuntimeError("AgentBay session missing session_id")
+ refreshed = self.client.get(session_id)
+ if not refreshed.success:
+ raise RuntimeError(f"Failed to hydrate AgentBay session {session_id}: {refreshed.error_message}")
+ hydrated = refreshed.session
+ if self._session_needs_direct_call_refresh(hydrated):
+ metadata = self._fetch_direct_call_metadata(session_id)
+ self._apply_direct_call_metadata(hydrated, metadata)
+ return hydrated
+
+ @staticmethod
+ def _resolve_shell_server(session: Any) -> str | None:
+ for resolver_name in ("_get_mcp_server_for_tool", "_find_server_for_tool"):
+ resolver = getattr(session, resolver_name, None)
+ if callable(resolver):
+ try:
+ server_name = resolver("shell")
+ except Exception:
+ continue
+ if server_name:
+ return str(server_name)
+ for tools_attr in ("mcpTools", "mcp_tools"):
+ tools = getattr(session, tools_attr, None) or []
+ for tool in tools:
+ if getattr(tool, "name", None) == "shell":
+ server_name = getattr(tool, "server", "") or ""
+ if server_name:
+ return str(server_name)
+ return None
+
+ @staticmethod
+ def _provider_exec_result_from_tool_result(tool_result: Any) -> ProviderExecResult:
+ if not getattr(tool_result, "success", False):
+ error_message = getattr(tool_result, "error_message", "") or "Failed to execute command"
+ return ProviderExecResult(output="", exit_code=1, error=error_message)
+ data = getattr(tool_result, "data", "")
+ try:
+ payload = json.loads(data) if isinstance(data, str) else data
+ except json.JSONDecodeError:
+ payload = None
+ if isinstance(payload, dict):
+ stdout = str(payload.get("stdout", "") or "")
+ stderr = str(payload.get("stderr", "") or "")
+ exit_code = int(payload.get("exit_code", 0) or 0)
+ error = stderr or None
+ return ProviderExecResult(output=stdout + stderr, exit_code=exit_code, error=error)
+ return ProviderExecResult(output=str(data or ""), exit_code=0)
+
+ def _call_link_url_tool(
+ self,
+ session: Any,
+ tool_name: str,
+ args: dict[str, Any],
+ server_name: str,
+ ) -> ProviderExecResult:
+ link_url = str(getattr(session, "link_url", "") or "")
+ token = str(getattr(session, "token", "") or "")
+ if not link_url or not token:
+ return ProviderExecResult(output="", exit_code=1, error="LinkUrl/token not available")
+
+ try:
+ response = requests.post(
+ link_url.rstrip("/") + "/callTool",
+ json={
+ "args": args,
+ "server": server_name,
+ "requestId": f"link-{int(time.time() * 1000)}",
+ "tool": tool_name,
+ "token": token,
+ },
+ headers={
+ "Content-Type": "application/json",
+ "X-Access-Token": token,
+ },
+ timeout=max(int(args.get("timeout_ms", 30000) or 30000) / 1000.0, 30.0),
+ )
+ except requests.RequestException as exc:
+ return ProviderExecResult(output="", exit_code=1, error=f"HTTP request failed: {exc}")
+ if response.status_code < 200 or response.status_code >= 300:
+ return ProviderExecResult(output="", exit_code=1, error=f"HTTP request failed with code: {response.status_code}")
+
+ outer = response.json()
+ data_field = outer.get("data")
+ if data_field is None:
+ return ProviderExecResult(output="", exit_code=1, error="No data field in LinkUrl response")
+ parsed_data = json.loads(data_field) if isinstance(data_field, str) else data_field
+ if not isinstance(parsed_data, dict):
+ return ProviderExecResult(output="", exit_code=1, error="Invalid data field type in LinkUrl response")
+
+ result_field = parsed_data.get("result", {})
+ if not isinstance(result_field, dict):
+ return ProviderExecResult(output="", exit_code=1, error="No result field in LinkUrl response data")
+
+ content = result_field.get("content", [])
+ text_content = ""
+ if isinstance(content, list) and content:
+ first = content[0]
+ if isinstance(first, str):
+ text_content = first
+ elif isinstance(first, dict):
+ text_content = str(first.get("text") or first.get("blob") or first.get("data") or "")
+ elif isinstance(content, str):
+ text_content = content
+
+ if result_field.get("isError", False):
+ error_message = text_content or json.dumps(result_field, ensure_ascii=False)
+ return ProviderExecResult(output="", exit_code=1, error=error_message)
+
+ return self._provider_exec_result_from_tool_result(SimpleNamespace(success=True, data=text_content, error_message=""))
+
+ @staticmethod
+ def _session_needs_direct_call_refresh(session: Any) -> bool:
+ # @@@agentbay-direct-call-hydration - shared staging may return a create-session object
+ # without token/link_url/mcpTools; refresh once so shell execution stays on the richer LinkUrl path.
+ if not getattr(session, "token", ""):
+ return True
+ if not getattr(session, "link_url", ""):
+ return True
+ tools = getattr(session, "mcpTools", None) or getattr(session, "mcp_tools", None)
+ return not bool(tools)
+
+ def _fetch_direct_call_metadata(self, session_id: str) -> dict[str, Any]:
+ from agentbay.api.models import GetSessionRequest
+
+ # @@@agentbay-raw-get-session - the SDK Session object drops LinkUrl/ToolList for this account tier,
+ # but the raw GetSession response still carries them. Pull that response directly and patch the session.
+ request = GetSessionRequest(authorization=f"Bearer {self.client.api_key}", session_id=session_id)
+ response = self.client.client.get_session(request)
+ body = response.to_map().get("body", {})
+ data = body.get("Data", {}) or {}
+ return {
+ "link_url": data.get("LinkUrl", "") or "",
+ "token": data.get("Token", "") or "",
+ "mcp_tools": [
+ SimpleNamespace(name=str(tool.get("Name", "") or ""), server=str(tool.get("Server", "") or ""))
+ for tool in (data.get("ToolList", []) or [])
+ ],
+ }
+
+ @staticmethod
+ def _apply_direct_call_metadata(session: Any, metadata: dict[str, Any]) -> None:
+ link_url = str(metadata.get("link_url", "") or "")
+ if link_url:
+ setattr(session, "link_url", link_url)
+ token = str(metadata.get("token", "") or "")
+ if token:
+ setattr(session, "token", token)
+ tools = metadata.get("mcp_tools", []) or []
+ if tools:
+ setattr(session, "mcp_tools", tools)
+ setattr(session, "mcpTools", tools)
def create_runtime(self, terminal: AbstractTerminal, lease: SandboxLease) -> PhysicalTerminalRuntime:
from sandbox.runtime import RemoteWrappedRuntime
diff --git a/sandbox/providers/daytona.py b/sandbox/providers/daytona.py
index def0f865f..f314d5621 100644
--- a/sandbox/providers/daytona.py
+++ b/sandbox/providers/daytona.py
@@ -15,6 +15,7 @@
import uuid
from pathlib import Path
from typing import TYPE_CHECKING, Any
+from urllib.parse import urlparse, urlunparse
import httpx
@@ -107,6 +108,13 @@ def __init__(
os.environ["DAYTONA_API_KEY"] = api_key
os.environ["DAYTONA_API_URL"] = api_url
self.client = Daytona()
+ original_get_proxy_toolbox_url = self.client._get_proxy_toolbox_url
+
+ def _wrapped_get_proxy_toolbox_url(sandbox_id: str, region_id: str) -> str:
+ raw_url = original_get_proxy_toolbox_url(sandbox_id, region_id)
+ return self._normalize_toolbox_proxy_url(raw_url)
+
+ self.client._get_proxy_toolbox_url = _wrapped_get_proxy_toolbox_url
self._sandboxes: dict[str, Any] = {}
self._thread_bind_mounts: dict[str, list[MountSpec]] = {} # thread_id -> bind_mounts
self._volume_mounts: dict[str, tuple[str, str]] = {} # thread_id -> (volume_id, mount_path)
@@ -123,13 +131,17 @@ def create_managed_volume(self, member_id: str, mount_path: str) -> str:
logger.info("Creating managed volume: %s", volume_name)
# @@@volume-ready - volume transitions pending_create → ready (~6s)
self.client.volume.create(volume_name)
+ self.wait_managed_volume_ready(volume_name)
+ return volume_name
+
+ def wait_managed_volume_ready(self, backend_ref: str) -> None:
for _ in range(30):
- vol = self.client.volume.get(volume_name)
+ vol = self.client.volume.get(backend_ref)
if vol.state == "ready":
- logger.info("Managed volume ready: %s (id=%s)", volume_name, vol.id)
- return volume_name
+ logger.info("Managed volume ready: %s (id=%s)", backend_ref, vol.id)
+ return
time.sleep(1)
- raise RuntimeError(f"Volume {volume_name} did not become ready within 30s")
+ raise RuntimeError(f"Volume {backend_ref} did not become ready within 30s")
def set_managed_volume_mount(self, thread_id: str, backend_ref: str, mount_path: str) -> None:
self._volume_mounts[thread_id] = (backend_ref, mount_path)
@@ -390,6 +402,19 @@ def _get_sandbox(self, session_id: str):
self._sandboxes[session_id] = self.client.find_one(session_id)
return self._sandboxes[session_id]
+ def _normalize_toolbox_proxy_url(self, raw_url: str) -> str:
+ api_host = (urlparse(self.api_url).hostname or "").lower()
+ if api_host not in {"localhost", "127.0.0.1"}:
+ return raw_url
+
+ parsed = urlparse(raw_url)
+ if (parsed.hostname or "").lower() != "172.18.0.1":
+ return raw_url
+
+ # @@@local-toolbox-loopback - self-host Daytona local dev reaches toolbox through
+ # the SSH-forwarded loopback proxy on :4000, not the server-side docker bridge gateway.
+ return urlunparse(parsed._replace(netloc=f"127.0.0.1:{parsed.port or 4000}"))
+
def get_runtime_sandbox(self, session_id: str):
"""Expose native SDK sandbox for runtime-level persistent terminal handling."""
return self._get_sandbox(session_id)
diff --git a/sandbox/providers/e2b.py b/sandbox/providers/e2b.py
index 5827b124b..482f66cdf 100644
--- a/sandbox/providers/e2b.py
+++ b/sandbox/providers/e2b.py
@@ -68,6 +68,10 @@ def __init__(
timeout: int = 300,
provider_name: str | None = None,
):
+ # @@@e2b-sdk-presence - staging inventory must fail loudly when the SDK is absent,
+ # otherwise provider catalog/create-thread gates can overclaim e2b availability.
+ from e2b import Sandbox # noqa: F401
+
if provider_name:
self.name = provider_name
self.api_key = api_key
@@ -88,6 +92,16 @@ def create_session(self, context_id: str | None = None, thread_id: str | None =
api_key=self.api_key,
)
self._sandboxes[sandbox.sandbox_id] = sandbox
+ # @@@e2b-workspace-bootstrap - fresh E2B sandboxes do not guarantee our sync root exists.
+ # Create it eagerly so upload/download and file hints target a real path contract.
+ bootstrap = sandbox.commands.run(
+ f"mkdir -p {self.WORKSPACE_ROOT}/files",
+ cwd=self.default_cwd,
+ timeout=10,
+ )
+ if getattr(bootstrap, "exit_code", 0) != 0:
+ error = getattr(bootstrap, "stderr", "") or getattr(bootstrap, "stdout", "") or "unknown error"
+ raise RuntimeError(f"Failed to bootstrap E2B workspace root: {error}")
return SessionInfo(
session_id=sandbox.sandbox_id,
diff --git a/sandbox/providers/local.py b/sandbox/providers/local.py
index a8c6c6f02..b5766b9c9 100644
--- a/sandbox/providers/local.py
+++ b/sandbox/providers/local.py
@@ -7,6 +7,7 @@
import shlex
import subprocess
import threading
+import time
import uuid
from dataclasses import dataclass, field
from pathlib import Path
@@ -171,6 +172,12 @@ def list_dir(self, session_id: str, path: str) -> list[dict]:
return items
def get_metrics(self, session_id: str) -> Metrics | None:
+ if platform.system() == "Linux":
+ metrics = self._get_metrics_via_procfs()
+ if metrics is not None:
+ return metrics
+ return self.get_metrics_via_commands(session_id)
+
if platform.system() != "Darwin":
return self.get_metrics_via_commands(session_id)
@@ -222,6 +229,59 @@ def get_metrics(self, session_id: str) -> Metrics | None:
except Exception:
return None
+ def _get_metrics_via_procfs(self) -> Metrics | None:
+ try:
+ cpu_percent = self._sample_linux_cpu_percent()
+
+ meminfo: dict[str, int] = {}
+ with open("/proc/meminfo") as fh:
+ for line in fh:
+ key, _, raw = line.partition(":")
+ value = raw.strip().split()[0] if raw.strip() else ""
+ if value.isdigit():
+ meminfo[key] = int(value)
+
+ total_kb = meminfo.get("MemTotal")
+ available_kb = meminfo.get("MemAvailable")
+ memory_total_mb = (total_kb / 1024.0) if total_kb is not None else None
+ memory_used_mb = ((total_kb - available_kb) / 1024.0) if total_kb is not None and available_kb is not None else None
+
+ stat = os.statvfs("/")
+ total_bytes = stat.f_blocks * stat.f_frsize
+ free_bytes = stat.f_bavail * stat.f_frsize
+ disk_total_gb = total_bytes / (1024.0**3)
+ disk_used_gb = (total_bytes - free_bytes) / (1024.0**3)
+
+ return Metrics(
+ cpu_percent=cpu_percent,
+ memory_used_mb=memory_used_mb,
+ memory_total_mb=memory_total_mb,
+ disk_used_gb=disk_used_gb,
+ disk_total_gb=disk_total_gb,
+ )
+ except Exception:
+ return None
+
+ def _sample_linux_cpu_percent(self) -> float | None:
+ first_total, first_idle = self._read_linux_cpu_totals()
+ time.sleep(0.1)
+ second_total, second_idle = self._read_linux_cpu_totals()
+ total_delta = second_total - first_total
+ idle_delta = second_idle - first_idle
+ if total_delta <= 0:
+ return None
+ busy_delta = total_delta - idle_delta
+ return max(0.0, min(100.0, (busy_delta / total_delta) * 100.0))
+
+ def _read_linux_cpu_totals(self) -> tuple[int, int]:
+ with open("/proc/stat") as fh:
+ first = fh.readline().strip()
+ parts = first.split()
+ values = [int(value) for value in parts[1:9]]
+ total = sum(values)
+ idle = values[3] + values[4]
+ return total, idle
+
def create_runtime(self, terminal: AbstractTerminal, lease: SandboxLease) -> PhysicalTerminalRuntime:
from sandbox.providers.local import LocalPersistentShellRuntime
diff --git a/sandbox/runtime.py b/sandbox/runtime.py
index 87cecd024..d68a747ff 100644
--- a/sandbox/runtime.py
+++ b/sandbox/runtime.py
@@ -749,6 +749,8 @@ def _looks_like_infra_error(text: str) -> bool:
"no such session",
"session does not exist",
"failed to create pty session",
+ "failed to send input to pty",
+ "pty control error",
"no ip address found",
"is the sandbox started",
"is paused",
@@ -758,6 +760,9 @@ def _looks_like_infra_error(text: str) -> bool:
"websocket",
"close frame",
"no close frame",
+ "internal error",
+ "1011",
+ "broken pipe",
"transport",
"unreachable",
"timed out",
@@ -806,6 +811,17 @@ def _execute_once(self, command: str, timeout: float | None = None) -> ExecuteRe
instance = self.lease.ensure_active_instance(self.provider)
state = self.terminal.get_state()
timeout_ms = int(timeout * 1000) if timeout else 30000
+ print(
+ "[RemoteWrappedRuntime._execute_once] "
+ f"thread_id={self.terminal.thread_id} "
+ f"lease_id={self.lease.lease_id} "
+ f"instance_id={instance.instance_id} "
+ f"provider={getattr(self.provider, 'name', '?')} "
+ f"cwd={state.cwd!r} "
+ f"timeout_ms={timeout_ms} "
+ f"command={command[:200]!r}",
+ flush=True,
+ )
# @@@ _build_state_snapshot_cmd returns (start, end, cmd) but RemoteWrappedRuntime
# builds its own inline block to interleave cd/exports/command, so the pre-built cmd is unused.
start_marker, end_marker, _ = _build_state_snapshot_cmd()
@@ -832,14 +848,32 @@ def _execute_once(self, command: str, timeout: float | None = None) -> ExecuteRe
cwd=state.cwd,
)
raw_output = result.output or ""
-
- new_cwd, env_map, raw_output = _extract_state_from_output(
- raw_output,
- start_marker,
- end_marker,
- cwd_fallback=state.cwd,
- env_fallback=state.env_delta,
+ print(
+ "[RemoteWrappedRuntime._execute_once] "
+ f"thread_id={self.terminal.thread_id} "
+ f"provider_exit={result.exit_code} "
+ f"provider_error={result.error!r} "
+ f"output_len={len(raw_output)}",
+ flush=True,
)
+
+ try:
+ new_cwd, env_map, raw_output = _extract_state_from_output(
+ raw_output,
+ start_marker,
+ end_marker,
+ cwd_fallback=state.cwd,
+ env_fallback=state.env_delta,
+ )
+ except Exception as exc:
+ print(
+ "[RemoteWrappedRuntime._execute_once] "
+ f"thread_id={self.terminal.thread_id} "
+ f"state_parse_failed={exc.__class__.__name__}: {exc} "
+ f"raw_output_preview={raw_output[:400]!r}",
+ flush=True,
+ )
+ raise
from sandbox.terminal import TerminalState
self.update_terminal_state(TerminalState(cwd=new_cwd, env_delta=env_map))
diff --git a/sandbox/thread_context.py b/sandbox/thread_context.py
index d52ba7ef1..d98e9895c 100644
--- a/sandbox/thread_context.py
+++ b/sandbox/thread_context.py
@@ -3,10 +3,14 @@
from __future__ import annotations
from contextvars import ContextVar
+from typing import Any
_current_thread_id: ContextVar[str] = ContextVar("sandbox_thread_id", default="")
# @@@run-context - groups file ops per execution unit: checkpoint_id in TUI, run_id in web mode.
_current_run_id: ContextVar[str] = ContextVar("sandbox_run_id", default="")
+# Parent conversation messages — set by QueryLoop before tool execution; read by AgentService
+# for forkContext=True sub-agent spawning.
+_current_messages: ContextVar[list[Any]] = ContextVar("current_messages", default=[])
def set_current_thread_id(thread_id: str) -> None:
@@ -25,3 +29,11 @@ def set_current_run_id(run_id: str) -> None:
def get_current_run_id() -> str | None:
value = _current_run_id.get()
return value if value else None
+
+
+def set_current_messages(messages: list[Any]) -> None:
+ _current_messages.set(list(messages))
+
+
+def get_current_messages() -> list[Any]:
+ return _current_messages.get()
diff --git a/storage/providers/sqlite/agent_registry_repo.py b/storage/providers/sqlite/agent_registry_repo.py
index 02aa62aeb..a9a2c0e87 100644
--- a/storage/providers/sqlite/agent_registry_repo.py
+++ b/storage/providers/sqlite/agent_registry_repo.py
@@ -59,11 +59,27 @@ def get_by_id(self, agent_id: str) -> tuple | None:
(agent_id,),
).fetchone()
+ def list_running_by_name(self, name: str) -> list[tuple]:
+ with self._conn() as conn:
+ return conn.execute(
+ "SELECT agent_id, name, thread_id, status, parent_agent_id, subagent_type "
+ "FROM agents WHERE name=? AND status='running' ORDER BY created_at DESC, agent_id DESC",
+ (name,),
+ ).fetchall()
+
def update_status(self, agent_id: str, status: str) -> None:
with self._conn() as conn:
conn.execute("UPDATE agents SET status=? WHERE agent_id=?", (status, agent_id))
conn.commit()
+ def get_latest_by_name_and_parent(self, name: str, parent_agent_id: str | None) -> tuple | None:
+ with self._conn() as conn:
+ return conn.execute(
+ "SELECT agent_id, name, thread_id, status, parent_agent_id, subagent_type "
+ "FROM agents WHERE name=? AND parent_agent_id IS ? ORDER BY created_at DESC, agent_id DESC LIMIT 1",
+ (name, parent_agent_id),
+ ).fetchone()
+
def list_running(self) -> list[tuple]:
with self._conn() as conn:
return conn.execute(
diff --git a/storage/providers/sqlite/chat_repo.py b/storage/providers/sqlite/chat_repo.py
index f761c6e5a..37ca68ad7 100644
--- a/storage/providers/sqlite/chat_repo.py
+++ b/storage/providers/sqlite/chat_repo.py
@@ -172,6 +172,12 @@ def _ensure_table(self) -> None:
)
"""
)
+ # @@@entity-id-to-user-id-migration - old chat dbs still used entity_id.
+ # Rename first so later index creation does not explode on missing user_id.
+ try:
+ self._conn.execute("ALTER TABLE chat_entities RENAME COLUMN entity_id TO user_id")
+ except sqlite3.OperationalError:
+ pass # column already named user_id, or table is new
# @@@chat-entity-migration - add muted/mute_until if table already exists
try:
self._conn.execute("ALTER TABLE chat_entities ADD COLUMN muted INTEGER NOT NULL DEFAULT 0")
@@ -183,11 +189,6 @@ def _ensure_table(self) -> None:
pass
# @@@chat-entity-index — speeds up find_chat_between and list_chats_for_user
self._conn.execute("CREATE INDEX IF NOT EXISTS idx_chat_entities_user ON chat_entities(user_id, chat_id)")
- # @@@entity-id-to-user-id-migration — rename column for existing databases
- try:
- self._conn.execute("ALTER TABLE chat_entities RENAME COLUMN entity_id TO user_id")
- except sqlite3.OperationalError:
- pass # column already named user_id, or table is new
self._conn.commit()
diff --git a/storage/providers/sqlite/lease_repo.py b/storage/providers/sqlite/lease_repo.py
index f0ab745c9..de9f7663e 100644
--- a/storage/providers/sqlite/lease_repo.py
+++ b/storage/providers/sqlite/lease_repo.py
@@ -250,6 +250,20 @@ def mark_needs_refresh(self, lease_id: str, hint_at: datetime | None = None) ->
self._conn.commit()
return cursor.rowcount > 0
+ def set_volume_id(self, lease_id: str, volume_id: str) -> bool:
+ with self._lock:
+ cursor = self._conn.execute(
+ """
+ UPDATE sandbox_leases
+ SET volume_id = ?,
+ updated_at = ?
+ WHERE lease_id = ?
+ """,
+ (volume_id, datetime.now().isoformat(), lease_id),
+ )
+ self._conn.commit()
+ return cursor.rowcount > 0
+
def delete(self, lease_id: str) -> None:
with self._lock:
self._conn.execute("DELETE FROM sandbox_instances WHERE lease_id = ?", (lease_id,))
diff --git a/storage/providers/supabase/agent_registry_repo.py b/storage/providers/supabase/agent_registry_repo.py
index 8aaccd1d0..31bca5506 100644
--- a/storage/providers/supabase/agent_registry_repo.py
+++ b/storage/providers/supabase/agent_registry_repo.py
@@ -55,6 +55,22 @@ def get_by_id(self, agent_id: str) -> tuple | None:
def update_status(self, agent_id: str, status: str) -> None:
self._table().update({"status": status}).eq("agent_id", agent_id).execute()
+ def get_latest_by_name_and_parent(self, name: str, parent_agent_id: str | None) -> tuple | None:
+ query = self._table().select("agent_id,name,thread_id,status,parent_agent_id,subagent_type").eq("name", name)
+ if parent_agent_id is None:
+ query = query.is_("parent_agent_id", "null")
+ else:
+ query = query.eq("parent_agent_id", parent_agent_id)
+ rows = q.rows(
+ query.order("created_at", desc=True).limit(1).execute(),
+ _REPO,
+ "get_latest_by_name_and_parent",
+ )
+ if not rows:
+ return None
+ r = rows[0]
+ return (r["agent_id"], r["name"], r["thread_id"], r["status"], r.get("parent_agent_id"), r.get("subagent_type"))
+
def list_running(self) -> list[tuple]:
rows = q.rows(
self._table().select("agent_id,name,thread_id,status,parent_agent_id,subagent_type").eq("status", "running").execute(),
diff --git a/storage/providers/supabase/chat_repo.py b/storage/providers/supabase/chat_repo.py
index d0cfaa0ab..401fb3726 100644
--- a/storage/providers/supabase/chat_repo.py
+++ b/storage/providers/supabase/chat_repo.py
@@ -212,6 +212,16 @@ def count_unread(self, chat_id: str, user_id: str) -> int:
raw = q.rows(response, _REPO_MSG, "count_unread")
return len(raw)
+ def has_unread_mention(self, chat_id: str, user_id: str) -> bool:
+ resp_ce = self._client.table(_TABLE_CHAT_ENTITIES).select("last_read_at").eq("chat_id", chat_id).eq("user_id", user_id).execute()
+ ce_rows = q.rows(resp_ce, _REPO_MSG, "has_unread_mention(last_read_at)")
+ if not ce_rows:
+ return False
+ for message in self.list_unread(chat_id, user_id):
+ if user_id in message.mentioned_ids:
+ return True
+ return False
+
def list_by_time_range(
self,
chat_id: str,
diff --git a/storage/providers/supabase/entity_repo.py b/storage/providers/supabase/entity_repo.py
index cb2e0dc84..b4ecc1dc7 100644
--- a/storage/providers/supabase/entity_repo.py
+++ b/storage/providers/supabase/entity_repo.py
@@ -43,6 +43,13 @@ def get_by_member_id(self, member_id: str) -> list[EntityRow]:
rows = q.rows(response, _REPO, "get_by_member_id")
return [EntityRow.model_validate(r) for r in rows]
+ def get_by_thread_id(self, thread_id: str) -> EntityRow | None:
+ response = self._t().select("*").eq("thread_id", thread_id).execute()
+ rows = q.rows(response, _REPO, "get_by_thread_id")
+ if not rows:
+ return None
+ return EntityRow.model_validate(rows[0])
+
def list_all(self) -> list[EntityRow]:
query = q.order(self._t().select("*"), "created_at", desc=False, repo=_REPO, operation="list_all")
rows = q.rows(query.execute(), _REPO, "list_all")
diff --git a/storage/runtime.py b/storage/runtime.py
index 0a2d1b394..a522fe3da 100644
--- a/storage/runtime.py
+++ b/storage/runtime.py
@@ -59,6 +59,68 @@ def build_storage_container(
)
+def build_thread_repo(
+ *,
+ main_db_path: str | Path | None = None,
+ strategy: str | None = None,
+ supabase_client: Any | None = None,
+ supabase_client_factory: str | None = None,
+ env: Mapping[str, str] | None = None,
+):
+ env_map = env if env is not None else os.environ
+ resolved_strategy = _resolve_strategy(strategy if strategy is not None else env_map.get("LEON_STORAGE_STRATEGY"))
+ if resolved_strategy == "supabase":
+ client = supabase_client
+ if client is None:
+ factory_ref = supabase_client_factory if supabase_client_factory is not None else env_map.get("LEON_SUPABASE_CLIENT_FACTORY")
+ if not factory_ref:
+ raise RuntimeError(
+ "Supabase thread repo requires runtime config. "
+ "Set LEON_SUPABASE_CLIENT_FACTORY=<module>:<attribute> "
+ "or inject supabase_client explicitly."
+ )
+ client = _load_factory(factory_ref)()
+ _ensure_supabase_client(client)
+ from storage.providers.supabase.thread_repo import SupabaseThreadRepo
+
+ return SupabaseThreadRepo(client)
+
+ from storage.providers.sqlite.thread_repo import SQLiteThreadRepo
+
+ return SQLiteThreadRepo(db_path=main_db_path)
+
+
+def build_member_repo(
+ *,
+ main_db_path: str | Path | None = None,
+ strategy: str | None = None,
+ supabase_client: Any | None = None,
+ supabase_client_factory: str | None = None,
+ env: Mapping[str, str] | None = None,
+):
+ env_map = env if env is not None else os.environ
+ resolved_strategy = _resolve_strategy(strategy if strategy is not None else env_map.get("LEON_STORAGE_STRATEGY"))
+ if resolved_strategy == "supabase":
+ client = supabase_client
+ if client is None:
+ factory_ref = supabase_client_factory if supabase_client_factory is not None else env_map.get("LEON_SUPABASE_CLIENT_FACTORY")
+ if not factory_ref:
+ raise RuntimeError(
+ "Supabase member repo requires runtime config. "
+ "Set LEON_SUPABASE_CLIENT_FACTORY=<module>:<attribute> "
+ "or inject supabase_client explicitly."
+ )
+ client = _load_factory(factory_ref)()
+ _ensure_supabase_client(client)
+ from storage.providers.supabase.member_repo import SupabaseMemberRepo
+
+ return SupabaseMemberRepo(client)
+
+ from storage.providers.sqlite.member_repo import SQLiteMemberRepo
+
+ return SQLiteMemberRepo(db_path=main_db_path)
+
+
def _resolve_strategy(raw: str | None) -> StorageStrategy:
value = (raw or "sqlite").strip().lower()
if value in {"", "sqlite"}:
diff --git a/tests/config/conftest.py b/tests/Config/conftest.py
similarity index 100%
rename from tests/config/conftest.py
rename to tests/Config/conftest.py
diff --git a/tests/config/test_loader.py b/tests/Config/test_loader.py
similarity index 74%
rename from tests/config/test_loader.py
rename to tests/Config/test_loader.py
index f3671fa09..c0874f38d 100644
--- a/tests/config/test_loader.py
+++ b/tests/Config/test_loader.py
@@ -1,11 +1,13 @@
"""Comprehensive tests for config.loader module."""
+import json
import os
import sys
+from pathlib import Path
import pytest
-from config.loader import ConfigLoader, load_config
+from config.loader import AgentLoader, ConfigLoader, load_config
from config.schema import LeonSettings
@@ -157,6 +159,27 @@ def test_expand_env_vars_nested(self):
assert result["paths"] == ["/base/path1", "/base/path2"]
assert result["config"]["root"] == "/base"
+ def test_discover_mcp_preserves_explicit_transport(self, tmp_path):
+ path = tmp_path / ".mcp.json"
+ path.write_text(
+ json.dumps(
+ {
+ "mcpServers": {
+ "wsdemo": {
+ "transport": "websocket",
+ "url": "ws://example.test/mcp",
+ }
+ }
+ }
+ ),
+ encoding="utf-8",
+ )
+
+ result = ConfigLoader._discover_mcp(tmp_path)
+
+ assert result["wsdemo"].transport == "websocket"
+ assert result["wsdemo"].url == "ws://example.test/mcp"
+
class TestLoadConfigFunction:
"""Tests for load_config convenience function."""
@@ -169,3 +192,32 @@ def test_load_config_with_workspace(self, tmp_path, monkeypatch):
settings = load_config(workspace_root=str(project_dir))
assert isinstance(settings, LeonSettings)
+
+
+def test_project_agent_file_does_not_claim_bundle_source_dir(tmp_path: Path):
+ agents_dir = tmp_path / ".leon" / "agents"
+ agents_dir.mkdir(parents=True)
+ (agents_dir / "explore.md").write_text(
+ "---\nname: explore\nmodel: project-model\n---\nproject prompt\n",
+ encoding="utf-8",
+ )
+
+ agent = AgentLoader(workspace_root=tmp_path).load_all_agents()["explore"]
+
+ assert agent.model == "project-model"
+ assert agent.source_dir is None
+
+
+def test_member_agent_retains_bundle_source_dir(tmp_path: Path, monkeypatch):
+ home_root = tmp_path
+ monkeypatch.setattr("config.loader.user_home_read_candidates", lambda *parts: (home_root.joinpath(*parts),))
+ member_dir = home_root / "members" / "alice"
+ member_dir.mkdir(parents=True)
+ (member_dir / "agent.md").write_text(
+ '---\nname: alice\ntools:\n - "*"\n---\nmember prompt\n',
+ encoding="utf-8",
+ )
+
+ agent = AgentLoader(workspace_root=tmp_path).load_all_agents()["alice"]
+
+ assert agent.source_dir == member_dir.resolve()
diff --git a/tests/config/test_loader_skill_dir_bootstrap.py b/tests/Config/test_loader_skill_dir_bootstrap.py
similarity index 100%
rename from tests/config/test_loader_skill_dir_bootstrap.py
rename to tests/Config/test_loader_skill_dir_bootstrap.py
diff --git a/tests/Fix/test_auth_entity_resolution.py b/tests/Fix/test_auth_entity_resolution.py
new file mode 100644
index 000000000..c445b566f
--- /dev/null
+++ b/tests/Fix/test_auth_entity_resolution.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+from fastapi import HTTPException
+
+from backend.web.core import dependencies
+
+
+class _Request:
+ def __init__(self, *, token: str, payload: dict, member_exists: bool = True) -> None:
+ self.headers = {"Authorization": f"Bearer {token}"}
+ self.app = SimpleNamespace(
+ state=SimpleNamespace(
+ auth_service=SimpleNamespace(verify_token=lambda seen: payload if seen == token else None),
+ member_repo=SimpleNamespace(get_by_id=lambda _user_id: object() if member_exists else None),
+ )
+ )
+
+
+@pytest.mark.asyncio
+async def test_get_current_entity_id_derives_human_entity_when_jwt_has_no_entity_id():
+ request = _Request(token="tok-1", payload={"user_id": "user-123"})
+
+ entity_id = await dependencies.get_current_entity_id(request)
+
+ assert entity_id == "user-123-1"
+
+
+@pytest.mark.asyncio
+async def test_get_current_entity_id_keeps_explicit_entity_id_when_present():
+ request = _Request(token="tok-1", payload={"user_id": "user-123", "entity_id": "custom-entity"})
+
+ entity_id = await dependencies.get_current_entity_id(request)
+
+ assert entity_id == "custom-entity"
+
+
+@pytest.mark.asyncio
+async def test_get_current_user_id_still_rejects_deleted_user():
+ request = _Request(token="tok-1", payload={"user_id": "ghost-user"}, member_exists=False)
+
+ with pytest.raises(HTTPException) as exc_info:
+ await dependencies.get_current_user_id(request)
+
+ assert exc_info.value.status_code == 401
+ assert exc_info.value.detail == "User no longer exists — please re-login"
diff --git a/tests/Fix/test_auth_service_token_verification.py b/tests/Fix/test_auth_service_token_verification.py
new file mode 100644
index 000000000..f145b7bd6
--- /dev/null
+++ b/tests/Fix/test_auth_service_token_verification.py
@@ -0,0 +1,246 @@
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+
+from backend.web.services.auth_service import AuthService
+
+
+class _FakeSupabaseAuth:
+ def __init__(self, user_id: str = "user-1") -> None:
+ self.user_id = user_id
+ self.tokens: list[str] = []
+
+ def get_user(self, token: str):
+ self.tokens.append(token)
+ return SimpleNamespace(user=SimpleNamespace(id=self.user_id))
+
+
+class _FakeSupabaseClient:
+ def __init__(self, user_id: str = "user-1") -> None:
+ self.auth = _FakeSupabaseAuth(user_id=user_id)
+
+
+class _FakeLoginAuth:
+ def __init__(self) -> None:
+ self.calls: list[dict[str, str]] = []
+
+ def sign_in_with_password(self, payload: dict[str, str]):
+ self.calls.append(payload)
+ return SimpleNamespace(
+ user=SimpleNamespace(id="user-1"),
+ session=SimpleNamespace(access_token="tok-1"),
+ )
+
+
+class _FakeAuthClient:
+ def __init__(self) -> None:
+ self.auth = _FakeLoginAuth()
+
+
+class _FactoryBackedLoginAuth:
+ def __init__(self, owner: _FactoryBackedAuthClient) -> None:
+ self._owner = owner
+
+ def sign_in_with_password(self, payload: dict[str, str]):
+ self._owner.calls.append(payload)
+ return SimpleNamespace(
+ user=SimpleNamespace(id="user-1"),
+ session=SimpleNamespace(access_token="tok-1"),
+ )
+
+ def get_user(self, token: str):
+ self._owner.tokens.append(token)
+ return SimpleNamespace(user=SimpleNamespace(id="user-1"))
+
+
+class _FactoryBackedAuthClient:
+ def __init__(self) -> None:
+ self.calls: list[dict[str, str]] = []
+ self.tokens: list[str] = []
+ self.auth = _FactoryBackedLoginAuth(self)
+
+
+class _DirectAuthClient:
+ def __init__(self) -> None:
+ self.calls: list[dict[str, str]] = []
+ self.tokens: list[str] = []
+
+ def sign_in_with_password(self, payload: dict[str, str]):
+ self.calls.append(payload)
+ return SimpleNamespace(
+ user=SimpleNamespace(id="user-1"),
+ session=SimpleNamespace(access_token="tok-1"),
+ )
+
+ def get_user(self, token: str):
+ self.tokens.append(token)
+ return SimpleNamespace(user=SimpleNamespace(id="user-1"))
+
+ def sign_up(self, payload: dict[str, str]):
+ self.calls.append(payload)
+ return SimpleNamespace(user=SimpleNamespace(id="user-1"), session=None)
+
+ def verify_otp(self, payload: dict[str, str]):
+ self.calls.append(payload)
+ return SimpleNamespace(
+ user=SimpleNamespace(id="user-1"),
+ session=SimpleNamespace(access_token="temp-token-1"),
+ )
+
+
+def _service(
+ *,
+ supabase_client=None,
+ supabase_auth_client=None,
+ supabase_auth_client_factory=None,
+ member_repo=None,
+ entity_repo=None,
+ invite_codes=None,
+) -> AuthService:
+ return AuthService(
+ members=member_repo or SimpleNamespace(),
+ accounts=SimpleNamespace(),
+ entities=entity_repo or SimpleNamespace(),
+ supabase_client=supabase_client,
+ supabase_auth_client=supabase_auth_client,
+ supabase_auth_client_factory=supabase_auth_client_factory,
+ invite_codes=invite_codes,
+ )
+
+
+def test_verify_token_prefers_supabase_get_user_over_local_jwt_secret(monkeypatch: pytest.MonkeyPatch):
+ monkeypatch.delenv("SUPABASE_JWT_SECRET", raising=False)
+ sb = _FakeSupabaseClient(user_id="user-supabase")
+
+ payload = _service(supabase_auth_client=sb).verify_token("tok-live")
+
+ assert sb.auth.tokens == ["tok-live"]
+ assert payload == {"user_id": "user-supabase", "entity_id": None}
+
+
+def test_verify_token_without_supabase_client_still_fails_loudly_when_secret_missing(monkeypatch: pytest.MonkeyPatch):
+ monkeypatch.delenv("SUPABASE_JWT_SECRET", raising=False)
+
+ with pytest.raises(RuntimeError, match="SUPABASE_JWT_SECRET env var required"):
+ _service().verify_token("tok-live")
+
+
+def test_login_uses_dedicated_auth_client_instead_of_storage_client():
+ auth_client = _FakeAuthClient()
+ member_repo = SimpleNamespace(
+ get_by_id=lambda _user_id: SimpleNamespace(name="codex", mycel_id=10001, email="codex@example.com", avatar=None),
+ list_by_owner_user_id=lambda _user_id: [],
+ )
+ entity_repo = SimpleNamespace(get_by_member_id=lambda _user_id: [SimpleNamespace(id="user-1-1", type="human")])
+
+ result = _service(
+ supabase_client=SimpleNamespace(auth=None),
+ supabase_auth_client=auth_client,
+ member_repo=member_repo,
+ entity_repo=entity_repo,
+ ).login("codex@example.com", "pw-1")
+
+ assert auth_client.auth.calls == [{"email": "codex@example.com", "password": "pw-1"}]
+ assert result["token"] == "tok-1"
+
+
+def test_login_uses_fresh_auth_client_from_factory_per_call():
+ created: list[_FactoryBackedAuthClient] = []
+
+ def factory() -> _FactoryBackedAuthClient:
+ client = _FactoryBackedAuthClient()
+ created.append(client)
+ return client
+
+ member_repo = SimpleNamespace(
+ get_by_id=lambda _user_id: SimpleNamespace(name="codex", mycel_id=10001, email="codex@example.com", avatar=None),
+ list_by_owner_user_id=lambda _user_id: [],
+ )
+ entity_repo = SimpleNamespace(get_by_member_id=lambda _user_id: [SimpleNamespace(id="user-1-1", type="human")])
+ service = _service(
+ supabase_client=SimpleNamespace(auth=None),
+ supabase_auth_client_factory=factory,
+ member_repo=member_repo,
+ entity_repo=entity_repo,
+ )
+
+ service.login("codex@example.com", "pw-1")
+ service.login("codex@example.com", "pw-2")
+
+ assert len(created) == 2
+ assert created[0].calls == [{"email": "codex@example.com", "password": "pw-1"}]
+ assert created[1].calls == [{"email": "codex@example.com", "password": "pw-2"}]
+
+
+def test_verify_token_uses_fresh_auth_client_from_factory_per_call(monkeypatch: pytest.MonkeyPatch):
+ monkeypatch.delenv("SUPABASE_JWT_SECRET", raising=False)
+ created: list[_FactoryBackedAuthClient] = []
+
+ def factory() -> _FactoryBackedAuthClient:
+ client = _FactoryBackedAuthClient()
+ created.append(client)
+ return client
+
+ service = _service(supabase_auth_client_factory=factory)
+
+ assert service.verify_token("tok-1") == {"user_id": "user-1", "entity_id": None}
+ assert service.verify_token("tok-2") == {"user_id": "user-1", "entity_id": None}
+ assert len(created) == 2
+ assert created[0].tokens == ["tok-1"]
+ assert created[1].tokens == ["tok-2"]
+
+
+def test_login_accepts_direct_gotrue_client_without_auth_wrapper():
+ auth_client = _DirectAuthClient()
+ member_repo = SimpleNamespace(
+ get_by_id=lambda _user_id: SimpleNamespace(name="codex", mycel_id=10001, email="codex@example.com", avatar=None),
+ list_by_owner_user_id=lambda _user_id: [],
+ )
+ entity_repo = SimpleNamespace(get_by_member_id=lambda _user_id: [SimpleNamespace(id="user-1-1", type="human")])
+
+ result = _service(
+ supabase_client=SimpleNamespace(auth=None),
+ supabase_auth_client=auth_client,
+ member_repo=member_repo,
+ entity_repo=entity_repo,
+ ).login("codex@example.com", "pw-1")
+
+ assert auth_client.calls == [{"email": "codex@example.com", "password": "pw-1"}]
+ assert result["token"] == "tok-1"
+
+
+def test_verify_token_accepts_direct_gotrue_client_without_auth_wrapper(monkeypatch: pytest.MonkeyPatch):
+ monkeypatch.delenv("SUPABASE_JWT_SECRET", raising=False)
+ auth_client = _DirectAuthClient()
+
+ payload = _service(supabase_auth_client=auth_client).verify_token("tok-direct")
+
+ assert auth_client.tokens == ["tok-direct"]
+ assert payload == {"user_id": "user-1", "entity_id": None}
+
+
+def test_send_otp_accepts_direct_gotrue_client_without_auth_wrapper():
+ auth_client = _DirectAuthClient()
+ invite_codes = SimpleNamespace(is_valid=lambda code: code == "invite-1")
+
+ _service(
+ supabase_client=SimpleNamespace(auth=None),
+ supabase_auth_client=auth_client,
+ invite_codes=invite_codes,
+ ).send_otp("fresh@example.com", "pw-1", "invite-1")
+
+ assert auth_client.calls == [{"email": "fresh@example.com", "password": "pw-1"}]
+
+
+def test_verify_register_otp_accepts_direct_gotrue_client_without_auth_wrapper():
+ auth_client = _DirectAuthClient()
+
+ result = _service(
+ supabase_client=SimpleNamespace(auth=None),
+ supabase_auth_client=auth_client,
+ ).verify_register_otp("fresh@example.com", "123456")
+
+ assert auth_client.calls == [{"email": "fresh@example.com", "token": "123456", "type": "signup"}]
+ assert result == {"temp_token": "temp-token-1"}
diff --git a/tests/Fix/test_background_task_cleanup.py b/tests/Fix/test_background_task_cleanup.py
new file mode 100644
index 000000000..3b088bd38
--- /dev/null
+++ b/tests/Fix/test_background_task_cleanup.py
@@ -0,0 +1,493 @@
+"""Integration tests for background task cleanup across command/agent surfaces."""
+
+import asyncio
+import json
+import shutil
+import sys
+from pathlib import Path
+
+import pytest
+from langchain_core.messages import AIMessage
+
+from core.agents.registry import AgentEntry, AgentRegistry
+from core.agents.service import AgentService
+from core.runtime.middleware.queue import MessageQueueManager
+from core.runtime.middleware.queue.middleware import SteeringMiddleware
+from core.runtime.registry import ToolRegistry
+from core.tools.command.bash.executor import BashExecutor
+from core.tools.command.service import CommandService
+from sandbox.thread_context import set_current_thread_id
+
+
+class _FakeAgentRegistry:
+ async def register(self, entry):
+ self.entry = entry
+
+ async def update_status(self, agent_id: str, status: str):
+ self.last_status = (agent_id, status)
+
+
+class _SlowChildAgent:
+ def __init__(self, first_text: str, release_event: asyncio.Event, started_event: asyncio.Event):
+ self._first_text = first_text
+ self._release_event = release_event
+ self._started_event = started_event
+ self._agent_service = type(
+ "_ChildService",
+ (),
+ {"cleanup_background_runs": self._cleanup_background_runs},
+ )()
+ self.agent = type("_InnerAgent", (), {"astream": self._astream})()
+ self.closed = False
+
+ async def ainit(self):
+ return None
+
+ async def _astream(self, *args, **kwargs):
+ self._started_event.set()
+ yield {"agent": {"messages": [AIMessage(content=self._first_text)]}}
+ await self._release_event.wait()
+
+ async def _cleanup_background_runs(self):
+ return None
+
+ def close(self):
+ self.closed = True
+ return None
+
+
+class _CompleteChildAgent:
+ def __init__(self, text: str):
+ self._text = text
+ self._agent_service = type(
+ "_ChildService",
+ (),
+ {"cleanup_background_runs": self._cleanup_background_runs},
+ )()
+ self.agent = type("_InnerAgent", (), {"astream": self._astream})()
+ self.closed = False
+
+ async def ainit(self):
+ return None
+
+ async def _astream(self, *args, **kwargs):
+ yield {"agent": {"messages": [AIMessage(content=self._text)]}}
+
+ async def _cleanup_background_runs(self):
+ return None
+
+ def close(self):
+ self.closed = True
+ return None
+
+
+class _FailingInitChildAgent:
+ def __init__(self, error: Exception):
+ self._error = error
+
+ async def ainit(self):
+ raise self._error
+
+
+def _agent_tool_json(result) -> dict:
+ content = getattr(result, "content", result)
+ return json.loads(content)
+
+
@pytest.mark.skipif(
    sys.platform == "win32" or shutil.which("bash") is None,
    reason="bash background cleanup integration requires Unix-compatible bash",
)
def test_taskstop_terminates_real_background_bash_run(tmp_path):
    """End-to-end: a real backgrounded bash process is killed via TaskStop.

    CommandService and AgentService share one ``background_runs`` dict, so a
    task started on the command surface must be stoppable from the agent
    surface, and the underlying OS process must actually terminate.
    """
    async def run():
        registry = ToolRegistry()
        # Shared run table is what wires the two services together.
        shared_runs: dict[str, object] = {}
        executor = BashExecutor(default_cwd=str(tmp_path))
        command_service = CommandService(
            registry=registry,
            workspace_root=tmp_path,
            executor=executor,
            background_runs=shared_runs,
        )
        agent_service = AgentService(
            tool_registry=registry,
            agent_registry=_FakeAgentRegistry(),
            workspace_root=Path(tmp_path),
            model_name="gpt-test",
            shared_runs=shared_runs,
        )

        # Long-running command is expected to be backgrounded and handed a task id.
        result = await command_service._execute_async(
            "sleep 30",
            str(tmp_path),
            30.0,
            description="integration bash cleanup",
        )
        assert "task_id:" in result
        assert len(shared_runs) == 1

        task_id, running = next(iter(shared_runs.items()))
        assert running.is_done is False

        stop_result = await agent_service._handle_task_stop(task_id)

        assert stop_result == f"Task {task_id} cancelled"
        assert task_id not in shared_runs
        # returncode set means the sleep process really exited, not just bookkeeping.
        assert running._cmd.process.returncode is not None

    asyncio.run(run())
+
+
def test_sendmessage_search_hint_uses_queue_naming(tmp_path):
    """Registering AgentService should expose SendMessage with 'queue' wording."""
    registry = ToolRegistry()
    # Constructing the service registers its tools into the registry as a side effect.
    AgentService(
        tool_registry=registry,
        agent_registry=_FakeAgentRegistry(),
        workspace_root=Path(tmp_path),
        model_name="gpt-test",
    )

    send_message = registry.get("SendMessage")

    assert send_message is not None
    assert "queue" in send_message.search_hint
    assert "mailbox" not in send_message.search_hint
+
+
@pytest.mark.asyncio
async def test_sendmessage_enqueues_real_agent_notification_for_target_thread(tmp_path):
    """SendMessage to a named running agent enqueues an agent notification on its thread."""
    registry = ToolRegistry()
    agent_registry = AgentRegistry(db_path=tmp_path / "agents.db")
    queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
    service = AgentService(
        tool_registry=registry,
        agent_registry=agent_registry,
        workspace_root=Path(tmp_path),
        model_name="gpt-test",
        queue_manager=queue_manager,
    )
    # Target must exist in the registry so name -> thread resolution can succeed.
    await agent_registry.register(
        AgentEntry(
            agent_id="agent-1",
            name="worker-1",
            thread_id="thread-worker-1",
            status="running",
        )
    )

    result = await service._handle_send_message(
        target_name="worker-1",
        message="hello from coordinator",
        sender_name="coordinator",
    )

    assert result == "Message sent to worker-1."
    items = queue_manager.drain_all("thread-worker-1")
    assert len(items) == 1
    assert items[0].notification_type == "agent"
    assert items[0].sender_name == "coordinator"
    assert "hello from coordinator" in items[0].content
+
+
@pytest.mark.asyncio
async def test_sendmessage_reaches_target_next_turn_via_steering_middleware(tmp_path):
    """A queued SendMessage payload is injected into the target's next model turn."""
    registry = ToolRegistry()
    agent_registry = AgentRegistry(db_path=tmp_path / "agents.db")
    queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
    service = AgentService(
        tool_registry=registry,
        agent_registry=agent_registry,
        workspace_root=Path(tmp_path),
        model_name="gpt-test",
        queue_manager=queue_manager,
    )
    await agent_registry.register(
        AgentEntry(
            agent_id="agent-1",
            name="worker-1",
            thread_id="thread-worker-1",
            status="running",
        )
    )

    await service._handle_send_message(
        target_name="worker-1",
        message="queue payload",
        sender_name="coordinator",
    )

    # Simulate the target thread starting its next turn: the middleware should
    # drain the queue and hand the message to the model.
    injected = SteeringMiddleware(queue_manager=queue_manager).before_model(
        state={},
        runtime=None,
        config={"configurable": {"thread_id": "thread-worker-1"}},
    )

    assert injected is not None
    messages = injected["messages"]
    assert len(messages) == 1
    assert "queue payload" in str(messages[0].content)
    assert messages[0].metadata["notification_type"] == "agent"
    assert messages[0].metadata["sender_name"] == "coordinator"
+
+
@pytest.mark.asyncio
async def test_sendmessage_rejects_ambiguous_running_agent_names(tmp_path):
    """Two running agents with the same name must make SendMessage refuse to deliver."""
    registry = ToolRegistry()
    agent_registry = AgentRegistry(db_path=tmp_path / "agents.db")
    queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
    service = AgentService(
        tool_registry=registry,
        agent_registry=agent_registry,
        workspace_root=Path(tmp_path),
        model_name="gpt-test",
        queue_manager=queue_manager,
    )
    # Same name "worker" registered twice on different threads -> ambiguous target.
    await agent_registry.register(
        AgentEntry(
            agent_id="agent-1",
            name="worker",
            thread_id="thread-worker-1",
            status="running",
        )
    )
    await agent_registry.register(
        AgentEntry(
            agent_id="agent-2",
            name="worker",
            thread_id="thread-worker-2",
            status="running",
        )
    )

    result = await service._handle_send_message(
        target_name="worker",
        message="hello dup",
        sender_name="coordinator",
    )

    assert "ambiguous" in result
    # Neither candidate thread may receive the message.
    assert queue_manager.drain_all("thread-worker-1") == []
    assert queue_manager.drain_all("thread-worker-2") == []
+
+
@pytest.mark.asyncio
async def test_background_agent_progress_notification_reaches_parent_next_turn(tmp_path, monkeypatch):
    """A still-running background child surfaces a progress note on the parent's next turn."""
    started = asyncio.Event()
    release = asyncio.Event()

    def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
        return _SlowChildAgent("Inspecting repository", release, started)

    monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)

    registry = ToolRegistry()
    queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
    service = AgentService(
        tool_registry=registry,
        agent_registry=_FakeAgentRegistry(),
        workspace_root=Path(tmp_path),
        model_name="gpt-test",
        queue_manager=queue_manager,
        background_progress_interval_s=0.02,
    )

    set_current_thread_id("parent-thread")
    try:
        raw = await service._handle_agent(
            prompt="do work",
            name="worker-1",
            description="Investigating repository",
            run_in_background=True,
        )
        task_id = _agent_tool_json(raw)["task_id"]
        await asyncio.wait_for(started.wait(), timeout=1)
        # Give the 0.02s progress ticker at least one interval to fire.
        await asyncio.sleep(0.05)

        injected = SteeringMiddleware(queue_manager=queue_manager).before_model(
            state={},
            runtime=None,
            config={"configurable": {"thread_id": "parent-thread"}},
        )

        assert injected is not None
        text = str(injected["messages"][0].content)
        # NOTE(review): the empty-string membership check below is vacuous, and the
        # f-string wrapper on task_id is redundant — both look like markup tags were
        # stripped from these literals. Confirm the intended tokens.
        assert "" in text
        assert f"{task_id}" in text
        assert "Inspecting repository" in text
    finally:
        release.set()
        await service.cleanup_background_runs()
        set_current_thread_id("")
+
+
@pytest.mark.asyncio
async def test_background_agent_completion_notification_waits_for_followthrough_run(tmp_path, monkeypatch):
    """A completed child's terminal notification stays queued rather than injecting mid-turn."""
    def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
        return _CompleteChildAgent("Finished indexing")

    monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)

    registry = ToolRegistry()
    queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
    service = AgentService(
        tool_registry=registry,
        agent_registry=_FakeAgentRegistry(),
        workspace_root=Path(tmp_path),
        model_name="gpt-test",
        queue_manager=queue_manager,
        background_progress_interval_s=0.02,
    )

    set_current_thread_id("parent-thread")
    try:
        raw = await service._handle_agent(
            prompt="do work",
            name="worker-1",
            description="Index repository",
            run_in_background=True,
        )
        task_id = _agent_tool_json(raw)["task_id"]
        running = service._tasks[task_id]
        # Wait until the child's background task fully completes.
        await asyncio.wait_for(running.task, timeout=1)

        injected = SteeringMiddleware(queue_manager=queue_manager).before_model(
            state={},
            runtime=None,
            config={"configurable": {"thread_id": "parent-thread"}},
        )

        # Terminal notifications are not injected here; they wait for a follow-up run.
        assert injected is None
        queued = queue_manager.list_queue("parent-thread")
        assert len(queued) == 1
        text = queued[0]["content"]
        # NOTE(review): the empty-string check is vacuous and the f-string wrapper is
        # redundant — likely stripped markup tags; confirm the intended tokens.
        assert "" in text
        assert f"{task_id}" in text
        assert "completed" in text
        assert "Finished indexing" in text
    finally:
        set_current_thread_id("")
+
+
@pytest.mark.asyncio
async def test_mixed_success_and_init_failure_background_agents_queue_both_terminal_notifications(tmp_path, monkeypatch):
    """One successful child and one failing-init child must each queue a terminal note."""
    created = 0

    def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
        # First construction succeeds; every later one fails during ainit().
        nonlocal created
        created += 1
        if created == 1:
            return _CompleteChildAgent("GOOD:BASE:2")
        return _FailingInitChildAgent(RuntimeError("bad child init"))

    monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)

    registry = ToolRegistry()
    queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
    service = AgentService(
        tool_registry=registry,
        agent_registry=_FakeAgentRegistry(),
        workspace_root=Path(tmp_path),
        model_name="gpt-test",
        queue_manager=queue_manager,
    )

    set_current_thread_id("parent-thread")
    try:
        raw_good = await service._handle_agent(
            prompt="good child",
            name="good-child",
            description="good child",
            run_in_background=True,
        )
        raw_bad = await service._handle_agent(
            prompt="bad child",
            name="bad-child",
            description="bad child",
            run_in_background=True,
        )

        await asyncio.wait_for(service._tasks[_agent_tool_json(raw_good)["task_id"]].task, timeout=1)
        # The failing child's task re-raises its init error when awaited.
        with pytest.raises(RuntimeError, match="bad child init"):
            await asyncio.wait_for(service._tasks[_agent_tool_json(raw_bad)["task_id"]].task, timeout=1)

        queued = queue_manager.list_queue("parent-thread")

        assert len(queued) == 2
        contents = [item["content"] for item in queued]
        assert any("completed" in content and "GOOD:BASE:2" in content for content in contents)
        assert any("error" in content and "Agent failed" in content for content in contents)
    finally:
        set_current_thread_id("")
+
+
def test_terminal_background_notification_waits_for_followup_run_during_owner_turn(tmp_path):
    """During an owner-sourced run, terminal agent notifications stay queued."""
    queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
    # NOTE(review): "errorAgent failed" reads like markup tags were stripped from
    # this payload (e.g. an error-status wrapper around "Agent failed") — confirm
    # the intended payload format against the queue producer.
    queue_manager.enqueue(
        "errorAgent failed",
        "parent-thread",
        notification_type="agent",
        source="system",
    )

    runtime = type("_Runtime", (), {"current_run_source": "owner"})()
    injected = SteeringMiddleware(queue_manager=queue_manager, agent_runtime=runtime).before_model(
        state={},
        runtime=None,
        config={"configurable": {"thread_id": "parent-thread"}},
    )

    assert injected is None
    queued = queue_manager.list_queue("parent-thread")
    assert len(queued) == 1
    # NOTE(review): the empty-string check below is vacuous — likely a stripped tag.
    assert "" in queued[0]["content"]
+
+
def test_terminal_background_notification_waits_for_followup_run_during_system_turn(tmp_path):
    """During a system-sourced run, terminal agent notifications also stay queued."""
    queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
    # NOTE(review): "completedBG1:STEP1:2" reads like markup tags were stripped
    # from this payload — confirm the intended payload format.
    queue_manager.enqueue(
        "completedBG1:STEP1:2",
        "parent-thread",
        notification_type="agent",
        source="system",
    )

    runtime = type("_Runtime", (), {"current_run_source": "system"})()
    injected = SteeringMiddleware(queue_manager=queue_manager, agent_runtime=runtime).before_model(
        state={},
        runtime=None,
        config={"configurable": {"thread_id": "parent-thread"}},
    )

    assert injected is None
    queued = queue_manager.list_queue("parent-thread")
    assert len(queued) == 1
    # NOTE(review): the empty-string check below is vacuous — likely a stripped tag.
    assert "" in queued[0]["content"]
+
+
def test_steer_injection_emits_phase_boundary_events(tmp_path):
    """Injecting a steer message should emit run_done followed by run_start."""
    queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
    queue_manager.enqueue(
        "Stop the current plan and summarize status.",
        "parent-thread",
        notification_type="steer",
        source="owner",
        is_steer=True,
    )

    class _Runtime:
        # Collects activity events so phase-boundary ordering can be asserted.
        def __init__(self) -> None:
            self.events: list[dict[str, str]] = []

        def emit_activity_event(self, event: dict[str, str]) -> None:
            self.events.append(event)

    runtime = _Runtime()
    injected = SteeringMiddleware(queue_manager=queue_manager, agent_runtime=runtime).before_model(
        state={},
        runtime=None,
        config={"configurable": {"thread_id": "parent-thread"}},
    )

    assert injected is not None
    assert str(injected["messages"][0].content) == "Stop the current plan and summarize status."
    # Steering closes the current phase before opening the steered one.
    assert [event["event"] for event in runtime.events] == ["run_done", "run_start"]
diff --git a/tests/test_followup_requeue.py b/tests/Fix/test_followup_requeue.py
similarity index 97%
rename from tests/test_followup_requeue.py
rename to tests/Fix/test_followup_requeue.py
index 7a798aa7d..f19fa1b68 100644
--- a/tests/test_followup_requeue.py
+++ b/tests/Fix/test_followup_requeue.py
@@ -192,7 +192,7 @@ async def _run():
asyncio.run(_run())
def test_transition_failure_skips_start(self, mock_agent, mock_app, queue_manager):
- """When runtime.transition returns False, start_agent_run is not called."""
+ """When runtime.transition returns False, followup stays queued."""
queue_manager.enqueue("wont run", "thread-1")
mock_agent.runtime.transition.return_value = False
@@ -203,7 +203,8 @@ async def _run():
await _consume_followup_queue(mock_agent, "thread-1", mock_app)
mock_start.assert_not_called()
- # Message was consumed (dequeued) but not re-enqueued since no exception
- assert queue_manager.dequeue("thread-1") is None
+ item = queue_manager.dequeue("thread-1")
+ assert item is not None
+ assert item.content == "wont run"
asyncio.run(_run())
diff --git a/tests/Fix/test_monitor_resource_overview_uniqueness.py b/tests/Fix/test_monitor_resource_overview_uniqueness.py
new file mode 100644
index 000000000..dfcf08ba8
--- /dev/null
+++ b/tests/Fix/test_monitor_resource_overview_uniqueness.py
@@ -0,0 +1,323 @@
+from backend.web.services import resource_service
+
+
+class _FakeRepo:
+ def __init__(self, rows, lease_threads=None):
+ self._rows = rows
+ self._lease_threads = lease_threads or {}
+
+ def list_sessions_with_leases(self):
+ return list(self._rows)
+
+ def query_lease_threads(self, lease_id: str):
+ return [{"thread_id": tid} for tid in self._lease_threads.get(lease_id, [])]
+
+ def close(self):
+ pass
+
+
+class _FakeThreadRepo:
+ def __init__(self, rows):
+ self._rows = rows
+
+ def get_by_id(self, thread_id: str):
+ return self._rows.get(thread_id)
+
+ def close(self):
+ pass
+
+
+class _FakeMember:
+ def __init__(self, member_id: str, name: str, avatar: str | None = None):
+ self.id = member_id
+ self.name = name
+ self.avatar = avatar
+
+
+class _FakeMemberRepo:
+ def __init__(self, members):
+ self._members = members
+
+ def list_all(self):
+ return list(self._members)
+
+ def close(self):
+ pass
+
+
def test_list_resource_providers_deduplicates_terminal_fallback_rows(monkeypatch):
    """Two identical monitor rows for one lease/thread must collapse into one session."""
    rows = [
        {
            "provider": "local",
            "session_id": None,
            "thread_id": "thread-1",
            "lease_id": "lease-1",
            "observed_state": "running",
            "desired_state": "running",
            "created_at": "2026-04-04T00:00:00",
        },
        # Exact duplicate of the row above — the overview must not double count it.
        {
            "provider": "local",
            "session_id": None,
            "thread_id": "thread-1",
            "lease_id": "lease-1",
            "observed_state": "running",
            "desired_state": "running",
            "created_at": "2026-04-04T00:00:00",
        },
    ]

    monkeypatch.setattr(resource_service, "make_sandbox_monitor_repo", lambda: _FakeRepo(rows))
    monkeypatch.setattr(
        resource_service,
        "available_sandbox_types",
        lambda: [{"name": "local", "available": True}],
    )
    monkeypatch.setattr(
        resource_service,
        "_resolve_instance_capabilities",
        lambda _config_name: (resource_service._empty_capabilities(), None),
    )
    monkeypatch.setattr(
        resource_service,
        "_thread_owners",
        lambda thread_ids: {tid: {"member_id": "member-1", "member_name": "Toad", "avatar_url": None} for tid in thread_ids},
    )
    monkeypatch.setattr(resource_service, "list_resource_snapshots", lambda _lease_ids: {})

    payload = resource_service.list_resource_providers()
    local = payload["providers"][0]

    assert local["telemetry"]["running"]["used"] == 1
    assert local["sessions"] == [
        {
            "id": "lease-1:thread-1",
            "leaseId": "lease-1",
            "threadId": "thread-1",
            "memberId": "member-1",
            "memberName": "Toad",
            "avatarUrl": None,
            "status": "running",
            "startedAt": "2026-04-04T00:00:00",
            "metrics": None,
        }
    ]
+
+
def test_list_resource_providers_resolves_owner_metadata_from_runtime_storage(monkeypatch):
    """Owner name/id should be resolved via the thread and member repos when needed."""
    rows = [
        {
            "provider": "daytona",
            "session_id": "sess-1",
            "thread_id": "thread-supabase",
            "lease_id": "lease-1",
            "observed_state": "running",
            "desired_state": "running",
            "created_at": "2026-04-04T00:00:00",
        },
    ]

    monkeypatch.setattr(resource_service, "make_sandbox_monitor_repo", lambda: _FakeRepo(rows))
    monkeypatch.setattr(
        resource_service,
        "available_sandbox_types",
        lambda: [{"name": "daytona", "available": True}],
    )
    monkeypatch.setattr(resource_service, "resolve_provider_name", lambda *_args, **_kwargs: "daytona")
    monkeypatch.setattr(resource_service, "_resolve_console_url", lambda *_args, **_kwargs: None)
    monkeypatch.setattr(
        resource_service,
        "_resolve_instance_capabilities",
        lambda _config_name: (resource_service._empty_capabilities(), None),
    )
    # Thread -> member-id mapping and member lookup come from runtime storage,
    # not from the monitor rows themselves.
    monkeypatch.setattr(
        resource_service,
        "build_thread_repo",
        lambda **_kwargs: _FakeThreadRepo({"thread-supabase": {"member_id": "member-1"}}),
    )
    monkeypatch.setattr(
        resource_service,
        "build_member_repo",
        lambda **_kwargs: _FakeMemberRepo([_FakeMember("member-1", "Toad")]),
    )
    monkeypatch.setattr(resource_service, "list_resource_snapshots", lambda _lease_ids: {})

    payload = resource_service.list_resource_providers()

    assert payload["providers"][0]["sessions"] == [
        {
            "id": "lease-1:thread-supabase",
            "leaseId": "lease-1",
            "threadId": "thread-supabase",
            "memberId": "member-1",
            "memberName": "Toad",
            "avatarUrl": None,
            "status": "running",
            "startedAt": "2026-04-04T00:00:00",
            "metrics": None,
        }
    ]
+
+
def test_list_resource_providers_hides_subagent_threads(monkeypatch):
    """Threads with the ``subagent-`` id prefix must be excluded from the overview."""
    rows = [
        {
            "provider": "daytona",
            "session_id": "sess-parent",
            "thread_id": "thread-parent",
            "lease_id": "lease-parent",
            "observed_state": "running",
            "desired_state": "running",
            "created_at": "2026-04-04T00:00:00",
        },
        # Child row whose thread id marks it as a subagent — should be hidden.
        {
            "provider": "daytona",
            "session_id": "sess-child",
            "thread_id": "subagent-deadbeef",
            "lease_id": "lease-child",
            "observed_state": "running",
            "desired_state": "running",
            "created_at": "2026-04-04T00:00:01",
        },
    ]

    monkeypatch.setattr(resource_service, "make_sandbox_monitor_repo", lambda: _FakeRepo(rows))
    monkeypatch.setattr(
        resource_service,
        "available_sandbox_types",
        lambda: [{"name": "daytona", "available": True}],
    )
    monkeypatch.setattr(resource_service, "resolve_provider_name", lambda *_args, **_kwargs: "daytona")
    monkeypatch.setattr(resource_service, "_resolve_console_url", lambda *_args, **_kwargs: None)
    monkeypatch.setattr(
        resource_service,
        "_resolve_instance_capabilities",
        lambda _config_name: (resource_service._empty_capabilities(), None),
    )
    monkeypatch.setattr(
        resource_service,
        "_thread_owners",
        lambda thread_ids: {tid: {"member_id": tid, "member_name": tid, "avatar_url": None} for tid in thread_ids},
    )
    monkeypatch.setattr(resource_service, "list_resource_snapshots", lambda _lease_ids: {})

    payload = resource_service.list_resource_providers()
    sessions = payload["providers"][0]["sessions"]

    assert [session["threadId"] for session in sessions] == ["thread-parent"]
    assert payload["summary"]["running_sessions"] == 1
+
+
def test_list_resource_providers_projects_visible_parent_when_raw_monitor_row_is_subagent(monkeypatch):
    """A subagent-only monitor row should be re-projected onto its lease's parent thread."""
    rows = [
        {
            "provider": "daytona_selfhost",
            "session_id": None,
            "thread_id": "subagent-deadbeef",
            "lease_id": "lease-1",
            "observed_state": "paused",
            "desired_state": "paused",
            "created_at": "2026-04-04T00:00:00",
        },
    ]

    # The lease also carries a visible parent thread; the service should surface
    # that one instead of the hidden subagent thread.
    monkeypatch.setattr(
        resource_service,
        "make_sandbox_monitor_repo",
        lambda: _FakeRepo(rows, lease_threads={"lease-1": ["subagent-deadbeef", "thread-parent"]}),
    )
    monkeypatch.setattr(
        resource_service,
        "available_sandbox_types",
        lambda: [{"name": "daytona_selfhost", "available": True}],
    )
    monkeypatch.setattr(resource_service, "resolve_provider_name", lambda *_args, **_kwargs: "daytona")
    monkeypatch.setattr(resource_service, "_resolve_console_url", lambda *_args, **_kwargs: None)
    monkeypatch.setattr(
        resource_service,
        "_resolve_instance_capabilities",
        lambda _config_name: (resource_service._empty_capabilities(), None),
    )
    monkeypatch.setattr(
        resource_service,
        "_thread_owners",
        lambda thread_ids: {tid: {"member_id": "member-1", "member_name": "Morel", "avatar_url": None} for tid in thread_ids},
    )
    monkeypatch.setattr(resource_service, "list_resource_snapshots", lambda _lease_ids: {})

    payload = resource_service.list_resource_providers()
    sessions = payload["providers"][0]["sessions"]

    assert sessions == [
        {
            "id": "lease-1:thread-parent",
            "leaseId": "lease-1",
            "threadId": "thread-parent",
            "memberId": "member-1",
            "memberName": "Morel",
            "avatarUrl": None,
            "status": "paused",
            "startedAt": "2026-04-04T00:00:00",
            "metrics": None,
        }
    ]
+
+
def test_list_resource_providers_deduplicates_same_lease_thread_even_with_distinct_session_ids(monkeypatch):
    """Dedup key is (lease, thread): differing session ids must not create extra sessions."""
    rows = [
        {
            "provider": "daytona_selfhost",
            "session_id": "sess-a",
            "thread_id": "thread-parent",
            "lease_id": "lease-1",
            "observed_state": "running",
            "desired_state": "running",
            "created_at": "2026-04-04T00:00:00",
        },
        # Same lease/thread but a different session id and later timestamp.
        {
            "provider": "daytona_selfhost",
            "session_id": "sess-b",
            "thread_id": "thread-parent",
            "lease_id": "lease-1",
            "observed_state": "running",
            "desired_state": "running",
            "created_at": "2026-04-04T00:00:01",
        },
    ]

    monkeypatch.setattr(resource_service, "make_sandbox_monitor_repo", lambda: _FakeRepo(rows))
    monkeypatch.setattr(
        resource_service,
        "available_sandbox_types",
        lambda: [{"name": "daytona_selfhost", "available": True}],
    )
    monkeypatch.setattr(resource_service, "resolve_provider_name", lambda *_args, **_kwargs: "daytona")
    monkeypatch.setattr(resource_service, "_resolve_console_url", lambda *_args, **_kwargs: None)
    monkeypatch.setattr(
        resource_service,
        "_resolve_instance_capabilities",
        lambda _config_name: (resource_service._empty_capabilities(), None),
    )
    monkeypatch.setattr(
        resource_service,
        "_thread_owners",
        lambda thread_ids: {tid: {"member_id": "member-1", "member_name": "Toad", "avatar_url": None} for tid in thread_ids},
    )
    monkeypatch.setattr(resource_service, "list_resource_snapshots", lambda _lease_ids: {})

    payload = resource_service.list_resource_providers()
    sessions = payload["providers"][0]["sessions"]

    # The earliest created_at wins for the surviving session.
    assert sessions == [
        {
            "id": "lease-1:thread-parent",
            "leaseId": "lease-1",
            "threadId": "thread-parent",
            "memberId": "member-1",
            "memberName": "Toad",
            "avatarUrl": None,
            "status": "running",
            "startedAt": "2026-04-04T00:00:00",
            "metrics": None,
        }
    ]
diff --git a/tests/Fix/test_panel_auth_shell_coherence.py b/tests/Fix/test_panel_auth_shell_coherence.py
new file mode 100644
index 000000000..5a915b3c0
--- /dev/null
+++ b/tests/Fix/test_panel_auth_shell_coherence.py
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+from backend.web.routers import panel as panel_router
+from backend.web.services import member_service, profile_service
+from storage.contracts import MemberRow, MemberType
+
+
@pytest.mark.asyncio
async def test_panel_members_uses_injected_member_repo_for_owner_scope(monkeypatch: pytest.MonkeyPatch, tmp_path: Path):
    """panel.list_members must query the app-state member repo scoped to the caller."""
    now = 1_775_278_000.0
    agent = MemberRow(
        id="agent-1",
        name="Toad",
        type=MemberType.MYCEL_AGENT,
        owner_user_id="user-1",
        created_at=now,
    )
    seen: list[str] = []
    monkeypatch.setattr(
        member_service,
        "_member_to_dict",
        lambda _member_dir: {
            "id": "agent-1",
            "name": "Toad",
            "avatar_url": "avatars/agent-1.png",
            "config": {},
        },
    )
    # A member directory with an agent.md must exist for the service to pick it up.
    member_dir = tmp_path / "agent-1"
    member_dir.mkdir()
    (member_dir / "agent.md").write_text("stub", encoding="utf-8")
    monkeypatch.setattr(member_service, "MEMBERS_DIR", tmp_path)

    # `append(...) or [agent]` both records the queried owner id and returns rows.
    fake_repo = SimpleNamespace(
        list_by_owner_user_id=lambda owner_user_id: seen.append(owner_user_id) or [agent],
    )

    result = await panel_router.list_members(
        user_id="user-1",
        request=SimpleNamespace(app=SimpleNamespace(state=SimpleNamespace(member_repo=fake_repo))),
    )

    assert seen == ["user-1"]
    assert result["items"] == [{"id": "agent-1", "name": "Toad", "avatar_url": "avatars/agent-1.png", "config": {}}]
+
+
def test_profile_service_prefers_authenticated_member_over_config_defaults():
    """An explicit member row should drive the profile, not config fallbacks."""
    authenticated = MemberRow(
        id="user-1",
        name="codex",
        type=MemberType.HUMAN,
        email="codex@example.com",
        created_at=1.0,
    )

    result = profile_service.get_profile(member=authenticated)

    # Initials are derived from the member name ("codex" -> "CO").
    assert result == {"name": "codex", "initials": "CO", "email": "codex@example.com"}
+
+
def test_builtin_member_surface_exposes_chat_tools():
    """The built-in Leon member must expose the renamed chat tool set and drop legacy names."""
    builtin = member_service._leon_builtin()
    tools_by_name = {tool["name"]: tool for tool in builtin["config"]["tools"]}

    expected_chat_tools = ("list_chats", "read_messages", "send_message", "search_messages")
    for tool_name in expected_chat_tools:
        assert tool_name in tools_by_name
        assert tools_by_name[tool_name]["enabled"] is True
        assert tools_by_name[tool_name]["group"] == "chat"

    legacy_names = ("chats", "read_message", "search_message", "directory", "wechat_send", "wechat_contacts")
    for legacy_name in legacy_names:
        assert legacy_name not in tools_by_name
diff --git a/tests/Fix/test_sandbox_provider_availability.py b/tests/Fix/test_sandbox_provider_availability.py
new file mode 100644
index 000000000..5b12fb2b6
--- /dev/null
+++ b/tests/Fix/test_sandbox_provider_availability.py
@@ -0,0 +1,105 @@
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+
+from backend.web.services import sandbox_service
+from sandbox.providers.local import LocalSessionProvider
+
+
def test_available_sandbox_types_marks_configured_but_unavailable_provider(monkeypatch, tmp_path: Path) -> None:
    """A config file for a provider with no live instance must be reported unavailable."""
    local_provider = LocalSessionProvider(default_cwd=str(tmp_path))
    # daytona.json exists on disk, but only "local" is actually initialised below.
    (tmp_path / "daytona.json").write_text("{}")

    monkeypatch.setattr(sandbox_service, "SANDBOXES_DIR", tmp_path)
    monkeypatch.setattr(
        sandbox_service,
        "init_providers_and_managers",
        lambda: ({"local": local_provider}, {}),
    )
    monkeypatch.setattr(
        sandbox_service.SandboxConfig,
        "load",
        classmethod(lambda cls, name: SimpleNamespace(provider="daytona", name=name)),
    )

    types = sandbox_service.available_sandbox_types()
    daytona = next(item for item in types if item["name"] == "daytona")

    assert daytona["provider"] == "daytona"
    assert daytona["available"] is False
    assert "unavailable in the current process" in daytona["reason"]
+
+
def test_available_sandbox_types_marks_e2b_unavailable_when_sdk_missing(monkeypatch, tmp_path: Path) -> None:
    """Same as the daytona case: a configured e2b with no live provider is unavailable."""
    local_provider = LocalSessionProvider(default_cwd=str(tmp_path))
    (tmp_path / "e2b.json").write_text("{}")

    monkeypatch.setattr(sandbox_service, "SANDBOXES_DIR", tmp_path)
    monkeypatch.setattr(
        sandbox_service,
        "init_providers_and_managers",
        lambda: ({"local": local_provider}, {}),
    )
    monkeypatch.setattr(
        sandbox_service.SandboxConfig,
        "load",
        classmethod(lambda cls, name: SimpleNamespace(provider="e2b", name=name)),
    )

    types = sandbox_service.available_sandbox_types()
    e2b = next(item for item in types if item["name"] == "e2b")

    assert e2b["provider"] == "e2b"
    assert e2b["available"] is False
    assert "unavailable in the current process" in e2b["reason"]
+
+
def test_build_providers_and_managers_passes_agentbay_pause_capability_overrides(monkeypatch, tmp_path: Path) -> None:
    """Config-level supports_pause/supports_resume flags must reach the AgentBay provider."""
    (tmp_path / "agentbay.json").write_text("{}")
    monkeypatch.setattr(sandbox_service, "SANDBOXES_DIR", tmp_path)

    # Captures the kwargs the service forwards to the provider constructor.
    captured: dict[str, object] = {}

    class _FakeAgentBayProvider:
        def __init__(self, **kwargs) -> None:
            captured.update(kwargs)
            self.name = kwargs["provider_name"]

        def get_capability(self):
            return SimpleNamespace(can_pause=False, can_resume=False, can_destroy=True)

    class _FakeSandboxManager:
        def __init__(self, provider, db_path=None) -> None:
            self.provider = provider
            self.db_path = db_path

    monkeypatch.setattr(sandbox_service, "SandboxManager", _FakeSandboxManager)
    monkeypatch.setattr(
        sandbox_service.SandboxConfig,
        "load",
        classmethod(
            lambda cls, name: SimpleNamespace(
                provider="agentbay",
                agentbay=SimpleNamespace(
                    api_key="test-key",
                    region_id="ap-southeast-1",
                    context_path="/home/wuying",
                    image_id=None,
                    supports_pause=False,
                    supports_resume=False,
                ),
            )
        ),
    )

    import sandbox.providers.agentbay as agentbay_module

    monkeypatch.setattr(agentbay_module, "AgentBayProvider", _FakeAgentBayProvider)

    providers, managers = sandbox_service._build_providers_and_managers()

    assert "agentbay" in providers
    assert "agentbay" in managers
    assert captured["supports_pause"] is False
    assert captured["supports_resume"] is False
diff --git a/tests/Fix/test_sandbox_user_leases.py b/tests/Fix/test_sandbox_user_leases.py
new file mode 100644
index 000000000..158fa423f
--- /dev/null
+++ b/tests/Fix/test_sandbox_user_leases.py
@@ -0,0 +1,117 @@
+from types import SimpleNamespace
+
+from backend.web.services import sandbox_service
+
+
+class _FakeMonitorRepo:
+ def __init__(self, rows):
+ self._rows = rows
+
+ def list_leases_with_threads(self):
+ return list(self._rows)
+
+ def close(self):
+ pass
+
+
+class _FakeThreadRepo:
+ def __init__(self, rows):
+ self._rows = rows
+
+ def get_by_id(self, thread_id: str):
+ return self._rows.get(thread_id)
+
+ def close(self):
+ pass
+
+
+class _FakeMemberRepo:
+ def __init__(self, rows):
+ self._rows = rows
+
+ def get_by_id(self, member_id: str):
+ return self._rows.get(member_id)
+
+ def close(self):
+ pass
+
+
+def test_list_user_leases_hides_subagent_threads_and_deduplicates_visible_agents(monkeypatch):
+ rows = [
+ {
+ "lease_id": "lease-1",
+ "provider_name": "daytona_selfhost",
+ "recipe_id": "daytona:default",
+ "recipe_json": None,
+ "observed_state": "running",
+ "desired_state": "running",
+ "cwd": "/home/daytona/files/app",
+ "thread_id": "thread-parent",
+ },
+ {
+ "lease_id": "lease-1",
+ "provider_name": "daytona_selfhost",
+ "recipe_id": "daytona:default",
+ "recipe_json": None,
+ "observed_state": "running",
+ "desired_state": "running",
+ "cwd": "/home/daytona/files/app",
+ "thread_id": "subagent-deadbeef",
+ },
+ ]
+ thread_repo = _FakeThreadRepo(
+ {
+ "thread-parent": {"member_id": "member-1"},
+ "subagent-deadbeef": {"member_id": "member-1"},
+ }
+ )
+ member_repo = _FakeMemberRepo(
+ {
+ "member-1": SimpleNamespace(id="member-1", name="Morel", avatar="x", owner_user_id="owner-1"),
+ }
+ )
+
+ monkeypatch.setattr(sandbox_service, "make_sandbox_monitor_repo", lambda: _FakeMonitorRepo(rows))
+
+ leases = sandbox_service.list_user_leases(
+ "owner-1",
+ thread_repo=thread_repo,
+ member_repo=member_repo,
+ )
+
+ assert leases == [
+ {
+ "lease_id": "lease-1",
+ "provider_name": "daytona_selfhost",
+ "recipe_id": "daytona:default",
+ "recipe": {
+ "id": "daytona:default",
+ "name": "Daytona Default",
+ "desc": "Default recipe for daytona",
+ "provider_type": "daytona",
+ "features": {"lark_cli": False},
+ "configurable_features": {"lark_cli": True},
+ "feature_options": [
+ {
+ "key": "lark_cli",
+ "name": "Lark CLI",
+ "description": "在 sandbox 初始化时懒安装并校验。",
+ "icon": "feishu",
+ }
+ ],
+ "builtin": True,
+ },
+ "observed_state": "running",
+ "desired_state": "running",
+ "cwd": "/home/daytona/files/app",
+ "thread_ids": ["thread-parent"],
+ "agents": [
+ {
+ "member_id": "member-1",
+ "member_name": "Morel",
+ "avatar_url": "/api/members/member-1/avatar",
+ }
+ ],
+ "recipe_name": "Daytona Default",
+ }
+ ]
diff --git a/tests/test_session_file_operations_cleanup.py b/tests/Fix/test_session_file_operations_cleanup.py
similarity index 100%
rename from tests/test_session_file_operations_cleanup.py
rename to tests/Fix/test_session_file_operations_cleanup.py
diff --git a/tests/test_storage_import_boundary.py b/tests/Fix/test_storage_import_boundary.py
similarity index 100%
rename from tests/test_storage_import_boundary.py
rename to tests/Fix/test_storage_import_boundary.py
diff --git a/tests/Fix/test_thread_request_model.py b/tests/Fix/test_thread_request_model.py
new file mode 100644
index 000000000..1bfe188be
--- /dev/null
+++ b/tests/Fix/test_thread_request_model.py
@@ -0,0 +1,25 @@
+from backend.web.models.requests import CreateThreadRequest
+
+
+def test_create_thread_request_accepts_legacy_sandbox_type_key() -> None:
+ payload = CreateThreadRequest.model_validate(
+ {
+ "member_id": "member-1",
+ "sandbox_type": "daytona_selfhost",
+ "model": "gpt-5.4-mini",
+ }
+ )
+
+ assert payload.sandbox == "daytona_selfhost"
+
+
+def test_create_thread_request_prefers_primary_sandbox_key() -> None:
+ payload = CreateThreadRequest.model_validate(
+ {
+ "member_id": "member-1",
+ "sandbox": "local",
+ "sandbox_type": "daytona_selfhost",
+ }
+ )
+
+ assert payload.sandbox == "local"
diff --git a/tests/Integration/test_auth_router.py b/tests/Integration/test_auth_router.py
new file mode 100644
index 000000000..51d2f9ee2
--- /dev/null
+++ b/tests/Integration/test_auth_router.py
@@ -0,0 +1,170 @@
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+from fastapi import HTTPException
+
+from backend.web.routers import auth as auth_router
+from backend.web.routers import chats as chats_router
+
+
+class _FakeAuthService:
+ def __init__(self) -> None:
+ self.send_otp_calls: list[tuple[str, str, str]] = []
+ self.verify_otp_calls: list[tuple[str, str]] = []
+ self.complete_register_calls: list[tuple[str, str]] = []
+ self.login_calls: list[tuple[str, str]] = []
+ self.verify_otp_result = {"temp_token": "temp-otp"}
+ self.complete_register_result = {"token": "tok-register"}
+ self.login_result = {"token": "tok-login"}
+ self.send_otp_error: Exception | None = None
+ self.verify_otp_error: Exception | None = None
+ self.complete_register_error: Exception | None = None
+ self.login_error: Exception | None = None
+
+ def send_otp(self, email: str, password: str, invite_code: str) -> None:
+ self.send_otp_calls.append((email, password, invite_code))
+ if self.send_otp_error is not None:
+ raise self.send_otp_error
+
+ def verify_register_otp(self, email: str, token: str) -> dict:
+ self.verify_otp_calls.append((email, token))
+ if self.verify_otp_error is not None:
+ raise self.verify_otp_error
+ return self.verify_otp_result
+
+ def complete_register(self, temp_token: str, invite_code: str) -> dict:
+ self.complete_register_calls.append((temp_token, invite_code))
+ if self.complete_register_error is not None:
+ raise self.complete_register_error
+ return self.complete_register_result
+
+ def login(self, identifier: str, password: str) -> dict:
+ self.login_calls.append((identifier, password))
+ if self.login_error is not None:
+ raise self.login_error
+ return self.login_result
+
+
+@pytest.mark.asyncio
+async def test_send_otp_calls_auth_service_directly():
+ service = _FakeAuthService()
+ app = SimpleNamespace(state=SimpleNamespace(auth_service=service))
+
+ result = await auth_router.send_otp(
+ auth_router.SendOtpRequest(email="fresh@example.com", password="pass1234", invite_code="invite-1"),
+ app,
+ )
+
+ assert result == {"ok": True}
+ assert service.send_otp_calls == [("fresh@example.com", "pass1234", "invite-1")]
+
+
+@pytest.mark.asyncio
+async def test_send_otp_maps_value_error_to_bad_request():
+ service = _FakeAuthService()
+ service.send_otp_error = ValueError("邀请码无效或已过期")
+ app = SimpleNamespace(state=SimpleNamespace(auth_service=service))
+
+ with pytest.raises(HTTPException) as exc_info:
+ await auth_router.send_otp(
+ auth_router.SendOtpRequest(email="fresh@example.com", password="pass1234", invite_code="invite-1"),
+ app,
+ )
+
+ assert exc_info.value.status_code == 400
+ assert "邀请码无效" in str(exc_info.value.detail)
+
+
+@pytest.mark.asyncio
+async def test_verify_otp_calls_auth_service_directly():
+ service = _FakeAuthService()
+ app = SimpleNamespace(state=SimpleNamespace(auth_service=service))
+
+ result = await auth_router.verify_otp(
+ auth_router.VerifyOtpRequest(email="fresh@example.com", token="123456"),
+ app,
+ )
+
+ assert result == {"temp_token": "temp-otp"}
+ assert service.verify_otp_calls == [("fresh@example.com", "123456")]
+
+
+@pytest.mark.asyncio
+async def test_complete_register_calls_auth_service_directly():
+ service = _FakeAuthService()
+ app = SimpleNamespace(state=SimpleNamespace(auth_service=service))
+
+ result = await auth_router.complete_register(
+ auth_router.CompleteRegisterRequest(temp_token="temp-otp", invite_code="invite-1"),
+ app,
+ )
+
+ assert result == {"token": "tok-register"}
+ assert service.complete_register_calls == [("temp-otp", "invite-1")]
+
+
+@pytest.mark.asyncio
+async def test_login_calls_auth_service_directly():
+ service = _FakeAuthService()
+ app = SimpleNamespace(state=SimpleNamespace(auth_service=service))
+
+ result = await auth_router.login(auth_router.LoginRequest(identifier="fresh@example.com", password="pass1234"), app)
+
+ assert result == {"token": "tok-login"}
+ assert service.login_calls == [("fresh@example.com", "pass1234")]
+
+
+@pytest.mark.asyncio
+async def test_login_maps_value_error_to_unauthorized():
+ service = _FakeAuthService()
+ service.login_error = ValueError("Invalid username or password")
+ app = SimpleNamespace(state=SimpleNamespace(auth_service=service))
+
+ with pytest.raises(HTTPException) as exc_info:
+ await auth_router.login(auth_router.LoginRequest(identifier="fresh@example.com", password="pass1234"), app)
+
+ assert exc_info.value.status_code == 401
+ assert "Invalid username or password" in str(exc_info.value.detail)
+
+
+class _VerifyOnlyAuthService:
+ def __init__(self) -> None:
+ self.tokens: list[str] = []
+
+ def verify_token(self, token: str) -> dict:
+ self.tokens.append(token)
+ return {"user_id": "user-1"}
+
+
+@pytest.mark.asyncio
+async def test_chat_events_requires_token():
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ auth_service=_VerifyOnlyAuthService(),
+ chat_event_bus=SimpleNamespace(subscribe=lambda _chat_id: None),
+ )
+ )
+
+ with pytest.raises(HTTPException) as exc_info:
+ await chats_router.stream_chat_events("chat-1", token=None, app=app)
+
+ assert exc_info.value.status_code == 401
+ assert exc_info.value.detail == "Missing token"
+
+
+@pytest.mark.asyncio
+async def test_chat_events_verifies_provided_token():
+ auth_service = _VerifyOnlyAuthService()
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ auth_service=auth_service,
+ chat_event_bus=SimpleNamespace(subscribe=lambda _chat_id: None),
+ )
+ )
+
+ response = await chats_router.stream_chat_events("chat-1", token="tok-chat", app=app)
+
+ assert auth_service.tokens == ["tok-chat"]
+ assert response.media_type == "text/event-stream"
diff --git a/tests/Integration/test_child_thread_live_bridge.py b/tests/Integration/test_child_thread_live_bridge.py
new file mode 100644
index 000000000..84d1d26d7
--- /dev/null
+++ b/tests/Integration/test_child_thread_live_bridge.py
@@ -0,0 +1,635 @@
+from __future__ import annotations
+
+import asyncio
+import json
+from types import SimpleNamespace
+from unittest.mock import AsyncMock
+
+import pytest
+from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
+
+from backend.web.routers import threads as threads_router
+from backend.web.services.display_builder import DisplayBuilder
+from backend.web.services.event_buffer import ThreadEventBuffer
+from backend.web.services.streaming_service import run_child_thread_live
+from backend.web.utils.serializers import serialize_message
+from core.runtime.middleware.monitor import AgentState
+from core.runtime.middleware.queue.manager import MessageQueueManager
+
+
+class _FakeRuntime:
+ def __init__(self) -> None:
+ self.current_state = AgentState.IDLE
+ self._event_callback = None
+ self._activity_sink = None
+ self.state = SimpleNamespace(flags=SimpleNamespace(is_compacting=False))
+ self.calls = 0
+ self.tokens = 0
+ self.cost = 0.0
+ self.ctx_percent = 0.0
+
+ def transition(self, new_state: AgentState) -> bool:
+ self.current_state = new_state
+ return True
+
+ def set_event_callback(self, callback) -> None:
+ self._event_callback = callback
+
+ def bind_thread(self, activity_sink) -> None:
+ self._activity_sink = activity_sink
+
+ def unbind_thread(self) -> None:
+ self._activity_sink = None
+
+ def get_compact_dict(self) -> dict:
+ return {
+ "state": self.current_state.value,
+ "tokens": self.tokens,
+ "cost": self.cost,
+ "calls": self.calls,
+ "ctx_percent": self.ctx_percent,
+ }
+
+ def get_status_dict(self) -> dict:
+ return {
+ "state": {"state": self.current_state.value, "flags": {}},
+ "tokens": {"total": self.tokens},
+ "context": {"percent": self.ctx_percent},
+ "calls": self.calls,
+ "cost": self.cost,
+ }
+
+
+class _BlockingChildGraph:
+ def __init__(self) -> None:
+ self.messages: list = []
+ self.started = asyncio.Event()
+ self.release = asyncio.Event()
+ self.system_prompt = None
+
+ async def aget_state(self, _config):
+ return SimpleNamespace(values={"messages": list(self.messages)})
+
+ async def aupdate_state(self, _config, input_data, as_node=None):
+ self.messages.extend(input_data.get("messages", []))
+
+ async def astream(self, input_data, config=None, stream_mode=None):
+ if input_data is not None:
+ self.messages.extend(input_data.get("messages", []))
+ self.started.set()
+ await self.release.wait()
+ yield ("messages", (SimpleNamespace(__class__=SimpleNamespace(__name__="AIMessageChunk")), {}))
+ ai = AIMessage(content="CHILD_DONE")
+ ai.id = "ai-child-1"
+ self.messages.append(ai)
+ yield ("updates", {"agent": {"messages": [ai]}})
+
+
+class _BlockingChildAgent:
+ def __init__(self) -> None:
+ self.runtime = _FakeRuntime()
+ self.agent = _BlockingChildGraph()
+
+
+def _prime_agent_turn(
+ builder: DisplayBuilder,
+ thread_id: str,
+ *,
+ tool_call_id: str = "tc-agent-1",
+ args: dict | None = None,
+ run_id: str = "run-1",
+) -> None:
+ builder.apply_event(
+ thread_id,
+ "run_start",
+ {"run_id": run_id, "source": "owner", "showing": True},
+ )
+ builder.apply_event(
+ thread_id,
+ "tool_call",
+ {
+ "id": tool_call_id,
+ "name": "Agent",
+ "args": args or {"prompt": "do work"},
+ "showing": True,
+ },
+ )
+
+
+def _set_single_subagent_entry(
+ builder: DisplayBuilder,
+ thread_id: str,
+ *,
+ task_id: str,
+ thread_ref: str,
+ status: str,
+ result: str,
+ description: str = "inspect workspace",
+) -> None:
+ builder.set_entries(
+ thread_id,
+ [
+ {"id": "u1", "role": "user", "content": "do work", "timestamp": 1},
+ {
+ "id": "a1",
+ "role": "assistant",
+ "timestamp": 2,
+ "segments": [
+ {
+ "type": "tool",
+ "step": {
+ "id": "call-agent-1",
+ "name": "Agent",
+ "args": {"description": description},
+ "status": "done",
+ "result": result,
+ "subagent_stream": {
+ "task_id": task_id,
+ "thread_id": thread_ref,
+ "description": description,
+ "text": "",
+ "tool_calls": [],
+ "status": status,
+ },
+ },
+ }
+ ],
+ },
+ ],
+ )
+
+
+def _make_router_app(
+ builder: DisplayBuilder,
+ thread_id: str,
+ monkeypatch: pytest.MonkeyPatch,
+) -> SimpleNamespace:
+ fake_agent = SimpleNamespace(runtime=SimpleNamespace(current_state=AgentState.ACTIVE), agent=SimpleNamespace(aget_state=None))
+ monkeypatch.setattr(threads_router, "get_or_create_agent", AsyncMock(return_value=fake_agent))
+ return SimpleNamespace(
+ state=SimpleNamespace(
+ display_builder=builder,
+ agent_pool={},
+ thread_sandbox={thread_id: "local"},
+ )
+ )
+
+
+@pytest.mark.asyncio
+async def test_run_child_thread_live_rebinds_from_parent_sink_and_surfaces_runtime_and_detail_before_completion():
+ child_thread_id = "subagent-live-1"
+ agent = _BlockingChildAgent()
+ parent_events: list[dict] = []
+
+ async def _parent_sink(event: dict) -> None:
+ parent_events.append(event)
+
+ agent.runtime.bind_thread(_parent_sink)
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ display_builder=DisplayBuilder(),
+ queue_manager=MessageQueueManager(),
+ _event_loop=asyncio.get_running_loop(),
+ thread_event_buffers={},
+ thread_tasks={},
+ thread_last_active={},
+ agent_pool={},
+ thread_sandbox={child_thread_id: "local"},
+ thread_cwd={},
+ thread_repo=SimpleNamespace(get_by_id=lambda thread_id: {"model": "gpt-live"} if thread_id == child_thread_id else None),
+ )
+ )
+
+ task = asyncio.create_task(
+ run_child_thread_live(
+ agent,
+ child_thread_id,
+ "child prompt",
+ app,
+ input_messages=[HumanMessage(content="child prompt")],
+ )
+ )
+
+ await agent.agent.started.wait()
+
+ runtime = await threads_router.get_thread_runtime(child_thread_id, stream=False, user_id="owner-1", app=app)
+ detail = await threads_router.get_thread_messages(child_thread_id, user_id="owner-1", app=app)
+
+ assert runtime["state"]["state"] == "active"
+ assert detail["entries"]
+ assert detail["entries"][0]["role"] == "user"
+ assert detail["entries"][0]["content"] == "child prompt"
+ assert isinstance(app.state.thread_event_buffers[child_thread_id], ThreadEventBuffer)
+ assert app.state.agent_pool[f"{child_thread_id}:local"] is agent
+ assert agent.runtime._activity_sink is not _parent_sink
+ assert parent_events == []
+
+ agent.agent.release.set()
+ result = await task
+
+ assert result == "CHILD_DONE"
+
+
+@pytest.mark.asyncio
+async def test_run_child_thread_live_raises_when_child_run_emits_error_event(monkeypatch):
+ child_thread_id = "subagent-live-error"
+ agent = _BlockingChildAgent()
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ display_builder=DisplayBuilder(),
+ queue_manager=MessageQueueManager(),
+ _event_loop=asyncio.get_running_loop(),
+ thread_event_buffers={},
+ thread_tasks={},
+ thread_last_active={},
+ agent_pool={},
+ thread_sandbox={child_thread_id: "local"},
+ thread_cwd={},
+ thread_repo=SimpleNamespace(get_by_id=lambda thread_id: {"model": "gpt-live"} if thread_id == child_thread_id else None),
+ )
+ )
+
+ def fake_start_agent_run(agent, thread_id, message, app, enable_trajectory=False, message_metadata=None, input_messages=None):
+ async def _fake_run():
+ thread_buf = app.state.thread_event_buffers[thread_id]
+ await thread_buf.put({"event": "error", "data": json.dumps({"error": "child model init failed"})})
+ return ""
+
+ app.state.thread_tasks[thread_id] = asyncio.create_task(_fake_run())
+ return "run-error-1"
+
+ monkeypatch.setattr("backend.web.services.streaming_service.start_agent_run", fake_start_agent_run)
+
+ with pytest.raises(RuntimeError, match="child model init failed"):
+ await run_child_thread_live(
+ agent,
+ child_thread_id,
+ "child prompt",
+ app,
+ input_messages=[HumanMessage(content="child prompt")],
+ )
+
+
+@pytest.mark.asyncio
+async def test_run_child_thread_live_raises_when_child_never_makes_a_model_call(monkeypatch):
+ child_thread_id = "subagent-live-no-call"
+ agent = _BlockingChildAgent()
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ display_builder=DisplayBuilder(),
+ queue_manager=MessageQueueManager(),
+ _event_loop=asyncio.get_running_loop(),
+ thread_event_buffers={},
+ thread_tasks={},
+ thread_last_active={},
+ agent_pool={},
+ thread_sandbox={child_thread_id: "local"},
+ thread_cwd={},
+ thread_repo=SimpleNamespace(get_by_id=lambda thread_id: {"model": "gpt-live"} if thread_id == child_thread_id else None),
+ )
+ )
+
+ def fake_start_agent_run(agent, thread_id, message, app, enable_trajectory=False, message_metadata=None, input_messages=None):
+ async def _fake_run():
+ return ""
+
+ app.state.thread_tasks[thread_id] = asyncio.create_task(_fake_run())
+ return "run-no-call-1"
+
+ monkeypatch.setattr("backend.web.services.streaming_service.start_agent_run", fake_start_agent_run)
+
+ with pytest.raises(RuntimeError, match="before first model call"):
+ await run_child_thread_live(
+ agent,
+ child_thread_id,
+ "child prompt",
+ app,
+ input_messages=[HumanMessage(content="child prompt")],
+ )
+
+
+def test_live_tool_result_restores_subagent_stream_from_agent_background_json():
+ builder = DisplayBuilder()
+ thread_id = "parent-thread"
+ _prime_agent_turn(builder, thread_id, args={"prompt": "do work", "run_in_background": True})
+
+ delta = builder.apply_event(
+ thread_id,
+ "tool_result",
+ {
+ "tool_call_id": "tc-agent-1",
+ "name": "Agent",
+ "content": (
+ '{"task_id":"task-123","agent_name":"agent-task-123",'
+ '"thread_id":"subagent-task-123","status":"running",'
+ '"message":"Agent started in background. Use TaskOutput to get result."}'
+ ),
+ "metadata": {},
+ "showing": True,
+ },
+ )
+
+ seg = builder.get_entries(thread_id)[0]["segments"][0]
+ assert delta is not None
+ assert seg["step"]["subagent_stream"]["task_id"] == "task-123"
+ assert seg["step"]["subagent_stream"]["thread_id"] == "subagent-task-123"
+ assert seg["step"]["subagent_stream"]["status"] == "running"
+
+
+def test_live_tool_result_restores_subagent_stream_from_blocking_agent_metadata():
+ builder = DisplayBuilder()
+ thread_id = "parent-thread"
+ _prime_agent_turn(builder, thread_id)
+
+ delta = builder.apply_event(
+ thread_id,
+ "tool_result",
+ {
+ "tool_call_id": "tc-agent-1",
+ "name": "Agent",
+ "content": "CHILD_DONE",
+ "metadata": {
+ "task_id": "task-456",
+ "subagent_thread_id": "subagent-task-456",
+ "description": "blocking child",
+ },
+ "showing": True,
+ },
+ )
+
+ seg = builder.get_entries(thread_id)[0]["segments"][0]
+ assert delta is not None
+ assert seg["step"]["subagent_stream"]["task_id"] == "task-456"
+ assert seg["step"]["subagent_stream"]["thread_id"] == "subagent-task-456"
+ assert seg["step"]["subagent_stream"]["status"] == "completed"
+
+
+def test_task_start_can_patch_background_agent_after_tool_result_race():
+ builder = DisplayBuilder()
+ thread_id = "parent-thread"
+ _prime_agent_turn(
+ builder,
+ thread_id,
+ tool_call_id="tc-agent-race",
+ args={"prompt": "do work", "run_in_background": True},
+ )
+ builder.apply_event(
+ thread_id,
+ "tool_result",
+ {
+ "tool_call_id": "tc-agent-race",
+ "name": "Agent",
+ "content": "Agent started in background.",
+ "metadata": {},
+ "showing": True,
+ },
+ )
+
+ delta = builder.apply_event(
+ thread_id,
+ "task_start",
+ {
+ "task_id": "task-race",
+ "thread_id": "subagent-task-race",
+ "description": "late task start",
+ },
+ )
+
+ seg = builder.get_entries(thread_id)[0]["segments"][0]
+ assert delta is not None
+ assert seg["step"]["status"] == "done"
+ assert seg["step"]["subagent_stream"]["task_id"] == "task-race"
+ assert seg["step"]["subagent_stream"]["thread_id"] == "subagent-task-race"
+ assert seg["step"]["subagent_stream"]["status"] == "running"
+
+
+@pytest.mark.parametrize("task_status", ["completed", "error", "cancelled"])
+def test_live_notice_reconciles_subagent_stream_status_from_terminal_notification(task_status: str):
+ builder = DisplayBuilder()
+ thread_id = "parent-thread"
+ _prime_agent_turn(builder, thread_id, args={"prompt": "do work", "run_in_background": True})
+ builder.apply_event(
+ thread_id,
+ "tool_result",
+ {
+ "tool_call_id": "tc-agent-1",
+ "name": "Agent",
+ "content": (
+ '{"task_id":"task-123","agent_name":"agent-task-123",'
+ '"thread_id":"subagent-task-123","status":"running",'
+ '"message":"Agent started in background. Use TaskOutput to get result."}'
+ ),
+ "metadata": {},
+ "showing": True,
+ },
+ )
+
+ delta = builder.apply_event(
+ thread_id,
+ "notice",
+ {
+ "content": (
+ "\n"
+ "\n"
+ " task-123\n"
+ f" {task_status}\n"
+ " child task\n"
+ " child task\n"
+ " CHILD_DONE\n"
+ "\n"
+ ""
+ ),
+ "source": "system",
+ "notification_type": "agent",
+ },
+ )
+
+ seg = builder.get_entries(thread_id)[0]["segments"][0]
+ assert delta is not None
+ assert seg["step"]["subagent_stream"]["task_id"] == "task-123"
+ assert seg["step"]["subagent_stream"]["thread_id"] == "subagent-task-123"
+ assert seg["step"]["subagent_stream"]["status"] == task_status
+
+
+def test_checkpoint_rebuild_reconciles_subagent_stream_status_from_terminal_notification():
+ builder = DisplayBuilder()
+ thread_id = "parent-thread"
+
+ ai = AIMessage(
+ content="",
+ tool_calls=[{"name": "Agent", "args": {"prompt": "do work", "run_in_background": True}, "id": "tc-agent-1"}],
+ )
+ tool = ToolMessage(
+ content=(
+ '{"task_id":"task-123","agent_name":"agent-task-123",'
+ '"thread_id":"subagent-task-123","status":"running",'
+ '"message":"Agent started in background. Use TaskOutput to get result."}'
+ ),
+ name="Agent",
+ tool_call_id="tc-agent-1",
+ )
+ notice = HumanMessage(
+ content=(
+ "\n"
+ "\n"
+ " task-123\n"
+ " completed\n"
+ " child task\n"
+ " child task\n"
+ " CHILD_DONE\n"
+ "\n"
+ ""
+ )
+ )
+ notice.metadata = {"source": "system", "notification_type": "agent"}
+
+ entries = builder.build_from_checkpoint(
+ thread_id,
+ [serialize_message(ai), serialize_message(tool), serialize_message(notice)],
+ )
+
+ seg = entries[0]["segments"][0]
+ assert seg["step"]["subagent_stream"]["task_id"] == "task-123"
+ assert seg["step"]["subagent_stream"]["thread_id"] == "subagent-task-123"
+ assert seg["step"]["subagent_stream"]["status"] == "completed"
+
+
+def test_checkpoint_rebuild_restores_blocking_subagent_stream_from_tool_result_meta():
+ builder = DisplayBuilder()
+ thread_id = "parent-thread"
+
+ ai = AIMessage(
+ content="",
+ tool_calls=[{"name": "Agent", "args": {"prompt": "do work"}, "id": "tc-agent-1"}],
+ )
+ tool = ToolMessage(
+ content="CHILD_DONE",
+ name="Agent",
+ tool_call_id="tc-agent-1",
+ additional_kwargs={
+ "tool_result_meta": {
+ "task_id": "task-456",
+ "subagent_thread_id": "subagent-task-456",
+ "description": "blocking child",
+ "kind": "success",
+ "source": "local",
+ }
+ },
+ )
+
+ entries = builder.build_from_checkpoint(
+ thread_id,
+ [serialize_message(ai), serialize_message(tool)],
+ )
+
+ seg = entries[0]["segments"][0]
+ assert seg["step"]["subagent_stream"]["task_id"] == "task-456"
+ assert seg["step"]["subagent_stream"]["thread_id"] == "subagent-task-456"
+ assert seg["step"]["subagent_stream"]["status"] == "completed"
+
+
+@pytest.mark.asyncio
+async def test_list_tasks_includes_subagent_stream_from_display_entries():
+ thread_id = "parent-thread-tasks"
+ builder = DisplayBuilder()
+ _set_single_subagent_entry(
+ builder,
+ thread_id,
+ task_id="task-123",
+ thread_ref="subagent-task-123",
+ status="completed",
+ result="workspace looks empty",
+ )
+ monkeypatch = pytest.MonkeyPatch()
+ app = _make_router_app(builder, thread_id, monkeypatch)
+
+ tasks = await threads_router.list_tasks(thread_id, request=SimpleNamespace(app=app))
+
+ assert tasks == [
+ {
+ "task_id": "task-123",
+ "task_type": "agent",
+ "status": "completed",
+ "command_line": None,
+ "description": "inspect workspace",
+ "exit_code": None,
+ "error": None,
+ }
+ ]
+ monkeypatch.undo()
+
+
+@pytest.mark.asyncio
+async def test_get_task_returns_subagent_stream_result_from_display_entries():
+ thread_id = "parent-thread-task-detail"
+ builder = DisplayBuilder()
+ _set_single_subagent_entry(
+ builder,
+ thread_id,
+ task_id="task-123",
+ thread_ref="subagent-task-123",
+ status="completed",
+ result="workspace looks empty",
+ )
+ monkeypatch = pytest.MonkeyPatch()
+ app = _make_router_app(builder, thread_id, monkeypatch)
+
+ task = await threads_router.get_task(thread_id, "task-123", request=SimpleNamespace(app=app))
+
+ assert task == {
+ "task_id": "task-123",
+ "task_type": "agent",
+ "status": "completed",
+ "command_line": None,
+ "result": "workspace looks empty",
+ "text": "workspace looks empty",
+ }
+ monkeypatch.undo()
+
+
+@pytest.mark.asyncio
+async def test_blocking_subagent_done_state_overrides_stale_running_stream_on_detail_and_tasks(monkeypatch):
+ thread_id = "parent-thread-stale-running-completed"
+ builder = DisplayBuilder()
+ _set_single_subagent_entry(
+ builder,
+ thread_id,
+ task_id="task-stale-completed",
+ thread_ref="subagent-task-stale-completed",
+ status="running",
+ result="workspace looks empty",
+ )
+ app = _make_router_app(builder, thread_id, monkeypatch)
+
+ detail = await threads_router.get_thread_messages(thread_id, user_id="owner-1", app=app)
+ tasks = await threads_router.list_tasks(thread_id, request=SimpleNamespace(app=app))
+ task = await threads_router.get_task(thread_id, "task-stale-completed", request=SimpleNamespace(app=app))
+
+ stream = detail["entries"][1]["segments"][0]["step"]["subagent_stream"]
+ assert stream["status"] == "completed"
+ assert tasks[0]["status"] == "completed"
+ assert task["status"] == "completed"
+
+
+@pytest.mark.asyncio
+async def test_blocking_subagent_error_overrides_stale_running_stream_on_detail_and_tasks(monkeypatch):
+ thread_id = "parent-thread-stale-running-error"
+ builder = DisplayBuilder()
+ _set_single_subagent_entry(
+ builder,
+ thread_id,
+ task_id="task-stale-error",
+ thread_ref="subagent-task-stale-error",
+ status="running",
+ result="Agent failed: bad child model",
+ )
+ app = _make_router_app(builder, thread_id, monkeypatch)
+
+ detail = await threads_router.get_thread_messages(thread_id, user_id="owner-1", app=app)
+ tasks = await threads_router.list_tasks(thread_id, request=SimpleNamespace(app=app))
+ task = await threads_router.get_task(thread_id, "task-stale-error", request=SimpleNamespace(app=app))
+
+ stream = detail["entries"][1]["segments"][0]["step"]["subagent_stream"]
+ assert stream["status"] == "error"
+ assert tasks[0]["status"] == "error"
+ assert task["status"] == "error"
diff --git a/tests/test_daytona_e2e.py b/tests/Integration/test_daytona_e2e.py
similarity index 100%
rename from tests/test_daytona_e2e.py
rename to tests/Integration/test_daytona_e2e.py
diff --git a/tests/test_e2e_backend_api.py b/tests/Integration/test_e2e_backend_api.py
similarity index 100%
rename from tests/test_e2e_backend_api.py
rename to tests/Integration/test_e2e_backend_api.py
diff --git a/tests/test_e2e_providers.py b/tests/Integration/test_e2e_providers.py
similarity index 100%
rename from tests/test_e2e_providers.py
rename to tests/Integration/test_e2e_providers.py
diff --git a/tests/test_e2e_summary_persistence.py b/tests/Integration/test_e2e_summary_persistence.py
similarity index 100%
rename from tests/test_e2e_summary_persistence.py
rename to tests/Integration/test_e2e_summary_persistence.py
diff --git a/tests/Integration/test_entities_router.py b/tests/Integration/test_entities_router.py
new file mode 100644
index 000000000..5e7254497
--- /dev/null
+++ b/tests/Integration/test_entities_router.py
@@ -0,0 +1,61 @@
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+
+from backend.web.routers import entities as entities_router
+from storage.contracts import EntityRow, MemberRow
+
+
+@pytest.mark.asyncio
+async def test_list_entities_excludes_child_agent_branches_from_chat_discovery():
+ now = 1_775_223_756.0
+ user = MemberRow(id="u1", name="owner", type="human", created_at=now)
+ other_human = MemberRow(id="u2", name="other", type="human", created_at=now)
+ main_agent_member = MemberRow(
+ id="a-main",
+ name="Toad",
+ type="mycel_agent",
+ owner_user_id="u2",
+ created_at=now,
+ )
+ child_agent_member = MemberRow(
+ id="a-child",
+ name="Toad Branch",
+ type="mycel_agent",
+ owner_user_id="u2",
+ created_at=now,
+ )
+
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ entity_repo=SimpleNamespace(
+ list_by_type=lambda entity_type: (
+ [
+ EntityRow(id="a-main-1", type="agent", member_id="a-main", name="Toad", thread_id="thread-main", created_at=now),
+ EntityRow(
+ id="a-child-1",
+ type="agent",
+ member_id="a-child",
+ name="Toad · 分身1",
+ thread_id="thread-child",
+ created_at=now,
+ ),
+ ]
+ if entity_type == "agent"
+ else []
+ )
+ ),
+ member_repo=SimpleNamespace(list_all=lambda: [user, other_human, main_agent_member, child_agent_member]),
+ thread_repo=SimpleNamespace(
+ get_by_id=lambda thread_id: (
+ {"is_main": True, "branch_index": 0} if thread_id == "thread-main" else {"is_main": False, "branch_index": 1}
+ )
+ ),
+ )
+ )
+
+ result = await entities_router.list_entities(user_id="u1", app=app)
+
+ assert [item["id"] for item in result] == ["u2", "a-main-1"]
diff --git a/tests/Integration/test_leon_agent.py b/tests/Integration/test_leon_agent.py
new file mode 100644
index 000000000..e410f7df4
--- /dev/null
+++ b/tests/Integration/test_leon_agent.py
@@ -0,0 +1,1206 @@
+"""Integration tests for LeonAgent with QueryLoop.
+
+Uses mock model to verify the full astream pipeline without real API calls.
+"""
+
+import json
+import os
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage, SystemMessage, ToolMessage
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _mock_model(text="Integration test response"):
+ """Create a mock LangChain model that returns a plain AIMessage."""
+ ai_msg = AIMessage(content=text)
+ model = MagicMock()
+ model.bind_tools.return_value = model
+ model.ainvoke = AsyncMock(return_value=ai_msg)
+ # configurable_fields support
+ model.configurable_fields.return_value = model
+ model.with_config.return_value = model
+ return model
+
+
+def _empty_stream_model():
+ class _EmptyStreamModel:
+ def bind_tools(self, tools):
+ return self
+
+ def configurable_fields(self, **kwargs):
+ return self
+
+ def with_config(self, **kwargs):
+ return self
+
+ async def astream(self, messages):
+ if False:
+ yield AIMessageChunk(content="")
+
+ return _EmptyStreamModel()
+
+
+def _patch_env_api_key():
+ """Ensure ANTHROPIC_API_KEY is set for LeonAgent init (uses a fake value)."""
+ return patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-test-integration"})
+
+
+class _MemoryCheckpointer:
+ def __init__(self):
+ self.store = {}
+
+ async def aget(self, cfg):
+ return self.store.get(cfg["configurable"]["thread_id"])
+
+ async def aput(self, cfg, checkpoint, metadata, new_versions):
+ self.store[cfg["configurable"]["thread_id"]] = checkpoint
+
+
+class _DirectCompactionProbeModel:
+ def __init__(self):
+ self.summary_calls = 0
+ self.turn_calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ def configurable_fields(self, **kwargs):
+ return self
+
+ def with_config(self, **kwargs):
+ return self
+
+ def bind(self, **kwargs):
+ return self
+
+ async def ainvoke(self, messages):
+ first_content = getattr(messages[0], "content", "") if messages else ""
+ if isinstance(first_content, str) and "summarizing conversations" in first_content:
+ self.summary_calls += 1
+ return AIMessage(
+ content=(
+ "1. Request/Intent — summarize\n"
+ "2. Technical Concepts — compaction\n"
+ "3. Files/Code — none\n"
+ "4. Errors — none\n"
+ "5. Problem Solving — keep going\n"
+ "6. User Messages — large payloads\n"
+ "7. Pending Tasks — continue\n"
+ "8. Current Work — compacting\n"
+ "9. Next Step — answer user"
+ )
+ )
+
+ self.turn_calls += 1
+ return AIMessage(content=f"OK_{self.turn_calls}")
+
+
+class _MessageCaptureModel:
+ def __init__(self, text: str = "captured"):
+ self.calls: list[list[object]] = []
+ self.text = text
+
+ def bind_tools(self, tools):
+ return self
+
+ def configurable_fields(self, **kwargs):
+ return self
+
+ def with_config(self, **kwargs):
+ return self
+
+ def bind(self, **kwargs):
+ return self
+
+ async def ainvoke(self, messages):
+ self.calls.append(list(messages))
+ return AIMessage(content=self.text)
+
+
+def test_leon_agent_destructor_does_not_reenable_skipped_sandbox_cleanup():
+ """Explicit child close(cleanup_sandbox=False) must stay final under __del__."""
+ from core.runtime.agent import LeonAgent
+
+ agent = object.__new__(LeonAgent)
+ agent._session_started = False
+ agent._mark_terminated = MagicMock()
+ agent._cleanup_mcp_client = MagicMock()
+ agent._cleanup_sqlite_connection = MagicMock()
+ agent._cleanup_sandbox = MagicMock()
+
+ LeonAgent.close(agent, cleanup_sandbox=False)
+ LeonAgent.__del__(agent)
+
+ agent._cleanup_sandbox.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# Integration Tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_simple_run(tmp_path):
+ """LeonAgent with mock model: astream completes and yields chunks."""
+ from core.runtime.agent import LeonAgent
+
+ mock_model = _mock_model("Hello from integration test")
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+
+ results = []
+ async for chunk in agent.agent.astream(
+ {"messages": [{"role": "user", "content": "hello"}]},
+ config={"configurable": {"thread_id": "test-integration-1"}},
+ stream_mode="updates",
+ ):
+ results.append(chunk)
+
+ assert len(results) > 0
+ # At least one agent chunk
+ agent_chunks = [c for c in results if "agent" in c]
+ assert len(agent_chunks) >= 1
+ # Agent message content matches mock
+ first_ai_msgs = agent_chunks[0]["agent"]["messages"]
+ assert any("integration test" in str(m.content) for m in first_ai_msgs)
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_astream_interface_compatible(tmp_path):
+ """astream yields dicts with 'agent' key — compatible with LangGraph stream_mode=updates."""
+ from core.runtime.agent import LeonAgent
+
+ mock_model = _mock_model("Compatible response")
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+
+ chunks = []
+ async for chunk in agent.agent.astream(
+ {"messages": [{"role": "user", "content": "test"}]},
+ config={"configurable": {"thread_id": "test-integration-2"}},
+ stream_mode="updates",
+ ):
+ chunks.append(chunk)
+
+ # All chunks are dicts
+ assert all(isinstance(c, dict) for c in chunks)
+ # All keys are one of "agent" or "tools"
+ for c in chunks:
+ assert set(c.keys()).issubset({"agent", "tools"})
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_astream_messages_updates_mode_yields_langgraph_tuples(tmp_path):
+ """messages+updates mode must yield LangGraph-style (mode, data) tuples for SSE consumers."""
+ from core.runtime.agent import LeonAgent
+
+ mock_model = _mock_model("Tuple compatible response")
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+
+ chunks = []
+ async for chunk in agent.agent.astream(
+ {"messages": [{"role": "user", "content": "tuple"}]},
+ config={"configurable": {"thread_id": "test-integration-tuples"}},
+ stream_mode=["messages", "updates"],
+ ):
+ chunks.append(chunk)
+
+ assert chunks
+ assert all(isinstance(chunk, tuple) and len(chunk) == 2 for chunk in chunks)
+ assert any(mode == "messages" for mode, _ in chunks)
+ assert any(mode == "updates" for mode, _ in chunks)
+
+ message_chunks = [data for mode, data in chunks if mode == "messages"]
+ first_msg_chunk, first_metadata = message_chunks[0]
+ assert isinstance(first_msg_chunk, AIMessageChunk)
+ assert "Tuple compatible response" in str(first_msg_chunk.content)
+ assert isinstance(first_metadata, dict)
+
+ update_chunks = [data for mode, data in chunks if mode == "updates"]
+ assert any("agent" in update for update in update_chunks)
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_astream_raises_loudly_on_empty_stream(tmp_path):
+ """Empty streaming responses should surface as errors, not silent empty iterators."""
+ from core.runtime.agent import LeonAgent
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=_empty_stream_model()),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+
+ with pytest.raises(RuntimeError, match="streaming model returned no AIMessageChunk"):
+ async for _ in agent.astream(
+ "test",
+ thread_id="test-empty-stream",
+ stream_mode=["messages", "updates"],
+ ):
+ pass
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_bundle_dir_registers_mcp_resource_tools(tmp_path):
+ """Member bundle MCP config should surface MCP resource tools in the live registry."""
+ from core.runtime.agent import LeonAgent
+
+ member_dir = tmp_path / "members" / "toad"
+ member_dir.mkdir(parents=True)
+ (member_dir / "agent.md").write_text(
+ "---\nname: Toad\ndescription: Demo member\n---\nYou are Toad.\n",
+ encoding="utf-8",
+ )
+ (member_dir / ".mcp.json").write_text(
+ '{"mcpServers":{"nu50demo":{"transport":"stdio","command":"uv","args":["run","python","/tmp/nu50_mcp_server.py"]}}}',
+ encoding="utf-8",
+ )
+
+ mock_model = _mock_model("Bundle MCP response")
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(
+ workspace_root=str(tmp_path),
+ bundle_dir=str(member_dir),
+ api_key="sk-test-integration",
+ )
+ await agent.ainit()
+
+ assert agent._tool_registry.get("ListMcpResources") is not None
+ assert agent._tool_registry.get("ReadMcpResource") is not None
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_announces_mcp_instruction_delta_once_and_reannounces_on_change(tmp_path):
+ from core.runtime.agent import LeonAgent
+
+ member_dir = tmp_path / "members" / "toad"
+ member_dir.mkdir(parents=True)
+ (member_dir / "agent.md").write_text(
+ "---\nname: Toad\ndescription: Demo member\n---\nYou are Toad.\n",
+ encoding="utf-8",
+ )
+
+ def _write_mcp(instructions: str) -> None:
+ (member_dir / ".mcp.json").write_text(
+ json.dumps(
+ {
+ "mcpServers": {
+ "nu50demo": {
+ "transport": "stdio",
+ "command": "uv",
+ "args": ["run", "python", "/tmp/nu50_mcp_server.py"],
+ "instructions": instructions,
+ }
+ }
+ }
+ ),
+ encoding="utf-8",
+ )
+
+ def _message_text(message: object) -> str:
+ content = getattr(message, "content", "")
+ if isinstance(content, str):
+ return content
+ if isinstance(content, list):
+ return "\n".join(str(block.get("text", "")) for block in content if isinstance(block, dict))
+ return str(content)
+
+ def _delta_messages(messages: list[object]) -> list[str]:
+ hits: list[str] = []
+ for message in messages:
+ content = _message_text(message)
+            if "<mcp_instructions>" in content:  # marker reconstructed: bare "" is vacuously a substring of every message — TODO confirm exact tag emitted by the MCP delta announcement
+ hits.append(content)
+ return hits
+
+ _write_mcp("Use nu50demo carefully.")
+ first_model = _MessageCaptureModel("First MCP delta response")
+ checkpointer = _MemoryCheckpointer()
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=first_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(
+ workspace_root=str(tmp_path),
+ bundle_dir=str(member_dir),
+ api_key="sk-test-integration",
+ )
+ await agent.ainit()
+ agent.checkpointer = checkpointer
+ agent.agent.checkpointer = checkpointer
+
+ await agent.ainvoke("first turn", thread_id="mcp-delta-thread")
+ assert first_model.calls
+ first_messages = first_model.calls[0]
+ first_deltas = _delta_messages(first_messages)
+ assert len(first_deltas) == 1
+ assert "Use nu50demo carefully." in first_deltas[0]
+
+ second_call_index = len(first_model.calls)
+ await agent.ainvoke("second turn", thread_id="mcp-delta-thread")
+ assert len(first_model.calls) > second_call_index
+ second_messages = first_model.calls[second_call_index]
+ second_deltas = _delta_messages(second_messages)
+ assert len(second_deltas) == 1
+ assert second_deltas[0] == first_deltas[0]
+
+ agent.close()
+
+ _write_mcp("Use nu50demo only for trusted reads.")
+ second_model = _MessageCaptureModel("Second MCP delta response")
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=second_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(
+ workspace_root=str(tmp_path),
+ bundle_dir=str(member_dir),
+ api_key="sk-test-integration",
+ )
+ await agent.ainit()
+ agent.checkpointer = checkpointer
+ agent.agent.checkpointer = checkpointer
+
+ await agent.ainvoke("third turn", thread_id="mcp-delta-thread")
+ assert second_model.calls
+ third_messages = second_model.calls[0]
+ third_deltas = _delta_messages(third_messages)
+ assert len(third_deltas) == 2
+ assert "Use nu50demo carefully." in third_deltas[0]
+ assert "Use nu50demo only for trusted reads." in third_deltas[1]
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_memoizes_prompt_sections_between_builds(tmp_path):
+ """Pattern 6: prompt sections should be cached across repeated prompt assembly."""
+ from core.runtime import prompts as prompt_builders
+ from core.runtime.agent import LeonAgent
+
+ mock_model = _mock_model("Prompt cache response")
+ original_context = prompt_builders.build_context_section
+ original_rules = prompt_builders.build_rules_section
+ counts = {"context": 0, "rules": 0}
+
+ def counted_context(*args, **kwargs):
+ counts["context"] += 1
+ return original_context(*args, **kwargs)
+
+ def counted_rules(*args, **kwargs):
+ counts["rules"] += 1
+ return original_rules(*args, **kwargs)
+
+ with (
+ patch("core.runtime.prompts.build_context_section", side_effect=counted_context),
+ patch("core.runtime.prompts.build_rules_section", side_effect=counted_rules),
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+
+ first = agent._compose_system_prompt()
+ second = agent._compose_system_prompt()
+
+ assert first == second
+ assert counts == {"context": 1, "rules": 1}
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_clear_thread_invalidates_prompt_section_cache(tmp_path):
+ """Pattern 6: clear should invalidate cached prompt sections before rebuilding."""
+ from core.runtime import prompts as prompt_builders
+ from core.runtime.agent import LeonAgent
+
+ mock_model = _mock_model("Prompt clear response")
+ original_context = prompt_builders.build_context_section
+ original_rules = prompt_builders.build_rules_section
+ counts = {"context": 0, "rules": 0}
+
+ def counted_context(*args, **kwargs):
+ counts["context"] += 1
+ return original_context(*args, **kwargs)
+
+ def counted_rules(*args, **kwargs):
+ counts["rules"] += 1
+ return original_rules(*args, **kwargs)
+
+ with (
+ patch("core.runtime.prompts.build_context_section", side_effect=counted_context),
+ patch("core.runtime.prompts.build_rules_section", side_effect=counted_rules),
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+ agent.agent.aclear = AsyncMock()
+
+ assert counts == {"context": 1, "rules": 1}
+
+ await agent.aclear_thread("prompt-clear-thread")
+
+ assert counts == {"context": 2, "rules": 2}
+
+ agent.close()
+
+
+def test_build_rules_section_unifies_core_risk_and_tool_preferences():
+ from core.runtime.prompts import build_rules_section
+
+ rules = build_rules_section(
+ is_sandbox=False,
+ working_dir="/repo",
+ workspace_root="/repo",
+ )
+
+ assert "**Workspace**" in rules
+ assert "**Absolute Paths**" in rules
+ assert "**Security**" in rules
+ assert "**Tool Priority**" in rules
+ assert "Do not guess URLs" in rules
+ assert "Do not add features, refactor code, or make speculative abstractions" in rules
+ assert "Don't create helpers, utilities, or abstractions for one-time operations" in rules
+ assert "Don't add error handling, fallbacks, or validation for scenarios that can't happen" in rules
+ assert "Prefer dedicated tools over `Bash`" in rules
+ assert "Use `Read` instead of `cat`, `head`, or `tail`." in rules
+ assert "Use `Glob`/`Grep` for file discovery and content search before falling back to `Bash`." in rules
+ assert "Ask before destructive, hard-to-reverse, or shared-state actions" in rules
+ assert (
+ "Examples: deleting files, force-pushing, dropping tables, killing unfamiliar processes, modifying shared infrastructure." in rules
+ )
+ assert "Background Task Description" not in rules
+
+
+def test_build_rules_section_includes_function_result_clearing_guidance_when_spill_buffer_enabled():
+ from core.runtime.prompts import build_rules_section
+
+ rules = build_rules_section(
+ is_sandbox=False,
+ working_dir="/repo",
+ workspace_root="/repo",
+ spill_buffer_enabled=True,
+ spill_keep_recent=3,
+ )
+
+ assert "**Function Result Clearing**" in rules
+ assert "Old tool results may be cleared from context to free up space." in rules
+ assert "The 3 most recent results are always kept." in rules
+ assert "write down any important information you might need later in your response" in rules
+
+
+def test_build_rules_section_omits_function_result_clearing_guidance_when_spill_buffer_disabled():
+ from core.runtime.prompts import build_rules_section
+
+ rules = build_rules_section(
+ is_sandbox=False,
+ working_dir="/repo",
+ workspace_root="/repo",
+ spill_buffer_enabled=False,
+ spill_keep_recent=3,
+ )
+
+ assert "**Function Result Clearing**" not in rules
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_session_start_hook_runs_on_ainit(tmp_path):
+ from core.runtime.agent import LeonAgent
+
+ mock_model = _mock_model("Session start response")
+ seen = []
+
+ def on_start(payload):
+ seen.append(payload)
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ agent.app_state.add_session_hook("SessionStart", on_start)
+
+ await agent.ainit()
+
+ assert len(seen) == 1
+ assert seen[0]["event"] == "SessionStart"
+ assert seen[0]["sandbox"] == "local"
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_session_end_hook_runs_on_close(tmp_path):
+ from core.runtime.agent import LeonAgent
+
+ mock_model = _mock_model("Session end response")
+ seen = []
+
+ def on_end(payload):
+ seen.append(payload)
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+ agent.app_state.add_session_hook("SessionEnd", on_end)
+
+ agent.close()
+
+ assert len(seen) == 1
+ assert seen[0]["event"] == "SessionEnd"
+ assert seen[0]["sandbox"] == "local"
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_session_hooks_support_async_callbacks_and_fire_once(tmp_path):
+ from core.runtime.agent import LeonAgent
+
+ mock_model = _mock_model("Session once response")
+ seen = []
+
+ async def on_start(payload):
+ seen.append(("start", payload["event"]))
+
+ async def on_end(payload):
+ seen.append(("end", payload["event"]))
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ agent.app_state.add_session_hook("SessionStart", on_start)
+ agent.app_state.add_session_hook("SessionEnd", on_end)
+
+ await agent.ainit()
+ await agent.ainit()
+ agent.close()
+ agent.close()
+
+ assert seen == [("start", "SessionStart"), ("end", "SessionEnd")]
+
+
+class _DeferredDiscoveryProbeModel:
+ def __init__(self):
+ self.turn_tool_names: list[list[str]] = []
+ self._tools: list[dict] = []
+ self._turn = 0
+
+ def bind_tools(self, tools):
+ self._tools = list(tools or [])
+ self.turn_tool_names.append([tool.get("name") for tool in self._tools if isinstance(tool, dict)])
+ return self
+
+ def configurable_fields(self, **kwargs):
+ return self
+
+ def with_config(self, *args, **kwargs):
+ return self
+
+ async def ainvoke(self, messages):
+ if self._turn == 0:
+ self._turn += 1
+ return AIMessage(
+ content="",
+ tool_calls=[{"name": "tool_search", "args": {"query": "select:TaskCreate"}, "id": "tc-search"}],
+ )
+ self._turn += 1
+ return AIMessage(content="done")
+
+
+class _DeferredExecutionProbeModel:
+ def __init__(self):
+ self.turn_tool_names: list[list[str]] = []
+ self._tools: list[dict] = []
+ self._turn = 0
+
+ def bind_tools(self, tools):
+ self._tools = list(tools or [])
+ self.turn_tool_names.append([tool.get("name") for tool in self._tools if isinstance(tool, dict)])
+ return self
+
+ def configurable_fields(self, **kwargs):
+ return self
+
+ def with_config(self, *args, **kwargs):
+ return self
+
+ async def ainvoke(self, messages):
+ if self._turn == 0:
+ self._turn += 1
+ return AIMessage(
+ content="",
+ tool_calls=[{"name": "tool_search", "args": {"query": "select:TaskCreate"}, "id": "tc-search"}],
+ )
+ if self._turn == 1:
+ self._turn += 1
+ return AIMessage(
+ content="",
+ tool_calls=[
+ {
+ "name": "TaskCreate",
+ "args": {"subject": "PT02_EXEC", "description": "created after discovery"},
+ "id": "tc-task-create",
+ }
+ ],
+ )
+ self._turn += 1
+ return AIMessage(content="PT02_EXEC_DONE")
+
+
+class _DeferredCrossThreadProbeModel:
+ def __init__(self):
+ self.turn_tool_names: list[list[str]] = []
+ self._tools: list[dict] = []
+
+ def bind_tools(self, tools):
+ self._tools = list(tools or [])
+ self.turn_tool_names.append([tool.get("name") for tool in self._tools if isinstance(tool, dict)])
+ return self
+
+ def configurable_fields(self, **kwargs):
+ return self
+
+ def with_config(self, *args, **kwargs):
+ return self
+
+ async def ainvoke(self, messages):
+ joined = " ".join(str(getattr(msg, "content", "")) for msg in messages)
+ current_tool_names = {tool.get("name") for tool in self._tools if isinstance(tool, dict)}
+
+ if "discover task tools" in joined and "TaskCreate" not in current_tool_names:
+ return AIMessage(
+ content="",
+ tool_calls=[{"name": "tool_search", "args": {"query": "select:TaskCreate"}, "id": "tc-search"}],
+ )
+
+ if "discover task tools" in joined:
+ return AIMessage(content="discover-done")
+
+ return AIMessage(content="plain-done")
+
+
+class _DeferredInlineSelectProbeModel:
+ def __init__(self):
+ self.turn_tool_names: list[list[str]] = []
+ self._tools: list[dict] = []
+ self._turn = 0
+
+ def bind_tools(self, tools):
+ self._tools = list(tools or [])
+ self.turn_tool_names.append([tool.get("name") for tool in self._tools if isinstance(tool, dict)])
+ return self
+
+ def configurable_fields(self, **kwargs):
+ return self
+
+ def with_config(self, *args, **kwargs):
+ return self
+
+ async def ainvoke(self, messages):
+ if self._turn == 0:
+ self._turn += 1
+ return AIMessage(
+ content="",
+ tool_calls=[{"name": "tool_search", "args": {"query": "select:Read,TaskCreate"}, "id": "tc-search"}],
+ )
+ self._turn += 1
+ return AIMessage(content="after-inline-select")
+
+
+class _DeferredResumeProbeModel:
+ def __init__(self):
+ self.turn_tool_names: list[list[str]] = []
+ self._tools: list[dict] = []
+
+ def bind_tools(self, tools):
+ self._tools = list(tools or [])
+ self.turn_tool_names.append([tool.get("name") for tool in self._tools if isinstance(tool, dict)])
+ return self
+
+ def configurable_fields(self, **kwargs):
+ return self
+
+ def with_config(self, *args, **kwargs):
+ return self
+
+ async def ainvoke(self, messages):
+ return AIMessage(content="resume-done")
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_reinjects_discovered_deferred_tool_schemas_on_following_turn(tmp_path):
+ """Deferred tools discovered via tool_search must become real schemas on the next turn."""
+ from core.runtime.agent import LeonAgent
+
+ probe_model = _DeferredDiscoveryProbeModel()
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=probe_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+
+ result = await agent.ainvoke("discover task tools", thread_id="test-deferred-discovery")
+
+ assert result["reason"] == "completed"
+ assert len(probe_model.turn_tool_names) >= 2
+ first_turn, second_turn = probe_model.turn_tool_names[:2]
+ assert "TaskCreate" not in first_turn
+ assert "tool_search" in first_turn
+ assert "TaskCreate" in second_turn
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_can_execute_discovered_deferred_tool_on_following_turn(tmp_path):
+ """A deferred tool discovered via tool_search should become callable on the next turn."""
+ from core.runtime.agent import LeonAgent
+
+ probe_model = _DeferredExecutionProbeModel()
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=probe_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+
+ result = await agent.ainvoke("discover then run deferred task tool", thread_id="test-deferred-execution")
+
+ assert result["reason"] == "completed"
+ assert len(probe_model.turn_tool_names) >= 2
+ assert "TaskCreate" not in probe_model.turn_tool_names[0]
+ assert "TaskCreate" in probe_model.turn_tool_names[1]
+
+ task_tool_messages = [msg for msg in result["messages"] if isinstance(msg, ToolMessage) and msg.tool_call_id == "tc-task-create"]
+ assert len(task_tool_messages) == 1
+ assert "PT02_EXEC" in str(task_tool_messages[0].content)
+ assert any(isinstance(msg, AIMessage) and msg.content == "PT02_EXEC_DONE" for msg in result["messages"])
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_deferred_discovery_does_not_leak_across_threads(tmp_path):
+ """Deferred tools discovered on one thread must not become inline on another thread."""
+ from core.runtime.agent import LeonAgent
+
+ probe_model = _DeferredCrossThreadProbeModel()
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=probe_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+
+ result_a = await agent.ainvoke("discover task tools", thread_id="thread-A")
+ result_b = await agent.ainvoke("plain request", thread_id="thread-B")
+
+ assert result_a["reason"] == "completed"
+ assert result_b["reason"] == "completed"
+ assert len(probe_model.turn_tool_names) >= 3
+
+ first_thread_a, second_thread_a, first_thread_b = probe_model.turn_tool_names[:3]
+ assert "TaskCreate" not in first_thread_a
+ assert "TaskCreate" in second_thread_a
+ assert "TaskCreate" not in first_thread_b
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_tool_search_exact_select_fails_loudly_for_inline_tools(tmp_path):
+ """Exact select should surface inline-tool misuse as a tool_use_error in the live loop."""
+ from core.runtime.agent import LeonAgent
+
+ probe_model = _DeferredInlineSelectProbeModel()
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=probe_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+
+ result = await agent.ainvoke("probe inline select", thread_id="test-inline-select")
+
+ assert result["reason"] == "completed"
+ tool_messages = [msg for msg in result["messages"] if isinstance(msg, ToolMessage) and msg.tool_call_id == "tc-search"]
+ assert len(tool_messages) == 1
+        assert "<tool_use_error>" in str(tool_messages[0].content)
+ assert "inline/already-available tools: Read" in str(tool_messages[0].content)
+ assert any(isinstance(msg, AIMessage) and msg.content == "after-inline-select" for msg in result["messages"])
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_restores_discovered_deferred_tools_after_restart(tmp_path):
+ """Restarting the loop on the same thread should restore prior deferred discoveries from history."""
+ from core.runtime.agent import LeonAgent
+
+ checkpointer = _MemoryCheckpointer()
+ discovery_model = _DeferredDiscoveryProbeModel()
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=discovery_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+ agent.checkpointer = checkpointer
+ agent.agent.checkpointer = checkpointer
+
+ result = await agent.ainvoke("discover task tools", thread_id="resume-thread")
+ assert result["reason"] == "completed"
+ agent.close()
+
+ resume_model = _DeferredResumeProbeModel()
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=resume_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+ agent.checkpointer = checkpointer
+ agent.agent.checkpointer = checkpointer
+
+ result = await agent.ainvoke("after restart", thread_id="resume-thread")
+
+ assert result["reason"] == "completed"
+ assert resume_model.turn_tool_names
+ assert "TaskCreate" in resume_model.turn_tool_names[0]
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_multiple_thread_ids(tmp_path):
+ """Different thread_ids produce independent sessions (no cross-contamination)."""
+ from core.runtime.agent import LeonAgent
+
+ mock_model = MagicMock()
+ mock_model.bind_tools.return_value = mock_model
+ mock_model.with_config.return_value = mock_model
+ mock_model.configurable_fields.return_value = mock_model
+ mock_model.ainvoke = AsyncMock(
+ side_effect=[
+ AIMessage(content="Response for thread-A"),
+ AIMessage(content="Response for thread-B"),
+ ]
+ )
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+
+ chunks_a = []
+ async for chunk in agent.agent.astream(
+ {"messages": [{"role": "user", "content": "hi A"}]},
+ config={"configurable": {"thread_id": "thread-A"}},
+ stream_mode="updates",
+ ):
+ chunks_a.append(chunk)
+
+ chunks_b = []
+ async for chunk in agent.agent.astream(
+ {"messages": [{"role": "user", "content": "hi B"}]},
+ config={"configurable": {"thread_id": "thread-B"}},
+ stream_mode="updates",
+ ):
+ chunks_b.append(chunk)
+
+ # Both sessions produced chunks
+ assert len(chunks_a) > 0
+ assert len(chunks_b) > 0
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_astream_wrapper_exposes_caller_surface(tmp_path):
+ """LeonAgent should expose a caller-owned astream surface instead of forcing callers onto agent.agent.astream."""
+ from core.runtime.agent import LeonAgent
+
+ mock_model = _mock_model("Caller surface response")
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+
+ chunks = []
+ async for chunk in agent.astream(
+ "caller stream",
+ thread_id="test-astream-wrapper",
+ stream_mode=["messages", "updates"],
+ ):
+ chunks.append(chunk)
+
+ assert chunks
+ assert all(isinstance(chunk, tuple) and len(chunk) == 2 for chunk in chunks)
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_astream_can_enforce_max_budget_per_event(tmp_path):
+ """Caller-owned astream surface should be able to stop once runtime cost exceeds a caller budget."""
+ from core.runtime.agent import LeonAgent
+
+ mock_model = _mock_model("Caller surface response")
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+
+ async def fake_stream(*args, **kwargs):
+ yield ("messages", ("first", {"langgraph_node": "agent"}))
+ yield ("updates", {"agent": {"messages": [AIMessage(content="done")]}})
+
+ agent.agent.astream = fake_stream
+ agent.runtime = SimpleNamespace(cost=0.75)
+
+ chunks = []
+ with pytest.raises(RuntimeError, match="max_budget_usd exceeded"):
+ async for chunk in agent.astream(
+ "caller stream",
+ thread_id="test-astream-budget",
+ stream_mode=["messages", "updates"],
+ max_budget_usd=0.5,
+ ):
+ chunks.append(chunk)
+
+ assert chunks == [("messages", ("first", {"langgraph_node": "agent"}))]
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_aclear_thread_resets_thread_history(tmp_path):
+ """aclear_thread should clear replayable thread history while preserving accumulators."""
+ from core.runtime.agent import LeonAgent
+
+ mock_model = _mock_model("clearable response")
+ checkpointer = _MemoryCheckpointer()
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+ agent.checkpointer = checkpointer
+ agent.agent.checkpointer = checkpointer
+ agent.app_state.total_cost = 1.25
+
+ await agent.ainvoke("hello", thread_id="clear-agent-thread")
+ assert checkpointer.store["clear-agent-thread"]["channel_values"]["messages"]
+
+ agent.agent._tool_read_file_state["/tmp/file.py"] = {"partial": False}
+ agent.agent._tool_loaded_nested_memory_paths.add("/tmp/memory.md")
+ agent.agent._tool_discovered_skill_names.add("skill-a")
+ old_session_id = agent._bootstrap.session_id
+
+ await agent.aclear_thread("clear-agent-thread")
+
+ assert checkpointer.store["clear-agent-thread"]["channel_values"]["messages"] == []
+ assert agent.app_state.messages == []
+ assert agent.app_state.turn_count == 0
+ assert agent.app_state.compact_boundary_index == 0
+ assert agent.app_state.total_cost == 1.25
+ assert agent._bootstrap.session_id != old_session_id
+ assert agent._bootstrap.parent_session_id == old_session_id
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_aclear_thread_does_not_restore_stale_summary(tmp_path):
+ from core.runtime.agent import LeonAgent
+ from core.runtime.middleware import ModelRequest, ModelResponse
+ from core.runtime.middleware.memory.summary_store import SummaryStore
+ from sandbox.thread_context import set_current_thread_id
+
+ async def _handler(req: ModelRequest) -> ModelResponse:
+ return ModelResponse(result=[AIMessage(content="final")], request_messages=req.messages)
+
+ mock_model = _mock_model("clearable response")
+ checkpointer = _MemoryCheckpointer()
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=mock_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+ agent.checkpointer = checkpointer
+ agent.agent.checkpointer = checkpointer
+
+ store = SummaryStore(tmp_path / "summary.db")
+ agent._memory_middleware.summary_store = store
+ store.save_summary(
+ thread_id="clear-summary-thread",
+ summary_text="STALE SUMMARY",
+ compact_up_to_index=2,
+ compacted_at=2,
+ )
+
+ await agent.aclear_thread("clear-summary-thread")
+
+ assert store.get_latest_summary("clear-summary-thread") is None
+
+ set_current_thread_id("clear-summary-thread")
+ request = ModelRequest(
+ model=mock_model,
+ messages=[HumanMessage(content="fresh-1"), HumanMessage(content="fresh-2")],
+ system_message=SystemMessage(content="sys"),
+ )
+ result = await agent._memory_middleware.awrap_model_call(request, _handler)
+
+ assert [msg.content for msg in result.request_messages] == ["fresh-1", "fresh-2"]
+
+ agent.close()
+
+
+@pytest.mark.asyncio
+@_patch_env_api_key()
+async def test_leon_agent_persists_summary_store_after_second_turn_compaction(tmp_path):
+ from core.runtime.agent import LeonAgent
+ from core.runtime.middleware.memory.summary_store import SummaryStore
+
+ checkpointer = _MemoryCheckpointer()
+ probe_model = _DirectCompactionProbeModel()
+
+ with (
+ patch("core.runtime.agent.LeonAgent._create_model", return_value=probe_model),
+ patch("core.runtime.agent.LeonAgent._init_async_components", return_value=(None, [])),
+ patch("core.runtime.agent.LeonAgent._init_checkpointer", new_callable=AsyncMock, return_value=None),
+ ):
+ agent = LeonAgent(workspace_root=str(tmp_path), api_key="sk-test-integration")
+ await agent.ainit()
+ agent.checkpointer = checkpointer
+ agent.agent.checkpointer = checkpointer
+
+ store = SummaryStore(tmp_path / "summary.db")
+ agent._memory_middleware.summary_store = store
+ agent._memory_middleware._compaction_threshold = 0.01
+ agent._memory_middleware.compactor.keep_recent_tokens = 10
+
+ turn1 = await agent.ainvoke("A" * 12000, thread_id="agent-compaction-thread")
+ assert turn1["reason"] == "completed"
+ assert store.get_latest_summary("agent-compaction-thread") is None
+
+ turn2 = await agent.ainvoke("B" * 12000, thread_id="agent-compaction-thread")
+ assert turn2["reason"] == "completed"
+ assert probe_model.summary_calls == 1
+ assert agent._memory_middleware._cached_summary is not None
+ assert agent._memory_middleware._compact_up_to_index > 0
+
+ summary = store.get_latest_summary("agent-compaction-thread")
+ assert summary is not None
+ assert summary.compact_up_to_index == agent._memory_middleware._compact_up_to_index
+ assert "Request/Intent" in summary.summary_text
+
+ agent.close()
diff --git a/tests/middleware/memory/test_memory_middleware_integration.py b/tests/Integration/test_memory_middleware_integration.py
similarity index 80%
rename from tests/middleware/memory/test_memory_middleware_integration.py
rename to tests/Integration/test_memory_middleware_integration.py
index 2892d1081..a33a60098 100644
--- a/tests/middleware/memory/test_memory_middleware_integration.py
+++ b/tests/Integration/test_memory_middleware_integration.py
@@ -3,13 +3,16 @@
Tests the complete flow: MemoryMiddleware → SummaryStore → SQLite → Checkpointer
"""
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import MagicMock
import pytest
from langchain_core.messages import AIMessage, HumanMessage
+from langchain_core.runnables import RunnableLambda
+from core.runtime.middleware import ModelRequest, ModelResponse
from core.runtime.middleware.memory.middleware import MemoryMiddleware
from core.runtime.middleware.memory.summary_store import SummaryStore
+from sandbox.thread_context import set_current_thread_id
@pytest.fixture
@@ -41,7 +44,7 @@ def mock_get(config):
@pytest.fixture
def mock_model():
"""Create mock LLM model for testing."""
- model = AsyncMock()
+ model = MagicMock()
async def mock_ainvoke(messages):
# Return a mock summary response
@@ -50,6 +53,7 @@ async def mock_ainvoke(messages):
return response
model.ainvoke = mock_ainvoke
+ model.bind.return_value = model
return model
@@ -165,6 +169,59 @@ async def mock_handler(req):
assert middleware2._compact_up_to_index == original_index
assert middleware2._summary_restored is True
+ @pytest.mark.asyncio
+ async def test_summary_restore_is_isolated_per_thread_on_shared_middleware(self, temp_db, mock_model):
+ middleware = MemoryMiddleware(
+ context_limit=10000,
+ compaction_threshold=0.5,
+ db_path=temp_db,
+ verbose=True,
+ )
+ middleware.set_model(mock_model)
+
+ store = SummaryStore(temp_db)
+ store.save_summary(
+ thread_id="t1",
+ summary_text="SUMMARY ONE",
+ compact_up_to_index=1,
+ compacted_at=2,
+ )
+ store.save_summary(
+ thread_id="t2",
+ summary_text="SUMMARY TWO",
+ compact_up_to_index=1,
+ compacted_at=2,
+ )
+
+ async def handler(req: ModelRequest) -> ModelResponse:
+ return ModelResponse(result=[], request_messages=req.messages)
+
+ request_t1 = ModelRequest(
+ model=RunnableLambda(lambda x: x),
+ messages=[HumanMessage(content="a1"), HumanMessage(content="a2")],
+ system_message=None,
+ )
+
+ request_t2 = ModelRequest(
+ model=RunnableLambda(lambda x: x),
+ messages=[HumanMessage(content="b1"), HumanMessage(content="b2")],
+ system_message=None,
+ )
+
+ set_current_thread_id("t1")
+ result_t1 = await middleware.awrap_model_call(request_t1, handler)
+ set_current_thread_id("t2")
+ result_t2 = await middleware.awrap_model_call(request_t2, handler)
+
+ assert [getattr(msg, "content", "") for msg in result_t1.request_messages] == [
+ "[Conversation Summary]\nSUMMARY ONE",
+ "a2",
+ ]
+ assert [getattr(msg, "content", "") for msg in result_t2.request_messages] == [
+ "[Conversation Summary]\nSUMMARY TWO",
+ "b2",
+ ]
+
class TestSplitTurnSaveAndRestore:
"""Test 3: Verify split turn summaries are saved and restored correctly."""
@@ -325,6 +382,59 @@ async def mock_handler(req):
assert summary1.summary_id != summary2.summary_id
+class TestCompactionBreakerScope:
+ """Breaker should gate proactive compaction without poisoning reactive recovery."""
+
+ @pytest.mark.asyncio
+ async def test_reactive_recovery_can_bypass_and_clear_thread_breaker(self, temp_db, mock_request):
+ class _EventuallyRecoveringModel:
+ def __init__(self):
+ self.compact_calls = 0
+
+ async def ainvoke(self, messages):
+ self.compact_calls += 1
+ if self.compact_calls <= 3:
+ raise RuntimeError("compaction failed")
+ response = MagicMock()
+ response.content = "Recovered summary"
+ return response
+
+ model = _EventuallyRecoveringModel()
+ middleware = MemoryMiddleware(
+ context_limit=10000,
+ compaction_threshold=0.5,
+ db_path=temp_db,
+ verbose=True,
+ )
+ middleware.set_model(model)
+
+ messages = create_large_message_list(30)
+ mock_request.messages = messages
+
+ async def mock_handler(req):
+ return ModelResponse(result=[], request_messages=req.messages)
+
+ for _ in range(3):
+ await middleware.awrap_model_call(mock_request, mock_handler)
+
+ snapshot = middleware.snapshot_thread_state("test-thread-1")
+ assert snapshot == {"failure_count": 3, "breaker_open": True}
+
+ recovered = await middleware.compact_messages_for_recovery(
+ messages,
+ thread_id="test-thread-1",
+ )
+ assert recovered is not None
+ assert getattr(recovered[0], "content", "").startswith("[Conversation Summary]\nRecovered summary")
+
+ snapshot = middleware.snapshot_thread_state("test-thread-1")
+ assert snapshot == {"failure_count": 0, "breaker_open": False}
+
+ result = await middleware.awrap_model_call(mock_request, mock_handler)
+ assert getattr(result.request_messages[0], "content", "").startswith("[Conversation Summary]\nRecovered summary")
+ assert model.compact_calls >= 5
+
+
class TestMissingThreadIdRaisesError:
"""Test 6: Verify missing thread_id is handled gracefully."""
diff --git a/tests/test_monitor_resources_route.py b/tests/Integration/test_monitor_resources_route.py
similarity index 100%
rename from tests/test_monitor_resources_route.py
rename to tests/Integration/test_monitor_resources_route.py
diff --git a/tests/test_p3_api_only.py b/tests/Integration/test_p3_api_only.py
similarity index 100%
rename from tests/test_p3_api_only.py
rename to tests/Integration/test_p3_api_only.py
diff --git a/tests/test_p3_e2e.py b/tests/Integration/test_p3_e2e.py
similarity index 100%
rename from tests/test_p3_e2e.py
rename to tests/Integration/test_p3_e2e.py
diff --git a/tests/Integration/test_query_loop_backend_bridge.py b/tests/Integration/test_query_loop_backend_bridge.py
new file mode 100644
index 000000000..c7fa25cd5
--- /dev/null
+++ b/tests/Integration/test_query_loop_backend_bridge.py
@@ -0,0 +1,1971 @@
+"""Backend-facing regression tests for QueryLoop caller-contract bridge."""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
+
+from backend.web.models.requests import SendMessageRequest
+from backend.web.routers import threads as threads_router
+from backend.web.routers.threads import get_thread_history, get_thread_messages
+from backend.web.services.display_builder import DisplayBuilder
+from backend.web.services.event_buffer import ThreadEventBuffer
+from backend.web.services.streaming_service import (
+ _ensure_thread_handlers,
+ _repair_incomplete_tool_calls,
+ _run_agent_to_buffer,
+ start_agent_run,
+)
+from core.runtime.loop import QueryLoop
+from core.runtime.middleware.memory.middleware import MemoryMiddleware
+from core.runtime.middleware.monitor.state_monitor import AgentState
+from core.runtime.middleware.queue.manager import MessageQueueManager
+from core.runtime.middleware.queue.middleware import SteeringMiddleware
+from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry
+from core.runtime.state import AppState, BootstrapConfig
+from core.tools.tool_search.service import ToolSearchService
+
+
+class _MemoryCheckpointer:
+ def __init__(self) -> None:
+ self.store: dict[str, dict] = {}
+
+ async def aget(self, cfg):
+ return self.store.get(cfg["configurable"]["thread_id"])
+
+ async def aget_tuple(self, cfg):
+ return None
+
+ async def aput(self, cfg, checkpoint, metadata, new_versions):
+ self.store[cfg["configurable"]["thread_id"]] = checkpoint
+
+
+class _NoToolModel:
+ def __init__(self, text: str = "done") -> None:
+ self._text = text
+
+ def bind_tools(self, tools):
+ return self
+
+ async def ainvoke(self, messages):
+ return AIMessage(content=self._text)
+
+
+class _TurnTextModel:
+ def __init__(self, *texts: str) -> None:
+ self._texts = list(texts)
+ self._index = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def ainvoke(self, messages):
+ if self._index < len(self._texts):
+ text = self._texts[self._index]
+ self._index += 1
+ return AIMessage(content=text)
+ return AIMessage(content=self._texts[-1] if self._texts else "done")
+
+
+class _TerminalFollowthroughPromptAwareModel:
+ def bind_tools(self, tools):
+ return self
+
+ async def ainvoke(self, messages):
+ system_text = ""
+ if messages and messages[0].__class__.__name__ == "SystemMessage":
+ system_text = getattr(messages[0], "content", "") or ""
+ last_human = next(
+ (msg.content for msg in reversed(messages) if msg.__class__.__name__ == "HumanMessage"),
+ "",
+ )
+ if "CommandNotification" not in last_human and "task-notification" not in last_human:
+ return AIMessage(content="UNRELATED")
+ if "Terminal background completion notifications require an explicit assistant followthrough." in system_text:
+ return AIMessage(content="FOLLOWTHROUGH_ACK")
+ return AIMessage(content="")
+
+
+class _TerminalFollowthroughSilentModel:
+ def bind_tools(self, tools):
+ return self
+
+ async def ainvoke(self, messages):
+ last_human = next(
+ (msg.content for msg in reversed(messages) if msg.__class__.__name__ == "HumanMessage"),
+ "",
+ )
+ if "CommandNotification" in last_human or "task-notification" in last_human:
+ return AIMessage(content="")
+ return AIMessage(content="UNRELATED")
+
+
+class _ChatNotificationSilentModel:
+ def bind_tools(self, tools):
+ return self
+
+ async def ainvoke(self, messages):
+ last_human = next(
+ (msg.content for msg in reversed(messages) if msg.__class__.__name__ == "HumanMessage"),
+ "",
+ )
+ if "New message from" in last_human and "read_messages(chat_id=" in last_human:
+ return AIMessage(content="")
+ return AIMessage(content="UNRELATED")
+
+
+class _PromptTooLongTwiceModel:
+ def bind_tools(self, tools):
+ return self
+
+ async def ainvoke(self, messages):
+ raise RuntimeError("prompt is too long")
+
+
+class _QueryOkWithFailingCompactorModel:
+ def bind_tools(self, tools):
+ return self
+
+ def bind(self, **kwargs):
+ return self
+
+ async def ainvoke(self, messages):
+ system_text = ""
+ if messages and messages[0].__class__.__name__ == "SystemMessage":
+ system_text = getattr(messages[0], "content", "") or ""
+ if "tasked with summarizing conversations" in system_text or "split turn" in system_text.lower():
+ raise RuntimeError("compaction failed")
+ return AIMessage(content="OK")
+
+
+class _BridgeReactiveCompactMiddleware:
+ compact_boundary_index = 1
+
+ async def compact_messages_for_recovery(self, messages):
+ return [SystemMessage(content="[Conversation Summary]\nSUMMARY")] + list(messages[-1:])
+
+
+class _ToolSearchInlineSelectModel:
+ def __init__(self) -> None:
+ self._turn = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def ainvoke(self, messages):
+ if self._turn == 0:
+ self._turn += 1
+ return AIMessage(
+ content="",
+ tool_calls=[{"name": "tool_search", "args": {"query": "select:Read,TaskCreate"}, "id": "tc-search"}],
+ )
+ return AIMessage(content="after-inline-select")
+
+
+class _ToolThenConcurrencyLimitModel:
+ def __init__(self) -> None:
+ self._turn = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def ainvoke(self, messages):
+ if self._turn == 0:
+ self._turn += 1
+ return AIMessage(
+ content="",
+ tool_calls=[{"name": "Write", "args": {"file_path": "/tmp/demo.txt", "content": "hi"}, "id": "tc-write"}],
+ )
+ raise RuntimeError("Concurrency limit exceeded for user, please retry later")
+
+
+class _SteerAwareTerminalModel:
+ def bind_tools(self, tools):
+ return self
+
+ async def ainvoke(self, messages):
+ last_human = next(
+ (msg.content for msg in reversed(messages) if msg.__class__.__name__ == "HumanMessage"),
+ "",
+ )
+ return AIMessage(content="STEER_DONE" if last_human == "Stop and just say STEER_DONE." else "UNKNOWN")
+
+
+class _StopHonestyAwareModel:
+ def bind_tools(self, tools):
+ return self
+
+ async def ainvoke(self, messages):
+ system_text = ""
+ if messages and messages[0].__class__.__name__ == "SystemMessage":
+ system_text = getattr(messages[0], "content", "") or ""
+ last_human = next(
+ (msg.content for msg in reversed(messages) if msg.__class__.__name__ == "HumanMessage"),
+ "",
+ )
+ if last_human != "Stop immediately. Do not continue the old task. Reply exactly STOPPED_NOW and do not write any file.":
+ return AIMessage(content="UNKNOWN")
+ if "Steer requests accepted during an active run are non-preemptive." in system_text:
+ return AIMessage(content="STOP_ACK_AFTER_COMPLETED_WORK")
+ return AIMessage(content="STOPPED_NOW")
+
+
+class _SteerCancelPoisonModel:
+ def __init__(self) -> None:
+ self._turn = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def ainvoke(self, messages):
+ if self._turn == 0:
+ self._turn += 1
+ return AIMessage(
+ content="",
+ tool_calls=[{"name": "SleepTool", "args": {}, "id": "tc-sleep"}],
+ )
+ last_human = next(
+ (msg.content for msg in reversed(messages) if msg.__class__.__name__ == "HumanMessage"),
+ "",
+ )
+ return AIMessage(content=f"LAST_HUMAN:{last_human}")
+
+
+class _FakeDisplayBuilder:
+ def __init__(self, cached_entries):
+ self._cached_entries = cached_entries
+ self.rebuilt_with: tuple[str, list[dict]] | None = None
+
+ def get_entries(self, thread_id: str):
+ return self._cached_entries
+
+ def build_from_checkpoint(self, thread_id: str, messages: list[dict]):
+ self.rebuilt_with = (thread_id, messages)
+ return [{"id": "rebuilt-notice", "role": "notice", "content": "rebuilt"}]
+
+ def get_display_seq(self, thread_id: str) -> int:
+ return 7
+
+
+class _StreamingGraphAgent:
+ checkpointer = None
+
+ async def aget_state(self, _config):
+ return SimpleNamespace(values={"messages": []})
+
+ async def astream(self, *_args, **_kwargs):
+ if False:
+ yield None
+
+
+class _NoResumeGraphAgent(_StreamingGraphAgent):
+ def __init__(self) -> None:
+ self.astream_calls = 0
+ self.aupdate_calls = 0
+
+ async def aupdate_state(self, *_args, **_kwargs):
+ self.aupdate_calls += 1
+
+ async def astream(self, *_args, **_kwargs):
+ self.astream_calls += 1
+ if False:
+ yield None
+ return
+
+
+class _StreamingRuntime:
+ current_state = AgentState.IDLE
+
+ def __init__(self) -> None:
+ self.current_run_source = None
+ self._event_callback = None
+ self.state = SimpleNamespace(flags=SimpleNamespace(is_compacting=False))
+
+ def set_event_callback(self, cb) -> None:
+ self._event_callback = cb
+
+ def bind_thread(self, *, activity_sink=None) -> None:
+ self._activity_sink = activity_sink
+
+ def get_status_dict(self) -> dict[str, object]:
+ return {"state": {"state": "idle", "flags": {}}}
+
+ def transition(self, new_state) -> bool:
+ valid = {
+ AgentState.IDLE: {AgentState.ACTIVE},
+ AgentState.ACTIVE: {AgentState.IDLE},
+ }
+ if new_state not in valid.get(self.current_state, set()):
+ return False
+ self.current_state = new_state
+ return True
+
+
+async def _wait_for_followthrough_text(loop: QueryLoop, thread_id: str, expected: str) -> None:
+ for _ in range(100):
+ state = await loop.aget_state({"configurable": {"thread_id": thread_id}})
+ messages = state.values.get("messages", []) if state and state.values else []
+ if any(msg.__class__.__name__ == "AIMessage" and getattr(msg, "content", None) == expected for msg in messages):
+ return
+ await asyncio.sleep(0.01)
+ raise AssertionError(f"followthrough text not observed: {expected}")
+
+
+def _make_loop(
+ *,
+ text: str = "done",
+ model=None,
+ registry: ToolRegistry | None = None,
+ checkpointer: _MemoryCheckpointer | None = None,
+ middleware: list | None = None,
+) -> QueryLoop:
+ return QueryLoop(
+ model=model or _NoToolModel(text=text),
+ system_prompt=SystemMessage(content="sys"),
+ middleware=middleware or [],
+ checkpointer=checkpointer,
+ registry=registry or ToolRegistry(),
+ app_state=AppState(),
+ runtime=None,
+ bootstrap=BootstrapConfig(workspace_root=Path("/tmp"), model_name="test-model"),
+ max_turns=5,
+ )
+
+
+def _patch_streaming_event_store(monkeypatch: pytest.MonkeyPatch) -> None:
+ seq = 0
+
+ async def fake_append_event(thread_id, run_id, event, message_id=None, run_event_repo=None):
+ nonlocal seq
+ seq += 1
+ return seq
+
+ async def fake_cleanup_old_runs(thread_id, keep_latest=1, run_event_repo=None):
+ return 0
+
+ monkeypatch.setattr("backend.web.services.event_store.append_event", fake_append_event)
+ monkeypatch.setattr("backend.web.services.streaming_service.cleanup_old_runs", fake_cleanup_old_runs)
+
+
+def _patch_direct_streaming(monkeypatch: pytest.MonkeyPatch) -> None:
+ _patch_streaming_event_store(monkeypatch)
+ monkeypatch.setattr("backend.web.services.streaming_service._ensure_thread_handlers", lambda *args, **kwargs: None)
+
+
+def _make_streaming_agent(loop: QueryLoop, *, queue_manager: MessageQueueManager | None = None) -> SimpleNamespace:
+ agent = SimpleNamespace(
+ agent=loop,
+ runtime=_StreamingRuntime(),
+ storage_container=None,
+ )
+ if queue_manager is not None:
+ agent.queue_manager = queue_manager
+ return agent
+
+
+def _make_streaming_app(
+ tmp_path: Path,
+ *,
+ thread_id: str | None = None,
+ agent: SimpleNamespace | None = None,
+ queue_manager: MessageQueueManager | None = None,
+ include_route_locks: bool = False,
+) -> tuple[SimpleNamespace, MessageQueueManager]:
+ queue_manager = queue_manager or MessageQueueManager(db_path=str(tmp_path / "queue.db"))
+ state = SimpleNamespace(
+ display_builder=DisplayBuilder(),
+ thread_tasks={},
+ thread_event_buffers={},
+ subagent_buffers={},
+ queue_manager=queue_manager,
+ thread_last_active={},
+ typing_tracker=None,
+ )
+ if thread_id is not None and agent is not None:
+ state.agent_pool = {f"{thread_id}:local": agent}
+ state.thread_sandbox = {thread_id: "local"}
+ state._event_loop = asyncio.get_running_loop()
+ if include_route_locks:
+ state.thread_locks = {}
+ state.thread_locks_guard = asyncio.Lock()
+ return SimpleNamespace(state=state), queue_manager
+
+
+def _make_direct_streaming_context(
+ tmp_path: Path,
+ loop: QueryLoop,
+ *,
+ queue_manager: MessageQueueManager | None = None,
+) -> tuple[SimpleNamespace, SimpleNamespace, ThreadEventBuffer]:
+ agent = _make_streaming_agent(loop, queue_manager=queue_manager)
+ app, _ = _make_streaming_app(tmp_path, queue_manager=queue_manager)
+ return agent, app, ThreadEventBuffer()
+
+
+def _make_route_followthrough_context(
+ tmp_path: Path,
+ *,
+ thread_id: str,
+ loop: QueryLoop,
+) -> tuple[MessageQueueManager, SimpleNamespace, SimpleNamespace]:
+ queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
+ agent = _make_streaming_agent(loop, queue_manager=queue_manager)
+ app, _ = _make_streaming_app(tmp_path, thread_id=thread_id, agent=agent, queue_manager=queue_manager)
+ _ensure_thread_handlers(agent, thread_id, app)
+ return queue_manager, agent, app
+
+
+async def _run_direct_notification_followthrough(
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ *,
+ loop: QueryLoop,
+ thread_id: str,
+ message: str,
+ run_id: str,
+ message_metadata: dict[str, str] | None = None,
+) -> list[dict]:
+ _patch_direct_streaming(monkeypatch)
+ agent, app, thread_buf = _make_direct_streaming_context(tmp_path, loop)
+
+ await _run_agent_to_buffer(
+ agent,
+ thread_id,
+ message,
+ app,
+ False,
+ thread_buf,
+ run_id,
+ message_metadata=message_metadata,
+ )
+
+ entries = app.state.display_builder.get_entries(thread_id)
+ assert entries is not None
+ return entries
+
+
+def _assert_notice_then_text(entries: list[dict], notice_contains: str, expected_text: str) -> None:
+ assert entries[0]["segments"][0]["type"] == "notice"
+ assert notice_contains in entries[0]["segments"][0]["content"]
+ assert entries[0]["segments"][1] == {"type": "text", "content": expected_text}
+
+
+async def _get_local_thread_history(thread_id: str, *, agent: SimpleNamespace, app: SimpleNamespace) -> dict:
+ with (
+ patch.object(threads_router, "get_or_create_agent", return_value=agent),
+ patch.object(threads_router, "resolve_thread_sandbox", return_value="local"),
+ ):
+ return await get_thread_history(thread_id, limit=20, truncate=400, user_id="u", app=app)
+
+
+def _patch_fake_event_bus(monkeypatch: pytest.MonkeyPatch) -> None:
+ class _FakeEventBus:
+ def subscribe(self, *_args, **_kwargs):
+ return None
+
+ def make_emitter(self, **_kwargs):
+ async def _emit(_event):
+ return None
+
+ return _emit
+
+ monkeypatch.setattr("backend.web.event_bus.get_event_bus", lambda: _FakeEventBus())
+
+
+@pytest.mark.asyncio
+async def test_repair_incomplete_tool_calls_uses_query_loop_state_bridge():
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(checkpointer=checkpointer)
+ broken_ai = AIMessage(
+ content="",
+ tool_calls=[{"name": "Read", "args": {"file_path": "/tmp/a.txt"}, "id": "tc-1"}],
+ )
+ trailing = HumanMessage(content="after tool")
+ trailing.id = "human-after"
+ checkpointer.store["repair-live-thread"] = {"channel_values": {"messages": [broken_ai, trailing]}}
+
+ await _repair_incomplete_tool_calls(
+ SimpleNamespace(agent=loop),
+ {"configurable": {"thread_id": "repair-live-thread"}},
+ )
+
+ state = await loop.aget_state({"configurable": {"thread_id": "repair-live-thread"}})
+
+ assert [msg.__class__.__name__ for msg in state.values["messages"]] == [
+ "AIMessage",
+ "ToolMessage",
+ "HumanMessage",
+ ]
+ assert [getattr(msg, "content", None) for msg in state.values["messages"]] == [
+ "",
+ "Error: task was interrupted (server restart or timeout). Results unavailable.",
+ "after tool",
+ ]
+
+
+@pytest.mark.asyncio
+async def test_get_thread_history_reads_messages_via_query_loop_state_bridge():
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(text="history reply", checkpointer=checkpointer)
+ config = {"configurable": {"thread_id": "history-thread"}}
+
+ async for _ in loop.query(
+ {"messages": [{"role": "user", "content": "hello"}]},
+ config=config,
+ ):
+ pass
+
+ fake_agent = SimpleNamespace(agent=loop)
+ fake_app = SimpleNamespace(state=SimpleNamespace())
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=fake_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ ):
+ history = await get_thread_history(
+ "history-thread",
+ limit=20,
+ truncate=300,
+ user_id="u",
+ app=fake_app,
+ )
+
+ assert history["total"] == 2
+ assert history["thread_id"] == "history-thread"
+ assert [item["role"] for item in history["messages"]] == ["human", "assistant"]
+ assert history["messages"][1]["text"] == "history reply"
+
+
+@pytest.mark.asyncio
+async def test_get_thread_history_skips_empty_ai_messages_after_notifications():
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(checkpointer=checkpointer)
+ system_notice = HumanMessage(
+ content="errorAgent failed"
+ )
+ system_notice.metadata = {"source": "system"}
+ checkpointer.store["history-empty-ai-thread"] = {
+ "channel_values": {
+ "messages": [
+ HumanMessage(content="launch background task"),
+ system_notice,
+ AIMessage(content=""),
+ ]
+ }
+ }
+
+ fake_agent = SimpleNamespace(agent=loop)
+ fake_app = SimpleNamespace(state=SimpleNamespace())
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=fake_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ ):
+ history = await get_thread_history(
+ "history-empty-ai-thread",
+ limit=20,
+ truncate=300,
+ user_id="u",
+ app=fake_app,
+ )
+
+ assert [item["role"] for item in history["messages"]] == ["human", "notification"]
+ assert history["messages"][-1]["text"].startswith("")  # FIXME(review): expected prefix literal was lost (markup stripped); startswith("") is always True, so this assertion is vacuous — restore the original tag/prefix
+
+
+@pytest.mark.asyncio
+async def test_get_thread_history_retains_tool_search_inline_select_error():
+ checkpointer = _MemoryCheckpointer()
+ registry = ToolRegistry()
+ registry.register(
+ ToolEntry(
+ name="Read",
+ mode=ToolMode.INLINE,
+ schema={"name": "Read", "description": "read file"},
+ handler=lambda **_: "read",
+ source="test",
+ )
+ )
+ registry.register(
+ ToolEntry(
+ name="TaskCreate",
+ mode=ToolMode.DEFERRED,
+ schema={"name": "TaskCreate", "description": "create task"},
+ handler=lambda **_: "task",
+ source="test",
+ )
+ )
+ ToolSearchService(registry)
+ loop = _make_loop(
+ model=_ToolSearchInlineSelectModel(),
+ registry=registry,
+ checkpointer=checkpointer,
+ )
+ config = {"configurable": {"thread_id": "history-tool-search-inline-select"}}
+
+ async for _ in loop.query(
+ {"messages": [{"role": "user", "content": "probe inline select"}]},
+ config=config,
+ ):
+ pass
+
+ fake_agent = SimpleNamespace(agent=loop)
+ fake_app = SimpleNamespace(state=SimpleNamespace())
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=fake_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ ):
+ history = await get_thread_history(
+ "history-tool-search-inline-select",
+ limit=20,
+ truncate=300,
+ user_id="u",
+ app=fake_app,
+ )
+
+ assert [item["role"] for item in history["messages"]] == ["human", "tool_call", "tool_result", "assistant"]
+ assert history["messages"][1]["tool"] == "tool_search"
+ assert "" in history["messages"][2]["text"]  # FIXME(review): expected substring literal was lost (markup stripped); "" in s is always True, so this assertion is vacuous — restore the original tag/substring
+ assert "inline/already-available tools: Read" in history["messages"][2]["text"]
+ assert history["messages"][3]["text"] == "after-inline-select"
+
+
+@pytest.mark.asyncio
+async def test_get_thread_history_persists_visible_assistant_error_after_model_failure():
+ checkpointer = _MemoryCheckpointer()
+ registry = ToolRegistry()
+ registry.register(
+ ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "description": "write file"},
+ handler=lambda **_: "FILE_WRITTEN",
+ source="test",
+ )
+ )
+ loop = _make_loop(
+ model=_ToolThenConcurrencyLimitModel(),
+ registry=registry,
+ checkpointer=checkpointer,
+ )
+ config = {"configurable": {"thread_id": "history-visible-model-error"}}
+
+ async for _ in loop.query(
+ {"messages": [{"role": "user", "content": "write once, then continue"}]},
+ config=config,
+ ):
+ pass
+
+ fake_agent = SimpleNamespace(agent=loop)
+ fake_app = SimpleNamespace(state=SimpleNamespace())
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=fake_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ ):
+ history = await get_thread_history(
+ "history-visible-model-error",
+ limit=20,
+ truncate=300,
+ user_id="u",
+ app=fake_app,
+ )
+
+ assert [item["role"] for item in history["messages"]] == ["human", "tool_call", "tool_result", "assistant"]
+ assert history["messages"][-1]["text"] == "Error: Concurrency limit exceeded for user, please retry later"
+
+
+@pytest.mark.asyncio
+async def test_query_loop_persists_visible_terminal_followthrough_when_system_notification_resume_is_silent():
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(text="", checkpointer=checkpointer)
+ system_notice = HumanMessage(
+ content="errorAgent failed"
+ )
+ system_notice.metadata = {"source": "system", "notification_type": "agent"}
+ checkpointer.store["resume-empty-ai-thread"] = {
+ "channel_values": {
+ "messages": [
+ HumanMessage(content="launch background task"),
+ system_notice,
+ ]
+ }
+ }
+
+ async for _ in loop.query(
+ None,
+ config={"configurable": {"thread_id": "resume-empty-ai-thread"}},
+ ):
+ pass
+
+ state = await loop.aget_state({"configurable": {"thread_id": "resume-empty-ai-thread"}})
+
+ assert [msg.__class__.__name__ for msg in state.values["messages"]] == [
+ "HumanMessage",
+ "HumanMessage",
+ "AIMessage",
+ ]
+ assert state.values["messages"][-2].content.startswith("")  # FIXME(review): expected prefix literal was lost (markup stripped); assertion is vacuous — restore the original notification prefix
+ assert state.values["messages"][-1].content == "Background agent failed, but the followthrough assistant reply was empty."
+
+
+@pytest.mark.asyncio
+async def test_query_loop_persists_midrun_steer_message_into_checkpoint_state(tmp_path):
+ checkpointer = _MemoryCheckpointer()
+ queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
+ queue_manager.enqueue(
+ "Stop and just say STEER_DONE.",
+ "steer-persist-thread",
+ notification_type="steer",
+ source="owner",
+ is_steer=True,
+ )
+ runtime = SimpleNamespace(events=[], emit_activity_event=lambda event: runtime.events.append(event))
+ loop = _make_loop(
+ model=_SteerAwareTerminalModel(),
+ checkpointer=checkpointer,
+ middleware=[SteeringMiddleware(queue_manager=queue_manager, agent_runtime=runtime)],
+ )
+ checkpointer.store["steer-persist-thread"] = {
+ "channel_values": {
+ "messages": [
+ HumanMessage(content="Use Bash to run `sleep 20; echo LONG_PHASE_DONE`, then reply exactly ORIGINAL_DONE."),
+ AIMessage(
+ content="",
+ tool_calls=[{"name": "Bash", "args": {"command": "sleep 20; echo LONG_PHASE_DONE"}, "id": "tc-bash"}],
+ ),
+ ToolMessage(content="LONG_PHASE_DONE", name="Bash", tool_call_id="tc-bash"),
+ ]
+ }
+ }
+
+ async for _ in loop.query(None, config={"configurable": {"thread_id": "steer-persist-thread"}}):
+ pass
+
+ state = await loop.aget_state({"configurable": {"thread_id": "steer-persist-thread"}})
+ persisted = state.values["messages"]
+
+ assert [msg.__class__.__name__ for msg in persisted] == [
+ "HumanMessage",
+ "AIMessage",
+ "ToolMessage",
+ "HumanMessage",
+ "AIMessage",
+ ]
+ assert persisted[3].content == "Stop and just say STEER_DONE."
+ assert persisted[3].metadata["source"] == "owner"
+ assert persisted[3].metadata["is_steer"] is True
+ assert persisted[4].content == "STEER_DONE"
+
+
+@pytest.mark.asyncio
+async def test_get_thread_history_rebuilds_persisted_midrun_steer_message(tmp_path):
+ checkpointer = _MemoryCheckpointer()
+ queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
+ queue_manager.enqueue(
+ "Stop and just say STEER_DONE.",
+ "steer-history-thread",
+ notification_type="steer",
+ source="owner",
+ is_steer=True,
+ )
+ runtime = SimpleNamespace(events=[], emit_activity_event=lambda event: runtime.events.append(event))
+ loop = _make_loop(
+ model=_SteerAwareTerminalModel(),
+ checkpointer=checkpointer,
+ middleware=[SteeringMiddleware(queue_manager=queue_manager, agent_runtime=runtime)],
+ )
+ checkpointer.store["steer-history-thread"] = {
+ "channel_values": {
+ "messages": [
+ HumanMessage(content="Use Bash to run `sleep 20; echo LONG_PHASE_DONE`, then reply exactly ORIGINAL_DONE."),
+ AIMessage(
+ content="",
+ tool_calls=[{"name": "Bash", "args": {"command": "sleep 20; echo LONG_PHASE_DONE"}, "id": "tc-bash"}],
+ ),
+ ToolMessage(content="LONG_PHASE_DONE", name="Bash", tool_call_id="tc-bash"),
+ ]
+ }
+ }
+
+ async for _ in loop.query(None, config={"configurable": {"thread_id": "steer-history-thread"}}):
+ pass
+
+ fake_agent = SimpleNamespace(agent=loop)
+ fake_app = SimpleNamespace(state=SimpleNamespace())
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=fake_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ ):
+ history = await get_thread_history(
+ "steer-history-thread",
+ limit=20,
+ truncate=300,
+ user_id="u",
+ app=fake_app,
+ )
+
+ assert [item["role"] for item in history["messages"]] == [
+ "human",
+ "tool_call",
+ "tool_result",
+ "human",
+ "assistant",
+ ]
+ assert history["messages"][3]["text"] == "Stop and just say STEER_DONE."
+ assert history["messages"][4]["text"] == "STEER_DONE"
+
+
+@pytest.mark.asyncio
+async def test_query_loop_adds_non_preemptive_steer_contract_before_terminal_reply(tmp_path):
+ checkpointer = _MemoryCheckpointer()
+ queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
+ queue_manager.enqueue(
+ "Stop immediately. Do not continue the old task. Reply exactly STOPPED_NOW and do not write any file.",
+ "steer-stop-honesty-thread",
+ notification_type="steer",
+ source="owner",
+ is_steer=True,
+ )
+ runtime = SimpleNamespace(events=[], emit_activity_event=lambda event: runtime.events.append(event))
+ loop = _make_loop(
+ model=_StopHonestyAwareModel(),
+ checkpointer=checkpointer,
+ middleware=[SteeringMiddleware(queue_manager=queue_manager, agent_runtime=runtime)],
+ )
+ checkpointer.store["steer-stop-honesty-thread"] = {
+ "channel_values": {
+ "messages": [
+ HumanMessage(content="Run the long bash."),
+ AIMessage(
+ content="",
+ tool_calls=[{"name": "Bash", "args": {"command": "sleep 15; echo LONG_PHASE_DONE"}, "id": "tc-bash"}],
+ ),
+ ToolMessage(content="LONG_PHASE_DONE", name="Bash", tool_call_id="tc-bash"),
+ ]
+ }
+ }
+
+ async for _ in loop.query(None, config={"configurable": {"thread_id": "steer-stop-honesty-thread"}}):
+ pass
+
+ state = await loop.aget_state({"configurable": {"thread_id": "steer-stop-honesty-thread"}})
+ persisted = state.values["messages"]
+
+ assert [msg.__class__.__name__ for msg in persisted] == [
+ "HumanMessage",
+ "AIMessage",
+ "ToolMessage",
+ "HumanMessage",
+ "AIMessage",
+ ]
+ assert persisted[3].content == "Stop immediately. Do not continue the old task. Reply exactly STOPPED_NOW and do not write any file."
+ assert persisted[4].content == "STOP_ACK_AFTER_COMPLETED_WORK"
+
+
+@pytest.mark.asyncio
+async def test_cancelled_midrun_steer_persists_and_does_not_poison_next_turn(monkeypatch, tmp_path):
+ monkeypatch.setattr("backend.web.services.streaming_service._ensure_thread_handlers", lambda *args, **kwargs: None)
+ checkpointer = _MemoryCheckpointer()
+ queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
+ runtime = _StreamingRuntime()
+ tool_started = asyncio.Event()
+
+ async def sleep_tool() -> str:
+ tool_started.set()
+ try:
+ await asyncio.sleep(60)
+ except asyncio.CancelledError:
+ raise
+ return "SLEPT"
+
+ registry = ToolRegistry()
+ registry.register(
+ ToolEntry(
+ name="SleepTool",
+ mode=ToolMode.INLINE,
+ schema={"name": "SleepTool", "description": "sleep", "parameters": {}},
+ handler=sleep_tool,
+ source="test",
+ )
+ )
+ loop = _make_loop(
+ model=_SteerCancelPoisonModel(),
+ registry=registry,
+ checkpointer=checkpointer,
+ middleware=[SteeringMiddleware(queue_manager=queue_manager, agent_runtime=runtime)],
+ )
+ agent = SimpleNamespace(
+ agent=loop,
+ runtime=runtime,
+ storage_container=None,
+ )
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ display_builder=DisplayBuilder(),
+ thread_tasks={},
+ thread_event_buffers={},
+ subagent_buffers={},
+ queue_manager=queue_manager,
+ thread_last_active={},
+ typing_tracker=None,
+ )
+ )
+ thread_id = "steer-cancel-poison-thread"
+ config = {"configurable": {"thread_id": thread_id}}
+
+ start_agent_run(agent, thread_id, "start", app)
+ task = app.state.thread_tasks[thread_id]
+
+ await asyncio.wait_for(tool_started.wait(), timeout=2)
+ queue_manager.enqueue(
+ "Stop and just say STEER_DONE.",
+ thread_id,
+ notification_type="steer",
+ source="owner",
+ is_steer=True,
+ )
+
+ task.cancel()
+ await asyncio.gather(task, return_exceptions=True)
+
+ assert queue_manager.list_queue(thread_id) == []
+ assert app.state.thread_tasks.get(thread_id) is None
+ assert runtime.current_state == AgentState.IDLE
+
+ state_after_cancel = await loop.aget_state(config)
+ cancelled_contents = [getattr(msg, "content", "") for msg in state_after_cancel.values["messages"]]
+ assert cancelled_contents[:2] == ["start", "Stop and just say STEER_DONE."]
+
+ async for _ in loop.query(
+ {"messages": [{"role": "user", "content": "fresh user message"}]},
+ config=config,
+ ):
+ pass
+
+ final_state = await loop.aget_state(config)
+ final_contents = [getattr(msg, "content", "") for msg in final_state.values["messages"]]
+ assert final_contents == [
+ "start",
+ "Stop and just say STEER_DONE.",
+ "fresh user message",
+ "LAST_HUMAN:fresh user message",
+ ]
+
+
+@pytest.mark.asyncio
+async def test_get_thread_messages_rebuilds_idle_thread_when_cached_entries_are_stale():
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(text="history reply", checkpointer=checkpointer)
+ config = {"configurable": {"thread_id": "detail-thread"}}
+
+ async for _ in loop.query(
+ {"messages": [{"role": "user", "content": "hello"}]},
+ config=config,
+ ):
+ pass
+
+ display_builder = _FakeDisplayBuilder(cached_entries=[{"id": "stale-turn", "role": "assistant", "segments": []}])
+ fake_agent = SimpleNamespace(
+ agent=loop,
+ runtime=SimpleNamespace(current_state=AgentState.IDLE),
+ )
+ fake_app = SimpleNamespace(state=SimpleNamespace(display_builder=display_builder))
+
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=fake_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ patch("backend.web.routers.threads.get_sandbox_info", return_value={"type": "local"}),
+ ):
+ detail = await get_thread_messages(
+ "detail-thread",
+ user_id="u",
+ app=fake_app,
+ )
+
+ assert detail["entries"] == [{"id": "rebuilt-notice", "role": "notice", "content": "rebuilt"}]
+ assert display_builder.rebuilt_with is not None
+ rebuilt_thread_id, rebuilt_messages = display_builder.rebuilt_with
+ assert rebuilt_thread_id == "detail-thread"
+ assert [msg["type"] for msg in rebuilt_messages] == ["HumanMessage", "AIMessage"]
+
+
+@pytest.mark.asyncio
+async def test_get_thread_messages_idle_rebuild_replays_latest_run_error_from_event_log():
+ human = HumanMessage(content="hello")
+ fake_agent = SimpleNamespace(
+ agent=SimpleNamespace(aget_state=AsyncMock(return_value=SimpleNamespace(values={"messages": [human]}))),
+ runtime=SimpleNamespace(current_state=AgentState.IDLE),
+ )
+ fake_app = SimpleNamespace(state=SimpleNamespace(display_builder=DisplayBuilder()))
+ run_events = [
+ {
+ "seq": 1,
+ "event": "run_start",
+ "data": json.dumps(
+ {
+ "thread_id": "detail-thread",
+ "run_id": "run-error-1",
+ "source": "owner",
+ "showing": True,
+ }
+ ),
+ "message_id": None,
+ },
+ {
+ "seq": 2,
+ "event": "error",
+ "data": json.dumps({"error": "quota exploded"}),
+ "message_id": None,
+ },
+ {
+ "seq": 3,
+ "event": "run_done",
+ "data": json.dumps({"thread_id": "detail-thread", "run_id": "run-error-1"}),
+ "message_id": None,
+ },
+ ]
+
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=fake_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ patch("backend.web.routers.threads.get_sandbox_info", return_value={"type": "local"}),
+ patch("backend.web.services.event_store.get_latest_run_id", AsyncMock(return_value="run-error-1")),
+ patch("backend.web.services.event_store.read_events_after", AsyncMock(return_value=run_events)),
+ ):
+ detail = await get_thread_messages(
+ "detail-thread",
+ user_id="u",
+ app=fake_app,
+ )
+
+ assert detail["entries"][0]["role"] == "user"
+ assert any(
+ entry.get("role") == "assistant"
+ and any(segment.get("type") == "text" and "quota exploded" in segment.get("content", "") for segment in entry.get("segments", []))
+ for entry in detail["entries"]
+ )
+
+
+@pytest.mark.asyncio
+async def test_cold_rebuild_surfaces_persisted_compaction_notice_in_detail_and_history():
+ checkpointer = _MemoryCheckpointer()
+ summary_model = MagicMock()
+ summary_model.bind.return_value = summary_model
+ summary_model.ainvoke = AsyncMock(return_value=AIMessage(content="SUMMARY"))
+ memory = MemoryMiddleware(
+ context_limit=40,
+ compaction_config=SimpleNamespace(reserve_tokens=0, keep_recent_tokens=10),
+ compaction_threshold=0.1,
+ )
+ memory.set_model(summary_model)
+ loop = _make_loop(
+ text="after compact",
+ checkpointer=checkpointer,
+ middleware=[memory],
+ )
+ config = {"configurable": {"thread_id": "compact-thread"}}
+
+ history = [
+ HumanMessage(content="A" * 80),
+ AIMessage(content="B" * 80),
+ HumanMessage(content="C" * 80),
+ HumanMessage(content="hello after compact"),
+ ]
+
+ async for _ in loop.query({"messages": history}, config=config):
+ pass
+
+ fake_agent = SimpleNamespace(
+ agent=loop,
+ runtime=SimpleNamespace(current_state=AgentState.IDLE),
+ )
+ fake_app = SimpleNamespace(state=SimpleNamespace(display_builder=DisplayBuilder()))
+
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=fake_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ patch("backend.web.routers.threads.get_sandbox_info", return_value={"type": "local"}),
+ ):
+ detail = await get_thread_messages(
+ "compact-thread",
+ user_id="u",
+ app=fake_app,
+ )
+ rebuilt_history = await get_thread_history(
+ "compact-thread",
+ limit=20,
+ truncate=300,
+ user_id="u",
+ app=fake_app,
+ )
+
+ assert any(
+ any(segment.get("type") == "notice" and segment.get("notification_type") == "compact" for segment in entry.get("segments", []))
+ for entry in detail["entries"]
+ if entry.get("role") == "assistant"
+ )
+ assert any(
+ item.get("role") == "notification" and "Conversation compacted" in item.get("text", "") for item in rebuilt_history["messages"]
+ )
+
+
+@pytest.mark.asyncio
+async def test_cold_rebuild_surfaces_persisted_prompt_too_long_notice_after_recovery_exhausts():
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(
+ model=_PromptTooLongTwiceModel(),
+ checkpointer=checkpointer,
+ middleware=[_BridgeReactiveCompactMiddleware()],
+ )
+ config = {"configurable": {"thread_id": "prompt-too-long-thread"}}
+
+ async for _ in loop.query(
+ {"messages": [{"role": "user", "content": "start"}]},
+ config=config,
+ ):
+ pass
+
+ fake_agent = SimpleNamespace(
+ agent=loop,
+ runtime=SimpleNamespace(current_state=AgentState.IDLE),
+ )
+ fake_app = SimpleNamespace(state=SimpleNamespace(display_builder=DisplayBuilder()))
+
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=fake_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ patch("backend.web.routers.threads.get_sandbox_info", return_value={"type": "local"}),
+ ):
+ detail = await get_thread_messages(
+ "prompt-too-long-thread",
+ user_id="u",
+ app=fake_app,
+ )
+ rebuilt_history = await get_thread_history(
+ "prompt-too-long-thread",
+ limit=20,
+ truncate=300,
+ user_id="u",
+ app=fake_app,
+ )
+
+ assert any(
+ any(
+ segment.get("type") == "notice" and "Prompt is too long. Automatic recovery exhausted." in segment.get("content", "")
+ for segment in entry.get("segments", [])
+ )
+ for entry in detail["entries"]
+ if entry.get("role") == "assistant"
+ )
+ assert any(
+ item.get("role") == "notification" and "Prompt is too long. Automatic recovery exhausted." in item.get("text", "")
+ for item in rebuilt_history["messages"]
+ )
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+ ("task_status", "result_text"),
+ [
+ ("completed", "CHILD_DONE"),
+ ("error", "Agent failed"),
+ ("cancelled", "Agent cancelled"),
+ ],
+)
+async def test_get_thread_messages_idle_rebuild_keeps_terminal_subagent_stream_status(
+ task_status: str,
+ result_text: str,
+):
+ ai = AIMessage(
+ content="",
+ tool_calls=[{"name": "Agent", "args": {"prompt": "do work", "run_in_background": True}, "id": "tc-agent-1"}],
+ )
+ tool = ToolMessage(
+ content=(
+ '{"task_id":"task-123","agent_name":"agent-task-123",'
+ '"thread_id":"subagent-task-123","status":"running",'
+ '"message":"Agent started in background. Use TaskOutput to get result."}'
+ ),
+ name="Agent",
+ tool_call_id="tc-agent-1",
+ )
+ notice = HumanMessage(
+ content=(
+ "\n"
+ "\n"
+ " task-123\n"
+ f" {task_status}\n"
+ " child task\n"
+ " child task\n"
+ f" {result_text}\n"
+ "\n"
+ ""
+ )
+ )
+ notice.metadata = {"source": "system", "notification_type": "agent"}
+
+ fake_agent = SimpleNamespace(
+ agent=SimpleNamespace(aget_state=AsyncMock(return_value=SimpleNamespace(values={"messages": [ai, tool, notice]}))),
+ runtime=SimpleNamespace(current_state=AgentState.IDLE),
+ )
+ fake_app = SimpleNamespace(state=SimpleNamespace(display_builder=DisplayBuilder()))
+
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=fake_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ patch("backend.web.routers.threads.get_sandbox_info", return_value={"type": "local"}),
+ ):
+ detail = await get_thread_messages(
+ "parent-thread",
+ user_id="u",
+ app=fake_app,
+ )
+
+ seg = detail["entries"][0]["segments"][0]
+ assert seg["step"]["subagent_stream"]["task_id"] == "task-123"
+ assert seg["step"]["subagent_stream"]["thread_id"] == "subagent-task-123"
+ assert seg["step"]["subagent_stream"]["status"] == task_status
+
+
+@pytest.mark.asyncio
+async def test_compaction_clear_then_recovery_notice_rebuilds_honestly(tmp_path):
+ checkpointer = _MemoryCheckpointer()
+ summary_model = MagicMock()
+ summary_model.bind.return_value = summary_model
+ summary_model.ainvoke = AsyncMock(return_value=AIMessage(content="SUMMARY"))
+
+ memory = MemoryMiddleware(
+ context_limit=40,
+ compaction_config=SimpleNamespace(reserve_tokens=0, keep_recent_tokens=10),
+ compaction_threshold=0.1,
+ db_path=tmp_path / "compaction-lifecycle.db",
+ )
+ memory.set_model(summary_model)
+ config = {"configurable": {"thread_id": "compaction-lifecycle-thread"}}
+ compact_loop = _make_loop(
+ text="after compact",
+ checkpointer=checkpointer,
+ middleware=[memory],
+ )
+
+ history = [
+ HumanMessage(content="A" * 80),
+ AIMessage(content="B" * 80),
+ HumanMessage(content="C" * 80),
+ HumanMessage(content="hello after compact"),
+ ]
+
+ async for _ in compact_loop.query({"messages": history}, config=config):
+ pass
+
+ assert memory.summary_store is not None
+ assert memory.summary_store.get_latest_summary("compaction-lifecycle-thread") is not None
+
+ fake_app = SimpleNamespace(state=SimpleNamespace(display_builder=DisplayBuilder()))
+ fake_agent = SimpleNamespace(
+ agent=compact_loop,
+ runtime=SimpleNamespace(current_state=AgentState.IDLE),
+ )
+
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=fake_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ patch("backend.web.routers.threads.get_sandbox_info", return_value={"type": "local"}),
+ ):
+ compact_detail = await get_thread_messages(
+ "compaction-lifecycle-thread",
+ user_id="u",
+ app=fake_app,
+ )
+ compact_history = await get_thread_history(
+ "compaction-lifecycle-thread",
+ limit=20,
+ truncate=300,
+ user_id="u",
+ app=fake_app,
+ )
+
+ assert any(
+ item.get("role") == "notification" and "Conversation compacted" in item.get("text", "") for item in compact_history["messages"]
+ )
+ assert any(
+ any(
+ segment.get("type") == "notice" and "Conversation compacted" in segment.get("content", "")
+ for segment in entry.get("segments", [])
+ )
+ for entry in compact_detail["entries"]
+ if entry.get("role") == "assistant"
+ )
+
+ await compact_loop.aclear("compaction-lifecycle-thread")
+
+ assert memory.summary_store.get_latest_summary("compaction-lifecycle-thread") is None
+
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=fake_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ patch("backend.web.routers.threads.get_sandbox_info", return_value={"type": "local"}),
+ ):
+ cleared_detail = await get_thread_messages(
+ "compaction-lifecycle-thread",
+ user_id="u",
+ app=fake_app,
+ )
+ cleared_history = await get_thread_history(
+ "compaction-lifecycle-thread",
+ limit=20,
+ truncate=300,
+ user_id="u",
+ app=fake_app,
+ )
+
+ assert cleared_detail["entries"] == []
+ assert cleared_history["messages"] == []
+
+ recovery_loop = _make_loop(
+ model=_PromptTooLongTwiceModel(),
+ checkpointer=checkpointer,
+ middleware=[_BridgeReactiveCompactMiddleware()],
+ )
+ recovery_agent = SimpleNamespace(
+ agent=recovery_loop,
+ runtime=SimpleNamespace(current_state=AgentState.IDLE),
+ )
+
+ async for _ in recovery_loop.query(
+ {"messages": [{"role": "user", "content": "start"}]},
+ config=config,
+ ):
+ pass
+
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=recovery_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ patch("backend.web.routers.threads.get_sandbox_info", return_value={"type": "local"}),
+ ):
+ recovery_detail = await get_thread_messages(
+ "compaction-lifecycle-thread",
+ user_id="u",
+ app=fake_app,
+ )
+ recovery_history = await get_thread_history(
+ "compaction-lifecycle-thread",
+ limit=20,
+ truncate=300,
+ user_id="u",
+ app=fake_app,
+ )
+
+ notices = [item for item in recovery_history["messages"] if item.get("role") == "notification"]
+ assert notices == [
+ {
+ "role": "notification",
+ "text": "Prompt is too long. Automatic recovery exhausted. Clear the thread or start a new one.",
+ }
+ ]
+ assert not any("Conversation compacted" in item.get("text", "") for item in recovery_history["messages"])
+ assert any(
+ any(
+ segment.get("type") == "notice" and "Prompt is too long. Automatic recovery exhausted." in segment.get("content", "")
+ for segment in entry.get("segments", [])
+ )
+ for entry in recovery_detail["entries"]
+ if entry.get("role") == "assistant"
+ )
+
+
+@pytest.mark.asyncio
+async def test_cold_rebuild_surfaces_compaction_breaker_notice_after_repeated_failures(tmp_path):
+ checkpointer = _MemoryCheckpointer()
+ model = _QueryOkWithFailingCompactorModel()
+ memory = MemoryMiddleware(
+ context_limit=10000,
+ compaction_threshold=0.5,
+ db_path=tmp_path / "compaction-breaker.db",
+ compaction_config=SimpleNamespace(reserve_tokens=0, keep_recent_tokens=10),
+ )
+ memory.set_model(model)
+ loop = _make_loop(
+ model=model,
+ checkpointer=checkpointer,
+ middleware=[memory],
+ )
+ config = {"configurable": {"thread_id": "compaction-breaker-thread"}}
+
+ for attempt in range(3):
+ async for _ in loop.query(
+ {
+ "messages": [
+ {"role": "user", "content": "A" * 8000},
+ {"role": "assistant", "content": "B" * 8000},
+ {"role": "user", "content": f"start {attempt} " + ("C" * 8000)},
+ ]
+ },
+ config=config,
+ ):
+ pass
+
+ fake_agent = SimpleNamespace(
+ agent=loop,
+ runtime=SimpleNamespace(current_state=AgentState.IDLE),
+ )
+ fake_app = SimpleNamespace(state=SimpleNamespace(display_builder=DisplayBuilder()))
+
+ with (
+ patch("backend.web.routers.threads.get_or_create_agent", return_value=fake_agent),
+ patch("backend.web.routers.threads.resolve_thread_sandbox", return_value="local"),
+ patch("backend.web.routers.threads.get_sandbox_info", return_value={"type": "local"}),
+ ):
+ detail = await get_thread_messages(
+ "compaction-breaker-thread",
+ user_id="u",
+ app=fake_app,
+ )
+ rebuilt_history = await get_thread_history(
+ "compaction-breaker-thread",
+ limit=50,
+ truncate=300,
+ user_id="u",
+ app=fake_app,
+ )
+
+ assert any(
+ entry.get("role") == "assistant"
+ and any(
+ seg.get("type") == "notice"
+ and "Automatic compaction disabled for this thread after repeated failures." in seg.get("content", "")
+ for seg in entry.get("segments", [])
+ )
+ for entry in detail["entries"]
+ )
+ assert any(
+ item.get("role") == "notification"
+ and "Automatic compaction disabled for this thread after repeated failures." in item.get("text", "")
+ for item in rebuilt_history["messages"]
+ )
+
+
+@pytest.mark.asyncio
+async def test_run_agent_to_buffer_emits_notice_for_system_agent_notifications(monkeypatch, tmp_path):
+ seq = 0
+
+ async def fake_append_event(thread_id, run_id, event, message_id=None, run_event_repo=None):
+ nonlocal seq
+ seq += 1
+ return seq
+
+ async def fake_cleanup_old_runs(thread_id, keep_latest=1, run_event_repo=None):
+ return 0
+
+ monkeypatch.setattr("backend.web.services.event_store.append_event", fake_append_event)
+ monkeypatch.setattr("backend.web.services.streaming_service.cleanup_old_runs", fake_cleanup_old_runs)
+ monkeypatch.setattr("backend.web.services.streaming_service._ensure_thread_handlers", lambda *args, **kwargs: None)
+
+ agent = SimpleNamespace(
+ agent=_StreamingGraphAgent(),
+ runtime=_StreamingRuntime(),
+ storage_container=None,
+ )
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ display_builder=DisplayBuilder(),
+ thread_tasks={},
+ thread_event_buffers={},
+ subagent_buffers={},
+ queue_manager=MessageQueueManager(db_path=str(tmp_path / "queue.db")),
+ thread_last_active={},
+ typing_tracker=None,
+ )
+ )
+ thread_buf = ThreadEventBuffer()
+
+ await _run_agent_to_buffer(
+ agent,
+ "thread-notice",
+ "completed",
+ app,
+ False,
+ thread_buf,
+ "run-notice",
+ message_metadata={"source": "system", "notification_type": "agent"},
+ )
+
+ entries = app.state.display_builder.get_entries("thread-notice")
+ assert entries is not None
+ assert entries[0]["segments"] == [
+ {
+ "type": "notice",
+ "content": "completed",
+ "notification_type": "agent",
+ }
+ ]
+
+
+@pytest.mark.asyncio
+async def test_run_agent_to_buffer_persists_terminal_notifications_before_assistant_followthrough(monkeypatch, tmp_path):
+ seq = 0
+
+ async def fake_append_event(thread_id, run_id, event, message_id=None, run_event_repo=None):
+ nonlocal seq
+ seq += 1
+ return seq
+
+ async def fake_cleanup_old_runs(thread_id, keep_latest=1, run_event_repo=None):
+ return 0
+
+ monkeypatch.setattr("backend.web.services.event_store.append_event", fake_append_event)
+ monkeypatch.setattr("backend.web.services.streaming_service.cleanup_old_runs", fake_cleanup_old_runs)
+ monkeypatch.setattr("backend.web.services.streaming_service._ensure_thread_handlers", lambda *args, **kwargs: None)
+
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(checkpointer=checkpointer)
+ queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
+ queue_manager.enqueue(
+ "errorAgent failed",
+ "thread-terminal-history",
+ notification_type="agent",
+ source="system",
+ )
+
+ agent = SimpleNamespace(
+ agent=loop,
+ runtime=_StreamingRuntime(),
+ storage_container=None,
+ )
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ display_builder=DisplayBuilder(),
+ thread_tasks={},
+ thread_event_buffers={},
+ subagent_buffers={},
+ queue_manager=queue_manager,
+ thread_last_active={},
+ typing_tracker=None,
+ )
+ )
+ thread_buf = ThreadEventBuffer()
+
+ await _run_agent_to_buffer(
+ agent,
+ "thread-terminal-history",
+ "completedBG_OK",
+ app,
+ False,
+ thread_buf,
+ "run-terminal-history",
+ message_metadata={"source": "system", "notification_type": "agent"},
+ )
+
+ state = await loop.aget_state({"configurable": {"thread_id": "thread-terminal-history"}})
+
+ assert [msg.__class__.__name__ for msg in state.values["messages"]] == [
+ "HumanMessage",
+ "HumanMessage",
+ "AIMessage",
+ ]
+ assert "BG_OK" in state.values["messages"][0].content
+ assert "Agent failed" in state.values["messages"][1].content
+ assert state.values["messages"][2].content == "done"
+
+
+@pytest.mark.asyncio
+async def test_run_agent_to_buffer_resumes_graph_for_terminal_background_notifications(monkeypatch, tmp_path):
+ seq = 0
+
+ async def fake_append_event(thread_id, run_id, event, message_id=None, run_event_repo=None):
+ nonlocal seq
+ seq += 1
+ return seq
+
+ async def fake_cleanup_old_runs(thread_id, keep_latest=1, run_event_repo=None):
+ return 0
+
+ monkeypatch.setattr("backend.web.services.event_store.append_event", fake_append_event)
+ monkeypatch.setattr("backend.web.services.streaming_service.cleanup_old_runs", fake_cleanup_old_runs)
+ monkeypatch.setattr("backend.web.services.streaming_service._ensure_thread_handlers", lambda *args, **kwargs: None)
+
+ graph = _NoResumeGraphAgent()
+ agent = SimpleNamespace(
+ agent=graph,
+ runtime=_StreamingRuntime(),
+ storage_container=None,
+ )
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ display_builder=DisplayBuilder(),
+ thread_tasks={},
+ thread_event_buffers={},
+ subagent_buffers={},
+ queue_manager=MessageQueueManager(db_path=str(tmp_path / "queue.db")),
+ thread_last_active={},
+ typing_tracker=None,
+ )
+ )
+ thread_buf = ThreadEventBuffer()
+
+ await _run_agent_to_buffer(
+ agent,
+ "thread-terminal-notice",
+ "completedBG_SEEN:RESULT:3",
+ app,
+ False,
+ thread_buf,
+ "run-terminal-notice",
+ message_metadata={"source": "system", "notification_type": "agent"},
+ )
+
+ assert graph.astream_calls == 1
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+ (
+ "thread_id",
+ "run_id",
+ "message",
+ "message_metadata",
+ "notice_contains",
+ "expected_text",
+ ),
+ [
+ (
+ "thread-terminal-followthrough",
+ "run-terminal-followthrough",
+ "completedBG_OK",
+ {"source": "system", "notification_type": "agent"},
+ "BG_OK",
+ "AFTER_BG_DONE",
+ ),
+ (
+ "thread-command-followthrough",
+ "run-command-followthrough",
+ "completed",
+ {"source": "system", "notification_type": "command"},
+ "CommandNotification",
+ "AFTER_COMMAND_DONE",
+ ),
+ (
+ "thread-command-cancel-followthrough",
+ "run-command-cancel-followthrough",
+ 'cancelledcancelled task',
+ {"source": "system", "notification_type": "command"},
+ "cancelled",
+ "AFTER_COMMAND_CANCELLED",
+ ),
+ ],
+)
+async def test_run_agent_to_buffer_surfaces_notice_then_assistant_followthrough(
+ monkeypatch,
+ tmp_path,
+ thread_id: str,
+ run_id: str,
+ message: str,
+ message_metadata: dict[str, str],
+ notice_contains: str,
+ expected_text: str,
+):
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(text=expected_text, checkpointer=checkpointer)
+
+ entries = await _run_direct_notification_followthrough(
+ monkeypatch,
+ tmp_path,
+ loop=loop,
+ thread_id=thread_id,
+ message=message,
+ run_id=run_id,
+ message_metadata=message_metadata,
+ )
+
+ _assert_notice_then_text(entries, notice_contains, expected_text)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+ ("thread_id", "message", "notification_type", "expected_notice", "expected_text"),
+ [
+ (
+ "thread-route-followthrough",
+ "completed",
+ "command",
+ "CommandNotification",
+ "AFTER_QUEUE_WAKE",
+ ),
+ (
+ "thread-route-agent-followthrough",
+ "completedSimple background tool testSimple Background Tool Test Done",
+ "agent",
+ "Simple Background Tool Test Done",
+ "AFTER_AGENT_WAKE",
+ ),
+ (
+ "thread-route-agent-error-followthrough",
+ "errorSimple background tool testAgent failed",
+ "agent",
+ "Agent failed",
+ "AFTER_AGENT_ERROR_WAKE",
+ ),
+ ],
+)
+async def test_queue_wake_handler_starts_terminal_followthrough_run(
+ monkeypatch,
+ tmp_path,
+ thread_id: str,
+ message: str,
+ notification_type: str,
+ expected_notice: str,
+ expected_text: str,
+):
+ _patch_streaming_event_store(monkeypatch)
+
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(text=expected_text, checkpointer=checkpointer)
+ queue_manager, agent, app = _make_route_followthrough_context(tmp_path, thread_id=thread_id, loop=loop)
+
+ queue_manager.enqueue(
+ message,
+ thread_id,
+ notification_type=notification_type,
+ source="system",
+ )
+
+ await _wait_for_followthrough_text(loop, thread_id, expected_text)
+ history = await _get_local_thread_history(thread_id, agent=agent, app=app)
+
+ assert [item["role"] for item in history["messages"]] == ["notification", "assistant"]
+ assert expected_notice in history["messages"][0]["text"]
+ assert history["messages"][1]["text"] == expected_text
+
+
+@pytest.mark.asyncio
+async def test_cancelled_task_notification_wakes_followthrough_run(monkeypatch, tmp_path):
+ _patch_streaming_event_store(monkeypatch)
+ _patch_fake_event_bus(monkeypatch)
+
+ thread_id = "thread-route-cancel-followthrough"
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(text="AFTER_CANCEL_WAKE", checkpointer=checkpointer)
+ queue_manager, agent, app = _make_route_followthrough_context(tmp_path, thread_id=thread_id, loop=loop)
+ run = SimpleNamespace(is_done=True, description="cancelled task", command="echo hi")
+ await threads_router._notify_task_cancelled(app, thread_id, "cmd-cancel", run)
+
+ await _wait_for_followthrough_text(loop, thread_id, "AFTER_CANCEL_WAKE")
+ history = await _get_local_thread_history(thread_id, agent=agent, app=app)
+ assert [item["role"] for item in history["messages"]] == ["notification", "assistant"]
+ assert "cancelled" in history["messages"][0]["text"]
+ assert history["messages"][1]["text"] == "AFTER_CANCEL_WAKE"
+
+
+@pytest.mark.asyncio
+async def test_send_message_route_then_agent_terminal_notification_reenters_followthrough(monkeypatch, tmp_path):
+ _patch_streaming_event_store(monkeypatch)
+
+ thread_id = "thread-route-send-message-followthrough"
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(model=_TurnTextModel("OWNER_OK", "AFTER_AGENT_ROUTE_WAKE"), checkpointer=checkpointer)
+ queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
+ agent = _make_streaming_agent(loop, queue_manager=queue_manager)
+ app, _ = _make_streaming_app(
+ tmp_path,
+ thread_id=thread_id,
+ agent=agent,
+ queue_manager=queue_manager,
+ include_route_locks=True,
+ )
+
+ with (
+ patch("backend.web.services.agent_pool.get_or_create_agent", AsyncMock(return_value=agent)),
+ patch("backend.web.services.agent_pool.resolve_thread_sandbox", return_value="local"),
+ ):
+ result = await threads_router.send_message(
+ thread_id,
+ SendMessageRequest(message="start owner turn"),
+ user_id="u",
+ app=app,
+ )
+
+ assert result["status"] == "started"
+ await _wait_for_followthrough_text(loop, thread_id, "OWNER_OK")
+
+ queue_manager.enqueue(
+ "completedSimple background tool testSimple Background Tool Test Done",
+ thread_id,
+ notification_type="agent",
+ source="system",
+ )
+
+ await _wait_for_followthrough_text(loop, thread_id, "AFTER_AGENT_ROUTE_WAKE")
+
+ with (
+ patch.object(threads_router, "get_or_create_agent", return_value=agent),
+ patch.object(threads_router, "resolve_thread_sandbox", return_value="local"),
+ ):
+ history = await get_thread_history(thread_id, limit=20, truncate=400, user_id="u", app=app)
+
+ assert [item["role"] for item in history["messages"]] == ["human", "assistant", "notification", "assistant"]
+ assert history["messages"][0]["text"] == "start owner turn"
+ assert history["messages"][1]["text"] == "OWNER_OK"
+ assert "Simple Background Tool Test Done" in history["messages"][2]["text"]
+ assert history["messages"][3]["text"] == "AFTER_AGENT_ROUTE_WAKE"
+
+
+@pytest.mark.asyncio
+async def test_run_agent_to_buffer_adds_terminal_followthrough_system_note_to_prevent_silent_completion(monkeypatch, tmp_path):
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(model=_TerminalFollowthroughPromptAwareModel(), checkpointer=checkpointer)
+ entries = await _run_direct_notification_followthrough(
+ monkeypatch,
+ tmp_path,
+ loop=loop,
+ thread_id="thread-terminal-followthrough-note",
+ message="completed",
+ run_id="run-terminal-followthrough-note",
+ message_metadata={"source": "system", "notification_type": "command"},
+ )
+ _assert_notice_then_text(entries, "CommandNotification", "FOLLOWTHROUGH_ACK")
+
+
+@pytest.mark.asyncio
+async def test_run_agent_to_buffer_turns_silent_terminal_reentry_into_visible_followthrough(monkeypatch, tmp_path):
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(model=_TerminalFollowthroughSilentModel(), checkpointer=checkpointer)
+ entries = await _run_direct_notification_followthrough(
+ monkeypatch,
+ tmp_path,
+ loop=loop,
+ thread_id="thread-terminal-followthrough-silent",
+ message="completed",
+ run_id="run-terminal-followthrough-silent",
+ message_metadata={"source": "system", "notification_type": "command"},
+ )
+ _assert_notice_then_text(
+ entries,
+ "CommandNotification",
+ "Background command completed, but the followthrough assistant reply was empty.",
+ )
+
+
+@pytest.mark.asyncio
+async def test_run_agent_to_buffer_turns_silent_chat_notification_into_visible_followthrough(monkeypatch, tmp_path):
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(model=_ChatNotificationSilentModel(), checkpointer=checkpointer)
+ entries = await _run_direct_notification_followthrough(
+ monkeypatch,
+ tmp_path,
+ loop=loop,
+ thread_id="thread-chat-followthrough-silent",
+ message='\nNew message from alice in chat chat-123 (1 unread).\nRead it with read_messages(chat_id="chat-123").\nReply with send_message(chat_id="chat-123", content="...").\nDo not treat your normal assistant text as a chat reply.\n',
+ run_id="run-chat-followthrough-silent",
+ message_metadata={"source": "external", "notification_type": "chat"},
+ )
+ _assert_notice_then_text(
+ entries,
+ 'read_messages(chat_id="chat-123")',
+ 'I received a chat notification, but the followthrough assistant reply was empty. Read it with read_messages(chat_id="chat-123") before deciding whether to reply.',
+ )
+
+
+@pytest.mark.asyncio
+async def test_run_agent_to_buffer_tags_display_delta_with_source_seq(monkeypatch, tmp_path):
+ _patch_streaming_event_store(monkeypatch)
+ monkeypatch.setattr("backend.web.services.streaming_service._ensure_thread_handlers", lambda *args, **kwargs: None)
+
+ checkpointer = _MemoryCheckpointer()
+ loop = _make_loop(model=_NoToolModel("SEQ_OK"), checkpointer=checkpointer)
+ agent, app, thread_buf = _make_direct_streaming_context(tmp_path, loop)
+
+ await _run_agent_to_buffer(
+ agent,
+ "thread-display-delta-seq",
+ "hello",
+ app,
+ False,
+ thread_buf,
+ "run-display-delta-seq",
+ )
+
+ events, _ = await thread_buf.read_with_timeout(0, timeout=0.01)
+ assert events is not None
+ display_deltas = [json.loads(event["data"]) for event in events if event.get("event") == "display_delta"]
+ assert display_deltas
+ assert all(isinstance(delta.get("_seq"), int) for delta in display_deltas)
+
+
+@pytest.mark.asyncio
+async def test_run_agent_to_buffer_batches_additional_terminal_notifications(monkeypatch, tmp_path):
+ seq = 0
+
+ async def fake_append_event(thread_id, run_id, event, message_id=None, run_event_repo=None):
+ nonlocal seq
+ seq += 1
+ return seq
+
+ async def fake_cleanup_old_runs(thread_id, keep_latest=1, run_event_repo=None):
+ return 0
+
+ monkeypatch.setattr("backend.web.services.event_store.append_event", fake_append_event)
+ monkeypatch.setattr("backend.web.services.streaming_service.cleanup_old_runs", fake_cleanup_old_runs)
+ monkeypatch.setattr("backend.web.services.streaming_service._ensure_thread_handlers", lambda *args, **kwargs: None)
+
+ start_calls: list[tuple[str, str, dict | None]] = []
+
+ def fake_start_agent_run(agent, thread_id, message, app, enable_trajectory=False, message_metadata=None):
+ start_calls.append((thread_id, message, message_metadata))
+ return "run-next"
+
+ monkeypatch.setattr("backend.web.services.streaming_service.start_agent_run", fake_start_agent_run)
+
+ queue_manager = MessageQueueManager(db_path=str(tmp_path / "queue.db"))
+ queue_manager.enqueue(
+ "errorAgent failed",
+ "thread-batch-notice",
+ notification_type="agent",
+ )
+ queue_manager.enqueue(
+ "completed",
+ "thread-batch-notice",
+ notification_type="command",
+ )
+
+ agent = SimpleNamespace(
+ agent=_StreamingGraphAgent(),
+ runtime=_StreamingRuntime(),
+ storage_container=None,
+ )
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ display_builder=DisplayBuilder(),
+ thread_tasks={},
+ thread_event_buffers={},
+ subagent_buffers={},
+ queue_manager=queue_manager,
+ thread_last_active={},
+ typing_tracker=None,
+ )
+ )
+ thread_buf = ThreadEventBuffer()
+
+ await _run_agent_to_buffer(
+ agent,
+ "thread-batch-notice",
+ "completedBG_OK",
+ app,
+ False,
+ thread_buf,
+ "run-batch-notice",
+ message_metadata={"source": "system", "notification_type": "agent"},
+ )
+
+ entries = app.state.display_builder.get_entries("thread-batch-notice")
+ assert entries is not None
+ notice_segments = [segment for segment in entries[0]["segments"] if segment.get("type") == "notice"]
+ assert len(notice_segments) == 3
+ assert "BG_OK" in notice_segments[0]["content"]
+ assert "Agent failed" in notice_segments[1]["content"]
+ assert "CommandNotification" in notice_segments[2]["content"]
+ assert start_calls == []
+ assert queue_manager.list_queue("thread-batch-notice") == []
diff --git a/tests/test_queue_mode_integration.py b/tests/Integration/test_queue_mode_integration.py
similarity index 100%
rename from tests/test_queue_mode_integration.py
rename to tests/Integration/test_queue_mode_integration.py
diff --git a/tests/test_real_multiround.py b/tests/Integration/test_real_multiround.py
similarity index 100%
rename from tests/test_real_multiround.py
rename to tests/Integration/test_real_multiround.py
diff --git a/tests/test_sse_reconnect_integration.py b/tests/Integration/test_sse_reconnect_integration.py
similarity index 100%
rename from tests/test_sse_reconnect_integration.py
rename to tests/Integration/test_sse_reconnect_integration.py
diff --git a/tests/Integration/test_storage_runtime_wiring.py b/tests/Integration/test_storage_runtime_wiring.py
new file mode 100644
index 000000000..f4303b764
--- /dev/null
+++ b/tests/Integration/test_storage_runtime_wiring.py
@@ -0,0 +1,169 @@
+"""Runtime storage wiring tests for backend agent creation path."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from backend.web.services import agent_pool
+from storage.providers.sqlite.checkpoint_repo import SQLiteCheckpointRepo
+from storage.providers.sqlite.eval_repo import SQLiteEvalRepo
+from storage.providers.supabase.checkpoint_repo import SupabaseCheckpointRepo
+
+
+class _FakeSupabaseClient:
+ def table(self, table_name: str):
+ raise AssertionError(f"table() should not be called in this wiring test: {table_name}")
+
+
+def _build_fake_supabase_client() -> _FakeSupabaseClient:
+ return _FakeSupabaseClient()
+
+
+def _build_invalid_supabase_client() -> object:
+ return object()
+
+
+def _capture_create_leon_agent(monkeypatch: pytest.MonkeyPatch) -> dict[str, Any]:
+ captured: dict[str, Any] = {}
+
+ def _fake_create_leon_agent(**kwargs):
+ captured.update(kwargs)
+ return object()
+
+ monkeypatch.setattr(agent_pool, "create_leon_agent", _fake_create_leon_agent)
+ return captured
+
+
+def test_create_agent_sync_wires_supabase_storage_container(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+ monkeypatch.setenv("LEON_STORAGE_STRATEGY", "supabase")
+ monkeypatch.setenv(
+ "LEON_SUPABASE_CLIENT_FACTORY",
+ "tests.Integration.test_storage_runtime_wiring:_build_fake_supabase_client",
+ )
+ monkeypatch.setenv("LEON_DB_PATH", str(tmp_path / "leon.db"))
+ monkeypatch.setenv("LEON_EVAL_DB_PATH", str(tmp_path / "eval.db"))
+
+ captured = _capture_create_leon_agent(monkeypatch)
+ agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
+
+ container = captured["storage_container"]
+ assert isinstance(container.checkpoint_repo(), SupabaseCheckpointRepo)
+
+
+def test_create_agent_sync_supabase_missing_runtime_config_fails_loud(
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+) -> None:
+ monkeypatch.setenv("LEON_STORAGE_STRATEGY", "supabase")
+ monkeypatch.delenv("LEON_SUPABASE_CLIENT_FACTORY", raising=False)
+
+ with pytest.raises(
+ RuntimeError,
+ match="LEON_SUPABASE_CLIENT_FACTORY",
+ ):
+ agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
+
+
+def test_create_agent_sync_supabase_invalid_runtime_config_fails_loud(
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+) -> None:
+ monkeypatch.setenv("LEON_STORAGE_STRATEGY", "supabase")
+ monkeypatch.setenv(
+ "LEON_SUPABASE_CLIENT_FACTORY",
+ "tests.Integration.test_storage_runtime_wiring:_build_invalid_supabase_client",
+ )
+
+ with pytest.raises(RuntimeError, match="callable table\\(name\\) API"):
+ agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
+
+
+def test_create_agent_sync_defaults_to_sqlite_storage_container(
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+) -> None:
+ monkeypatch.delenv("LEON_STORAGE_STRATEGY", raising=False)
+ monkeypatch.delenv("LEON_SUPABASE_CLIENT_FACTORY", raising=False)
+ monkeypatch.setenv("LEON_DB_PATH", str(tmp_path / "leon.db"))
+
+ captured = _capture_create_leon_agent(monkeypatch)
+ agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
+
+ container = captured["storage_container"]
+ assert isinstance(container.checkpoint_repo(), SQLiteCheckpointRepo)
+
+
+def test_create_agent_sync_enables_thread_permission_resolver_scope(
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+) -> None:
+ monkeypatch.delenv("LEON_STORAGE_STRATEGY", raising=False)
+ monkeypatch.delenv("LEON_SUPABASE_CLIENT_FACTORY", raising=False)
+ monkeypatch.setenv("LEON_DB_PATH", str(tmp_path / "leon.db"))
+
+ captured = _capture_create_leon_agent(monkeypatch)
+ agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
+
+ assert captured["permission_resolver_scope"] == "thread"
+
+
+def test_create_agent_sync_repo_override_supabase_with_sqlite_default(
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+) -> None:
+ monkeypatch.setenv("LEON_STORAGE_STRATEGY", "sqlite")
+ monkeypatch.setenv("LEON_STORAGE_REPO_PROVIDERS", '{"checkpoint_repo":"supabase"}')
+ monkeypatch.setenv(
+ "LEON_SUPABASE_CLIENT_FACTORY",
+ "tests.Integration.test_storage_runtime_wiring:_build_fake_supabase_client",
+ )
+ monkeypatch.setenv("LEON_DB_PATH", str(tmp_path / "leon.db"))
+
+ captured = _capture_create_leon_agent(monkeypatch)
+ agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
+ container = captured["storage_container"]
+ assert isinstance(container.checkpoint_repo(), SupabaseCheckpointRepo)
+
+
+def test_create_agent_sync_repo_override_sqlite_with_supabase_default(
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+) -> None:
+ monkeypatch.setenv("LEON_STORAGE_STRATEGY", "supabase")
+ monkeypatch.setenv("LEON_STORAGE_REPO_PROVIDERS", '{"eval_repo":"sqlite"}')
+ monkeypatch.setenv(
+ "LEON_SUPABASE_CLIENT_FACTORY",
+ "tests.Integration.test_storage_runtime_wiring:_build_fake_supabase_client",
+ )
+ monkeypatch.setenv("LEON_DB_PATH", str(tmp_path / "leon.db"))
+ monkeypatch.setenv("LEON_EVAL_DB_PATH", str(tmp_path / "eval.db"))
+
+ captured = _capture_create_leon_agent(monkeypatch)
+ agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
+ container = captured["storage_container"]
+ assert isinstance(container.eval_repo(), SQLiteEvalRepo)
+
+
+def test_create_agent_sync_repo_override_supabase_without_runtime_config_fails_loud(
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+) -> None:
+ monkeypatch.setenv("LEON_STORAGE_STRATEGY", "sqlite")
+ monkeypatch.setenv("LEON_STORAGE_REPO_PROVIDERS", '{"checkpoint_repo":"supabase"}')
+ monkeypatch.delenv("LEON_SUPABASE_CLIENT_FACTORY", raising=False)
+
+ with pytest.raises(RuntimeError, match="LEON_SUPABASE_CLIENT_FACTORY"):
+ agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
+
+
+def test_create_agent_sync_invalid_repo_override_json_fails_loud(
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+) -> None:
+ monkeypatch.setenv("LEON_STORAGE_REPO_PROVIDERS", "not-json")
+
+ with pytest.raises(RuntimeError, match="Invalid LEON_STORAGE_REPO_PROVIDERS"):
+ agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
diff --git a/tests/Integration/test_threads_router.py b/tests/Integration/test_threads_router.py
new file mode 100644
index 000000000..21daac42b
--- /dev/null
+++ b/tests/Integration/test_threads_router.py
@@ -0,0 +1,893 @@
+from __future__ import annotations
+
+import json
+from contextlib import contextmanager
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage
+
+from backend.web.models.requests import CreateThreadRequest
+from backend.web.routers import threads as threads_router
+from core.runtime.loop import QueryLoop
+from core.runtime.middleware.monitor import AgentState
+from core.runtime.registry import ToolRegistry
+from core.runtime.state import AppState, BootstrapConfig, ToolPermissionState
+from storage.contracts import MemberRow, MemberType
+
+
+class _FakeMemberRepo:
+ def __init__(self) -> None:
+ self._members = {
+ "member-1": MemberRow(
+ id="member-1",
+ name="Toad",
+ type=MemberType.MYCEL_AGENT,
+ owner_user_id="owner-1",
+ created_at=1.0,
+ )
+ }
+ self._seq = {"member-1": 0}
+
+ def get_by_id(self, member_id: str):
+ return self._members.get(member_id)
+
+ def increment_entity_seq(self, member_id: str) -> int:
+ self._seq[member_id] += 1
+ return self._seq[member_id]
+
+
+class _FakeThreadRepo:
+ def __init__(self) -> None:
+ self.rows: dict[str, dict] = {}
+
+ def get_by_id(self, thread_id: str):
+ row = self.rows.get(thread_id)
+ if row is None:
+ return None
+ return {"id": thread_id, **row}
+
+ def get_main_thread(self, member_id: str):
+ for row in self.rows.values():
+ if row["member_id"] == member_id and row["is_main"]:
+ return {"id": row["thread_id"], **row}
+ return None
+
+ def get_next_branch_index(self, member_id: str) -> int:
+ indices = [row["branch_index"] for row in self.rows.values() if row["member_id"] == member_id]
+ return max(indices, default=0) + 1
+
+ def create(self, **kwargs):
+ self.rows[kwargs["thread_id"]] = dict(kwargs)
+
+
+class _FakeEntityRepo:
+ def __init__(self) -> None:
+ self.rows = []
+
+ def create(self, row):
+ self.rows.append(row)
+
+ def get_by_id(self, entity_id: str):
+ for row in self.rows:
+ if row.id == entity_id:
+ return row
+ return None
+
+ def update_thread_id(self, entity_id: str, thread_id: str):
+ row = self.get_by_id(entity_id)
+ if row is not None:
+ row.thread_id = thread_id
+
+
+class _FakeAuthService:
+ def __init__(self) -> None:
+ self.tokens: list[str] = []
+
+ def verify_token(self, token: str) -> dict:
+ self.tokens.append(token)
+ return {"user_id": "owner-1"}
+
+
+class _FakeRequest:
+ def __init__(self, headers: dict[str, str] | None = None) -> None:
+ self.headers = headers or {}
+
+
+class _FakePermissionAgent:
+ def __init__(self) -> None:
+ self.pending = [
+ {
+ "request_id": "perm-1",
+ "thread_id": "thread-1",
+ "tool_name": "Write",
+ "args": {"path": "/tmp/demo.txt"},
+ "message": "needs approval",
+ }
+ ]
+ self.session_rules = {
+ "allow": ["Read"],
+ "deny": ["Bash"],
+ "ask": ["Edit"],
+ }
+ self.managed_only = False
+ self.resolve_calls: list[tuple[str, str, str | None, list[dict] | None, dict | None]] = []
+ self.rule_add_calls: list[tuple[str, str]] = []
+ self.rule_remove_calls: list[tuple[str, str]] = []
+ self.agent = SimpleNamespace(
+ aget_state=AsyncMock(return_value=SimpleNamespace(values={})),
+ apersist_state=AsyncMock(),
+ )
+
+ def get_pending_permission_requests(self, thread_id: str | None = None):
+ if thread_id is None:
+ return list(self.pending)
+ return [item for item in self.pending if item["thread_id"] == thread_id]
+
+ def resolve_permission_request(
+ self,
+ request_id: str,
+ *,
+ decision: str,
+ message: str | None = None,
+ answers: list[dict] | None = None,
+ annotations: dict | None = None,
+ ) -> bool:
+ self.resolve_calls.append((request_id, decision, message, answers, annotations))
+ if request_id != "perm-1":
+ return False
+ self.pending = []
+ return True
+
+ def drop_permission_request(self, request_id: str) -> bool:
+ before = len(self.pending)
+ self.pending = [item for item in self.pending if item["request_id"] != request_id]
+ return len(self.pending) != before
+
+ def get_thread_permission_rules(self, thread_id: str) -> dict[str, object]:
+ return {
+ "thread_id": thread_id,
+ "scope": "session",
+ "managed_only": self.managed_only,
+ "rules": dict(self.session_rules),
+ }
+
+ def add_thread_permission_rule(self, thread_id: str, *, behavior: str, tool_name: str) -> bool:
+ self.rule_add_calls.append((behavior, tool_name))
+ if self.managed_only:
+ return False
+ for bucket in self.session_rules.values():
+ if tool_name in bucket:
+ bucket.remove(tool_name)
+ bucket = self.session_rules.setdefault(behavior, [])
+ if tool_name not in bucket:
+ bucket.append(tool_name)
+ return True
+
+ def remove_thread_permission_rule(self, thread_id: str, *, behavior: str, tool_name: str) -> bool:
+ self.rule_remove_calls.append((behavior, tool_name))
+ bucket = self.session_rules.get(behavior, [])
+ if tool_name not in bucket:
+ return False
+ bucket.remove(tool_name)
+ return True
+
+
+class _MemoryCheckpointer:
+ def __init__(self, channel_values: dict | None = None) -> None:
+ self._checkpoint = {"channel_values": dict(channel_values or {})}
+
+ async def aget(self, _cfg):
+ return self._checkpoint
+
+
+class _LivePendingPermissionAgent:
+ def __init__(self) -> None:
+ app_state = AppState(
+ tool_permission_context=ToolPermissionState(alwaysAskRules={"session": ["Bash"]}),
+ pending_permission_requests={
+ "perm-live": {
+ "request_id": "perm-live",
+ "thread_id": "thread-1",
+ "tool_name": "Bash",
+ "args": {"command": "echo hi"},
+ "message": "Permission required by rule: Bash",
+ }
+ },
+ )
+ self.agent = QueryLoop(
+ model=MagicMock(),
+ system_prompt=SystemMessage(content="sys"),
+ middleware=[],
+ checkpointer=_MemoryCheckpointer(channel_values={"messages": []}),
+ registry=ToolRegistry(),
+ app_state=app_state,
+ runtime=SimpleNamespace(current_state=AgentState.ACTIVE),
+ bootstrap=BootstrapConfig(
+ workspace_root=Path("/tmp"),
+ model_name="test-model",
+ permission_resolver_scope="thread",
+ ),
+ max_turns=1,
+ )
+
+ def get_pending_permission_requests(self, thread_id: str | None = None):
+ requests = list(self.agent._app_state.pending_permission_requests.values())
+ if thread_id is None:
+ return requests
+ return [item for item in requests if item["thread_id"] == thread_id]
+
+ def get_thread_permission_rules(self, thread_id: str) -> dict[str, object]:
+ state = self.agent._app_state.tool_permission_context
+ return {
+ "thread_id": thread_id,
+ "scope": "session",
+ "managed_only": state.allowManagedPermissionRulesOnly,
+ "rules": {
+ "allow": list(state.alwaysAllowRules.get("session", [])),
+ "deny": list(state.alwaysDenyRules.get("session", [])),
+ "ask": list(state.alwaysAskRules.get("session", [])),
+ },
+ }
+
+
+class _FakeAskUserQuestionAgent(_FakePermissionAgent):
+ def __init__(self) -> None:
+ super().__init__()
+ self.pending = [
+ {
+ "request_id": "perm-ask",
+ "thread_id": "thread-1",
+ "tool_name": "AskUserQuestion",
+ "args": {
+ "questions": [
+ {
+ "header": "Style",
+ "question": "Choose a style",
+ "options": [
+ {"label": "Minimal", "description": "Keep it simple"},
+ {"label": "Bold", "description": "Make it loud"},
+ ],
+ }
+ ]
+ },
+ "message": "Please answer the following questions so Leon can continue.",
+ }
+ ]
+
+ def resolve_permission_request(
+ self,
+ request_id: str,
+ *,
+ decision: str,
+ message: str | None = None,
+ answers: list[dict] | None = None,
+ annotations: dict | None = None,
+ ) -> bool:
+ self.resolve_calls.append((request_id, decision, message, answers, annotations))
+ if request_id != "perm-ask":
+ return False
+ self.pending = []
+ return True
+
+
+class _NullLock:
+ async def __aenter__(self):
+ return self
+
+ async def __aexit__(self, exc_type, exc, tb):
+ return False
+
+
+class _FakeClearAgent:
+ def __init__(self, state: AgentState = AgentState.IDLE) -> None:
+ self.runtime = SimpleNamespace(current_state=state)
+ self.aclear_thread = AsyncMock()
+
+
+def _make_threads_app(
+ *,
+ member_repo=None,
+ thread_repo=None,
+ entity_repo=None,
+ **state_overrides,
+):
+ return SimpleNamespace(
+ state=SimpleNamespace(
+ member_repo=member_repo or _FakeMemberRepo(),
+ thread_repo=thread_repo or _FakeThreadRepo(),
+ entity_repo=entity_repo or _FakeEntityRepo(),
+ **state_overrides,
+ )
+ )
+
+
+def _make_clear_thread_app():
+ display_builder = SimpleNamespace(clear=MagicMock())
+ queue_manager = SimpleNamespace(clear_all=MagicMock())
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ agent_pool={},
+ display_builder=display_builder,
+ queue_manager=queue_manager,
+ thread_event_buffers={"thread-1": object()},
+ )
+ )
+ return app, display_builder, queue_manager
+
+
+@contextmanager
+def _patch_create_thread_noop_guards():
+ with (
+ patch.object(threads_router, "_validate_sandbox_provider_gate", return_value=None),
+ patch.object(threads_router, "_validate_mount_capability_gate", return_value=None),
+ patch.object(threads_router, "_create_thread_sandbox_resources", return_value=None) as create_resources,
+ patch.object(threads_router, "_invalidate_resource_overview_cache", return_value=None),
+ patch.object(threads_router, "save_last_successful_config", return_value=None),
+ ):
+ yield create_resources
+
+
+@contextmanager
+def _patch_local_clear_thread_agent(agent):
+ with (
+ patch.object(threads_router, "resolve_thread_sandbox", return_value="local"),
+ patch.object(threads_router, "get_or_create_agent", AsyncMock(return_value=agent)),
+ patch.object(threads_router, "get_thread_lock", AsyncMock(return_value=_NullLock())),
+ ):
+ yield
+
+
+@pytest.mark.asyncio
+async def test_create_thread_route_preserves_legacy_sandbox_type_alias():
+ app = _make_threads_app(thread_sandbox={}, thread_cwd={})
+ payload = CreateThreadRequest.model_validate(
+ {
+ "member_id": "member-1",
+ "sandbox_type": "daytona_selfhost",
+ "model": "gpt-5.4-mini",
+ }
+ )
+
+ with _patch_create_thread_noop_guards():
+ result = await threads_router.create_thread(payload, "owner-1", app)
+
+ assert result["sandbox"] == "daytona_selfhost"
+ assert app.state.thread_sandbox[result["thread_id"]] == "daytona_selfhost"
+ assert app.state.thread_repo.rows[result["thread_id"]]["sandbox_type"] == "daytona_selfhost"
+
+
+@pytest.mark.asyncio
+async def test_resolve_main_thread_returns_null_for_orphaned_main_thread_metadata():
+ thread_repo = _FakeThreadRepo()
+ thread_repo.create(
+ thread_id="thread-1",
+ member_id="member-1",
+ owner_user_id="owner-1",
+ sandbox_type="local",
+ is_main=True,
+ branch_index=0,
+ )
+ app = _make_threads_app(thread_repo=thread_repo)
+
+ payload = threads_router.ResolveMainThreadRequest(member_id="member-1")
+
+ result = await threads_router.resolve_main_thread(payload, "owner-1", app)
+
+ assert result == {"thread": None}
+
+
+@pytest.mark.asyncio
+async def test_create_thread_route_uses_canonical_existing_lease_binding_helper():
+ app = _make_threads_app(thread_sandbox={}, thread_cwd={})
+ payload = CreateThreadRequest.model_validate(
+ {
+ "member_id": "member-1",
+ "lease_id": "lease-1",
+ "cwd": "/workspace/reused",
+ }
+ )
+
+ with (
+ patch.object(
+ threads_router.sandbox_service,
+ "list_user_leases",
+ return_value=[{"lease_id": "lease-1", "provider_name": "local", "recipe": None}],
+ ),
+ patch.object(threads_router, "bind_thread_to_existing_lease", return_value="/workspace/reused") as bind_helper,
+ patch.object(threads_router, "_invalidate_resource_overview_cache", return_value=None),
+ patch.object(threads_router, "save_last_successful_config", return_value=None),
+ ):
+ result = await threads_router.create_thread(payload, "owner-1", app)
+
+ bind_helper.assert_called_once_with(
+ result["thread_id"],
+ "lease-1",
+ cwd="/workspace/reused",
+ )
+ assert app.state.thread_cwd[result["thread_id"]] == "/workspace/reused"
+
+
+@pytest.mark.asyncio
+async def test_create_thread_route_passes_local_cwd_into_sandbox_bootstrap():
+ app = _make_threads_app(thread_sandbox={}, thread_cwd={})
+ payload = CreateThreadRequest.model_validate(
+ {
+ "member_id": "member-1",
+ "cwd": "/tmp/fresh-local-thread",
+ }
+ )
+
+ with _patch_create_thread_noop_guards() as create_resources:
+ result = await threads_router.create_thread(payload, "owner-1", app)
+
+ create_resources.assert_called_once_with(
+ result["thread_id"],
+ "local",
+ None,
+ "/tmp/fresh-local-thread",
+ )
+
+
+@pytest.mark.asyncio
+async def test_list_threads_hides_internal_subagent_threads():
+ app = _make_threads_app(
+ thread_repo=SimpleNamespace(
+ list_by_owner_user_id=lambda user_id: [
+ {
+ "id": "main-thread",
+ "sandbox_type": "local",
+ "member_name": "Toad",
+ "member_id": "member-1",
+ "entity_name": "Toad",
+ "branch_index": 0,
+ "is_main": True,
+ "member_avatar": None,
+ },
+ {
+ "id": "subagent-deadbeef",
+ "sandbox_type": "local",
+ "member_name": "Toad",
+ "member_id": "member-1",
+ "entity_name": "worker-1",
+ "branch_index": 1,
+ "is_main": False,
+ "member_avatar": None,
+ },
+ ]
+ ),
+ agent_pool={},
+ thread_last_active={},
+ )
+
+ payload = await threads_router.list_threads("owner-1", app)
+
+ assert [item["thread_id"] for item in payload["threads"]] == ["main-thread"]
+
+
+@pytest.mark.asyncio
+async def test_create_thread_route_rejects_unavailable_provider():
+ app = _make_threads_app(thread_sandbox={}, thread_cwd={})
+ payload = CreateThreadRequest.model_validate(
+ {
+ "member_id": "member-1",
+ "sandbox": "daytona",
+ }
+ )
+
+ with patch.object(threads_router.sandbox_service, "build_provider_from_config_name", return_value=None):
+ result = await threads_router.create_thread(payload, "owner-1", app)
+
+ assert isinstance(result, threads_router.JSONResponse)
+ assert result.status_code == 400
+ assert json.loads(result.body.decode()) == {
+ "error": "sandbox_provider_unavailable",
+ "provider": "daytona",
+ }
+ assert app.state.thread_repo.rows == {}
+
+
+@pytest.mark.asyncio
+async def test_create_thread_route_rejects_unavailable_provider_for_existing_lease():
+ app = _make_threads_app(thread_sandbox={}, thread_cwd={})
+ payload = CreateThreadRequest.model_validate(
+ {
+ "member_id": "member-1",
+ "lease_id": "lease-1",
+ }
+ )
+
+ with (
+ patch.object(
+ threads_router.sandbox_service,
+ "list_user_leases",
+ return_value=[{"lease_id": "lease-1", "provider_name": "daytona", "recipe": None}],
+ ),
+ patch.object(threads_router.sandbox_service, "build_provider_from_config_name", return_value=None),
+ ):
+ result = await threads_router.create_thread(payload, "owner-1", app)
+
+ assert isinstance(result, threads_router.JSONResponse)
+ assert result.status_code == 400
+ assert json.loads(result.body.decode()) == {
+ "error": "sandbox_provider_unavailable",
+ "provider": "daytona",
+ }
+ assert app.state.thread_repo.rows == {}
+
+
+@pytest.mark.asyncio
+async def test_stream_thread_events_requires_token():
+ app = _make_threads_app(
+ auth_service=_FakeAuthService(),
+ thread_repo=SimpleNamespace(get_by_id=lambda _thread_id: None),
+ thread_event_buffers={},
+ )
+
+ with pytest.raises(threads_router.HTTPException) as exc_info:
+ await threads_router.stream_thread_events(
+ "thread-1",
+ request=_FakeRequest(),
+ token=None,
+ app=app,
+ )
+
+ assert exc_info.value.status_code == 401
+ assert exc_info.value.detail == "Missing token"
+
+
+@pytest.mark.asyncio
+async def test_stream_thread_events_verifies_token_before_owner_check():
+ auth_service = _FakeAuthService()
+ thread_repo = SimpleNamespace(get_by_id=lambda _thread_id: {"member_id": "member-1"})
+ app = _make_threads_app(
+ auth_service=auth_service,
+ thread_repo=thread_repo,
+ thread_event_buffers={},
+ )
+
+ response = await threads_router.stream_thread_events(
+ "thread-1",
+ request=_FakeRequest(),
+ token="tok-thread",
+ app=app,
+ )
+
+ assert auth_service.tokens == ["tok-thread"]
+ assert response is not None
+
+
+@pytest.mark.asyncio
+async def test_get_thread_permissions_returns_thread_scoped_pending_requests():
+ agent = _FakePermissionAgent()
+
+ result = await threads_router.get_thread_permissions(
+ "thread-1",
+ user_id="owner-1",
+ agent=agent,
+ )
+
+ assert result == {
+ "thread_id": "thread-1",
+ "requests": [
+ {
+ "request_id": "perm-1",
+ "thread_id": "thread-1",
+ "tool_name": "Write",
+ "args": {"path": "/tmp/demo.txt"},
+ "message": "needs approval",
+ }
+ ],
+ "session_rules": {
+ "allow": ["Read"],
+ "deny": ["Bash"],
+ "ask": ["Edit"],
+ },
+ "managed_only": False,
+ }
+
+
+@pytest.mark.asyncio
+async def test_get_thread_permissions_does_not_clear_live_pending_requests_during_active_run():
+ agent = _LivePendingPermissionAgent()
+
+ result = await threads_router.get_thread_permissions(
+ "thread-1",
+ user_id="owner-1",
+ agent=agent,
+ )
+
+ assert result == {
+ "thread_id": "thread-1",
+ "requests": [
+ {
+ "request_id": "perm-live",
+ "thread_id": "thread-1",
+ "tool_name": "Bash",
+ "args": {"command": "echo hi"},
+ "message": "Permission required by rule: Bash",
+ }
+ ],
+ "session_rules": {
+ "allow": [],
+ "deny": [],
+ "ask": ["Bash"],
+ },
+ "managed_only": False,
+ }
+ assert agent.agent._app_state.pending_permission_requests == {
+ "perm-live": {
+ "request_id": "perm-live",
+ "thread_id": "thread-1",
+ "tool_name": "Bash",
+ "args": {"command": "echo hi"},
+ "message": "Permission required by rule: Bash",
+ }
+ }
+
+
+@pytest.mark.asyncio
+async def test_get_thread_history_does_not_clear_live_pending_requests_during_active_run():
+ agent = _LivePendingPermissionAgent()
+ agent.agent._app_state.messages = [
+ HumanMessage(content="please run bash"),
+ ToolMessage(content="Permission required by rule: Bash", tool_call_id="call-1", name="Bash"),
+ ]
+
+ with (
+ patch.object(threads_router, "resolve_thread_sandbox", return_value="local"),
+ patch.object(
+ threads_router,
+ "get_or_create_agent",
+ AsyncMock(return_value=agent),
+ ),
+ ):
+ result = await threads_router.get_thread_history(
+ "thread-1",
+ limit=20,
+ truncate=0,
+ user_id="owner-1",
+ app=SimpleNamespace(state=SimpleNamespace()),
+ )
+
+ assert result["messages"] == [
+ {"role": "human", "text": "please run bash"},
+ {"role": "tool_result", "tool": "Bash", "text": "Permission required by rule: Bash"},
+ ]
+ assert agent.agent._app_state.pending_permission_requests == {
+ "perm-live": {
+ "request_id": "perm-live",
+ "thread_id": "thread-1",
+ "tool_name": "Bash",
+ "args": {"command": "echo hi"},
+ "message": "Permission required by rule: Bash",
+ }
+ }
+
+
+@pytest.mark.asyncio
+async def test_resolve_thread_permission_request_persists_resolution():
+ agent = _FakePermissionAgent()
+
+ result = await threads_router.resolve_thread_permission_request(
+ "thread-1",
+ "perm-1",
+ SimpleNamespace(decision="allow", message="go ahead"),
+ user_id="owner-1",
+ agent=agent,
+ )
+
+ assert result == {"ok": True, "thread_id": "thread-1", "request_id": "perm-1"}
+ assert agent.resolve_calls == [("perm-1", "allow", "go ahead", None, None)]
+ agent.agent.apersist_state.assert_awaited_once_with("thread-1")
+
+
+@pytest.mark.asyncio
+async def test_resolve_ask_user_question_request_starts_followup_run_with_answers():
+ agent = _FakeAskUserQuestionAgent()
+ app = SimpleNamespace()
+ payload = SimpleNamespace(
+ decision="allow",
+ message=None,
+ answers=[
+ {
+ "header": "Style",
+ "question": "Choose a style",
+ "selected_options": ["Minimal"],
+ }
+ ],
+ annotations={"source": "ask-user-ui"},
+ )
+
+ with patch(
+ "backend.web.services.message_routing.route_message_to_brain",
+ AsyncMock(return_value={"status": "started", "routing": "direct", "thread_id": "thread-1"}),
+ ) as route_message:
+ result = await threads_router.resolve_thread_permission_request(
+ "thread-1",
+ "perm-ask",
+ payload,
+ user_id="owner-1",
+ agent=agent,
+ app=app,
+ )
+
+ assert result == {
+ "ok": True,
+ "thread_id": "thread-1",
+ "request_id": "perm-ask",
+ "followup": {"status": "started", "routing": "direct", "thread_id": "thread-1"},
+ }
+ assert agent.resolve_calls == [
+ (
+ "perm-ask",
+ "allow",
+ None,
+ [
+ {
+ "header": "Style",
+ "question": "Choose a style",
+ "selected_options": ["Minimal"],
+ }
+ ],
+ {"source": "ask-user-ui"},
+ )
+ ]
+ route_message.assert_awaited_once()
+ followup_message = route_message.await_args.args[2]
+ assert "AskUserQuestion" in followup_message
+ assert "Minimal" in followup_message
+ assert "Choose a style" in followup_message
+ assert agent.pending == []
+ assert agent.agent.apersist_state.await_count == 2
+ assert [call.args for call in agent.agent.apersist_state.await_args_list] == [("thread-1",), ("thread-1",)]
+
+
+@pytest.mark.asyncio
+async def test_resolve_ask_user_question_request_requires_answers_for_allow():
+ agent = _FakeAskUserQuestionAgent()
+
+ with pytest.raises(threads_router.HTTPException) as exc_info:
+ await threads_router.resolve_thread_permission_request(
+ "thread-1",
+ "perm-ask",
+ SimpleNamespace(decision="allow", message=None, answers=None, annotations=None),
+ user_id="owner-1",
+ agent=agent,
+ app=SimpleNamespace(),
+ )
+
+ assert exc_info.value.status_code == 400
+ assert exc_info.value.detail == "AskUserQuestion answers are required when approving the request"
+ agent.agent.apersist_state.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_resolve_thread_permission_request_404s_missing_request():
+ agent = _FakePermissionAgent()
+
+ with pytest.raises(threads_router.HTTPException) as exc_info:
+ await threads_router.resolve_thread_permission_request(
+ "thread-1",
+ "missing",
+ SimpleNamespace(decision="deny", message="no"),
+ user_id="owner-1",
+ agent=agent,
+ )
+
+ assert exc_info.value.status_code == 404
+ assert exc_info.value.detail == "Permission request not found"
+ agent.agent.apersist_state.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_add_thread_permission_rule_persists_session_rule():
+ agent = _FakePermissionAgent()
+
+ result = await threads_router.add_thread_permission_rule(
+ "thread-1",
+ SimpleNamespace(behavior="allow", tool_name="Write"),
+ user_id="owner-1",
+ agent=agent,
+ )
+
+ assert result == {
+ "ok": True,
+ "thread_id": "thread-1",
+ "scope": "session",
+ "rules": {
+ "allow": ["Read", "Write"],
+ "deny": ["Bash"],
+ "ask": ["Edit"],
+ },
+ "managed_only": False,
+ }
+ assert agent.rule_add_calls == [("allow", "Write")]
+ agent.agent.apersist_state.assert_awaited_once_with("thread-1")
+
+
+@pytest.mark.asyncio
+async def test_add_thread_permission_rule_fails_loud_when_managed_only():
+ agent = _FakePermissionAgent()
+ agent.managed_only = True
+
+ with pytest.raises(threads_router.HTTPException) as exc_info:
+ await threads_router.add_thread_permission_rule(
+ "thread-1",
+ SimpleNamespace(behavior="allow", tool_name="Write"),
+ user_id="owner-1",
+ agent=agent,
+ )
+
+ assert exc_info.value.status_code == 409
+ assert exc_info.value.detail == "Managed permission rules only; session overrides are disabled"
+ agent.agent.apersist_state.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_remove_thread_permission_rule_persists_session_rule_change():
+ agent = _FakePermissionAgent()
+
+ result = await threads_router.delete_thread_permission_rule(
+ "thread-1",
+ "deny",
+ "Bash",
+ user_id="owner-1",
+ agent=agent,
+ )
+
+ assert result == {
+ "ok": True,
+ "thread_id": "thread-1",
+ "scope": "session",
+ "rules": {
+ "allow": ["Read"],
+ "deny": [],
+ "ask": ["Edit"],
+ },
+ "managed_only": False,
+ }
+ assert agent.rule_remove_calls == [("deny", "Bash")]
+ agent.agent.apersist_state.assert_awaited_once_with("thread-1")
+
+
+@pytest.mark.asyncio
+async def test_clear_thread_route_clears_agent_state_and_thread_buffers():
+ agent = _FakeClearAgent()
+ app, display_builder, queue_manager = _make_clear_thread_app()
+
+ with _patch_local_clear_thread_agent(agent):
+ result = await threads_router.clear_thread_history(
+ "thread-1",
+ user_id="owner-1",
+ app=app,
+ )
+
+ assert result == {"ok": True, "thread_id": "thread-1"}
+ agent.aclear_thread.assert_awaited_once_with("thread-1")
+ display_builder.clear.assert_called_once_with("thread-1")
+ queue_manager.clear_all.assert_called_once_with("thread-1")
+ assert app.state.thread_event_buffers == {}
+
+
+@pytest.mark.asyncio
+async def test_clear_thread_route_rejects_active_run():
+ agent = _FakeClearAgent(state=AgentState.ACTIVE)
+ app, display_builder, queue_manager = _make_clear_thread_app()
+
+ with _patch_local_clear_thread_agent(agent):
+ with pytest.raises(threads_router.HTTPException) as exc_info:
+ await threads_router.clear_thread_history(
+ "thread-1",
+ user_id="owner-1",
+ app=app,
+ )
+
+ assert exc_info.value.status_code == 409
+ assert exc_info.value.detail == "Cannot clear thread while run is in progress"
+ agent.aclear_thread.assert_not_awaited()
+ display_builder.clear.assert_not_called()
+ queue_manager.clear_all.assert_not_called()
+ assert "thread-1" in app.state.thread_event_buffers
diff --git a/tests/Unit/backend/test_message_routing.py b/tests/Unit/backend/test_message_routing.py
new file mode 100644
index 000000000..9c5cf47d4
--- /dev/null
+++ b/tests/Unit/backend/test_message_routing.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+import asyncio
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from backend.web.services.message_routing import route_message_to_brain
+from core.runtime.middleware.monitor import AgentState
+
+
+class _FakeQueueManager:
+ def enqueue(self, *args, **kwargs) -> None:
+ raise AssertionError("enqueue should not be used for idle -> active routing")
+
+
+class _FakeRuntime:
+ def __init__(self) -> None:
+ self.current_state = AgentState.IDLE
+
+ def transition(self, next_state: AgentState) -> bool:
+ self.current_state = next_state
+ return True
+
+
+class _FakeAgent:
+ def __init__(self) -> None:
+ self.runtime = _FakeRuntime()
+
+
+@pytest.mark.asyncio
+async def test_route_message_to_brain_clears_resource_overview_cache_when_starting_run() -> None:
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ queue_manager=_FakeQueueManager(),
+ thread_locks={},
+ thread_locks_guard=asyncio.Lock(),
+ )
+ )
+ agent = _FakeAgent()
+
+ with (
+ patch("backend.web.services.agent_pool.resolve_thread_sandbox", return_value="local"),
+ patch("backend.web.services.agent_pool.get_or_create_agent", AsyncMock(return_value=agent)),
+ patch("backend.web.services.streaming_service.start_agent_run", return_value="run-123"),
+ patch("backend.web.services.resource_cache.clear_resource_overview_cache") as clear_cache,
+ ):
+ result = await route_message_to_brain(app, "thread-1", "hello")
+
+ assert result == {"status": "started", "routing": "direct", "run_id": "run-123", "thread_id": "thread-1"}
+ clear_cache.assert_called_once_with()
diff --git a/tests/Unit/core/test_agent_pool.py b/tests/Unit/core/test_agent_pool.py
new file mode 100644
index 000000000..1f537dfc2
--- /dev/null
+++ b/tests/Unit/core/test_agent_pool.py
@@ -0,0 +1,206 @@
+import asyncio
+import time
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any, cast
+
+import pytest
+
+from backend.web.services import agent_pool
+
+
+class _FakeThreadRepo:
+ def get_by_id(self, thread_id: str):
+ return {"id": thread_id, "cwd": "/tmp", "model": "leon:large"}
+
+
+@pytest.mark.asyncio
+async def test_get_or_create_agent_creates_once_per_thread(monkeypatch: pytest.MonkeyPatch):
+ created: list[object] = []
+
+ def _fake_create_agent_sync(
+ sandbox_name: str,
+ workspace_root=None,
+ model_name: str | None = None,
+ agent: str | None = None,
+ bundle_dir=None,
+ thread_repo=None,
+ entity_repo=None,
+ member_repo=None,
+ queue_manager=None,
+ chat_repos=None,
+ extra_allowed_paths=None,
+ web_app=None,
+ ) -> object:
+ time.sleep(0.05)
+ obj = SimpleNamespace()
+ created.append(obj)
+ return obj
+
+ monkeypatch.setattr(agent_pool, "create_agent_sync", _fake_create_agent_sync)
+ monkeypatch.setattr(agent_pool, "get_or_create_agent_id", lambda **_: "agent-1")
+
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ agent_pool={},
+ thread_repo=_FakeThreadRepo(),
+ thread_cwd={},
+ thread_sandbox={},
+ )
+ )
+
+ first, second = await asyncio.gather(
+ agent_pool.get_or_create_agent(cast(Any, app), "local", thread_id="thread-1"),
+ agent_pool.get_or_create_agent(cast(Any, app), "local", thread_id="thread-1"),
+ )
+
+ assert len(created) == 1
+ assert first is second
+ assert app.state.agent_pool["thread-1:local"] is first
+
+
+@pytest.mark.asyncio
+async def test_get_or_create_agent_ignores_unavailable_local_cwd(monkeypatch: pytest.MonkeyPatch):
+ captured: dict[str, object] = {}
+
+ def _fake_create_agent_sync(
+ sandbox_name: str,
+ workspace_root=None,
+ model_name: str | None = None,
+ agent: str | None = None,
+ bundle_dir=None,
+ thread_repo=None,
+ entity_repo=None,
+ member_repo=None,
+ queue_manager=None,
+ chat_repos=None,
+ extra_allowed_paths=None,
+ web_app=None,
+ ) -> object:
+ captured["workspace_root"] = workspace_root
+ return SimpleNamespace()
+
+ class _ThreadRepo:
+ def get_by_id(self, thread_id: str):
+ return {
+ "id": thread_id,
+ "cwd": "/Users/lexicalmathical/Codebase/homeworks/aiagent",
+ "model": "leon:large",
+ }
+
+ monkeypatch.setattr(agent_pool, "create_agent_sync", _fake_create_agent_sync)
+ monkeypatch.setattr(agent_pool, "get_or_create_agent_id", lambda **_: "agent-2")
+ monkeypatch.setattr(Path, "exists", lambda self: False)
+
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ agent_pool={},
+ thread_repo=_ThreadRepo(),
+ thread_cwd={},
+ thread_sandbox={},
+ )
+ )
+
+ await agent_pool.get_or_create_agent(cast(Any, app), "local", thread_id="thread-2")
+
+ assert captured["workspace_root"] is None
+
+
+@pytest.mark.asyncio
+async def test_get_or_create_agent_honors_fresh_local_thread_cwd_even_when_missing(monkeypatch: pytest.MonkeyPatch, tmp_path):
+ captured: dict[str, object] = {}
+ requested = tmp_path / "fresh-workspace"
+
+ def _fake_create_agent_sync(
+ sandbox_name: str,
+ workspace_root=None,
+ model_name: str | None = None,
+ agent: str | None = None,
+ bundle_dir=None,
+ thread_repo=None,
+ entity_repo=None,
+ member_repo=None,
+ queue_manager=None,
+ chat_repos=None,
+ extra_allowed_paths=None,
+ web_app=None,
+ ) -> object:
+ captured["workspace_root"] = workspace_root
+ return SimpleNamespace()
+
+ class _ThreadRepo:
+ def get_by_id(self, thread_id: str):
+ return {
+ "id": thread_id,
+ "cwd": None,
+ "model": "leon:large",
+ }
+
+ monkeypatch.setattr(agent_pool, "create_agent_sync", _fake_create_agent_sync)
+ monkeypatch.setattr(agent_pool, "get_or_create_agent_id", lambda **_: "agent-3")
+
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ agent_pool={},
+ thread_repo=_ThreadRepo(),
+ thread_cwd={"thread-3": str(requested)},
+ thread_sandbox={},
+ )
+ )
+
+ await agent_pool.get_or_create_agent(cast(Any, app), "local", thread_id="thread-3")
+
+ assert captured["workspace_root"] == requested.resolve()
+ assert requested.is_dir()
+ assert app.state.thread_cwd["thread-3"] == str(requested.resolve())
+
+
+@pytest.mark.asyncio
+async def test_get_or_create_agent_passes_member_bundle_dir(monkeypatch: pytest.MonkeyPatch, tmp_path: Path):
+ captured: dict[str, object] = {}
+ member_dir = tmp_path / "members" / "member-1"
+ member_dir.mkdir(parents=True)
+
+ def _fake_create_agent_sync(
+ sandbox_name: str,
+ workspace_root=None,
+ model_name: str | None = None,
+ agent: str | None = None,
+ bundle_dir=None,
+ thread_repo=None,
+ entity_repo=None,
+ member_repo=None,
+ queue_manager=None,
+ chat_repos=None,
+ extra_allowed_paths=None,
+ web_app=None,
+ ) -> object:
+ captured["bundle_dir"] = bundle_dir
+ return SimpleNamespace()
+
+ class _ThreadRepo:
+ def get_by_id(self, thread_id: str):
+ return {
+ "id": thread_id,
+ "cwd": None,
+ "model": "leon:large",
+ "member_id": "member-1",
+ "member_name": "Toad",
+ }
+
+ monkeypatch.setattr(agent_pool, "create_agent_sync", _fake_create_agent_sync)
+ monkeypatch.setattr(agent_pool, "get_or_create_agent_id", lambda **_: "agent-4")
+ monkeypatch.setattr(agent_pool, "preferred_existing_user_home_path", lambda *parts: member_dir)
+
+ app = SimpleNamespace(
+ state=SimpleNamespace(
+ agent_pool={},
+ thread_repo=_ThreadRepo(),
+ thread_cwd={},
+ thread_sandbox={},
+ )
+ )
+
+ await agent_pool.get_or_create_agent(cast(Any, app), "local", thread_id="thread-4")
+
+ assert captured["bundle_dir"] == member_dir.resolve()
diff --git a/tests/Unit/core/test_agent_service.py b/tests/Unit/core/test_agent_service.py
new file mode 100644
index 000000000..a5a8e530c
--- /dev/null
+++ b/tests/Unit/core/test_agent_service.py
@@ -0,0 +1,1525 @@
+"""Unit tests for AgentService sub-agent boundaries and policy."""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import AsyncMock
+
+import pytest
+
+from core.agents.service import (
+ AGENT_DISALLOWED,
+ AGENT_SCHEMA,
+ ASK_USER_QUESTION_SCHEMA,
+ EXPLORE_ALLOWED,
+ TASK_OUTPUT_SCHEMA,
+ AgentService,
+ _BashBackgroundRun,
+ _RunningTask,
+)
+from core.runtime.registry import ToolRegistry
+from core.runtime.runner import ToolRunner
+from core.runtime.state import AppState, BootstrapConfig, ToolUseContext
+from sandbox.manager import SandboxManager
+from sandbox.providers.local import LocalSessionProvider
+from sandbox.thread_context import get_current_thread_id, set_current_messages, set_current_thread_id
+from storage.contracts import EntityRow
+
+
+class _FakeRegistry:
+ def register(self, entry):
+ self.last_entry = entry
+
+
+class _FakeAgentRegistry:
+ def __init__(self) -> None:
+ self._latest_by_name_parent: dict[tuple[str, str | None], object] = {}
+
+ async def register(self, entry):
+ self.entry = entry
+
+ async def update_status(self, agent_id: str, status: str):
+ self.last_status = (agent_id, status)
+
+ async def get_latest_by_name_and_parent(self, name: str, parent_agent_id: str | None):
+ return self._latest_by_name_parent.get((name, parent_agent_id))
+
+
+class _FakeThreadRepo:
+ def __init__(self, rows: dict[str, dict] | None = None):
+ self.rows = rows or {}
+ self.created: list[dict] = []
+
+ def get_by_id(self, thread_id: str):
+ return self.rows.get(thread_id)
+
+ def get_next_branch_index(self, member_id: str) -> int:
+ branch_indexes = [int(row["branch_index"]) for row in self.rows.values() if row["member_id"] == member_id]
+ return (max(branch_indexes) if branch_indexes else 0) + 1
+
+ def create(self, thread_id: str, member_id: str, sandbox_type: str, cwd: str | None, created_at: float, **extra):
+ row = {
+ "id": thread_id,
+ "member_id": member_id,
+ "sandbox_type": sandbox_type,
+ "cwd": cwd,
+ "model": extra.get("model"),
+ "is_main": bool(extra.get("is_main", False)),
+ "branch_index": int(extra["branch_index"]),
+ "created_at": created_at,
+ }
+ self.rows[thread_id] = row
+ self.created.append(row)
+
+
+class _FakeEntityRepo:
+ def __init__(self):
+ self.rows_by_thread: dict[str, EntityRow] = {}
+
+ def create(self, row: EntityRow):
+ self.rows_by_thread[row.thread_id] = row
+
+ def get_by_thread_id(self, thread_id: str):
+ return self.rows_by_thread.get(thread_id)
+
+
+class _FakeMemberRepo:
+ def __init__(self, names: dict[str, str]):
+ self._names = names
+
+ def get_by_id(self, member_id: str):
+ name = self._names.get(member_id)
+ if name is None:
+ return None
+ return SimpleNamespace(id=member_id, name=name, avatar=None)
+
+
+class _FakeChildAgent:
+ def __init__(self, workspace_root: Path, model_name: str):
+ self.workspace_root = workspace_root
+ self.model_name = model_name
+ self._bootstrap = BootstrapConfig(workspace_root=workspace_root, model_name=model_name)
+ self.apply_fork_calls: list[tuple[BootstrapConfig, ToolUseContext | None]] = []
+ self.cleanup_calls = 0
+ self.closed = False
+ self.close_kwargs: dict[str, object] = {}
+ self._agent_service = SimpleNamespace(
+ _parent_bootstrap=None,
+ _parent_tool_context=None,
+ cleanup_background_runs=self._cleanup_background_runs,
+ )
+ self.agent = SimpleNamespace(astream=self._astream)
+
+ async def ainit(self):
+ return None
+
+ async def _astream(self, *args, **kwargs):
+ if False:
+ yield None
+ return
+
+ async def _cleanup_background_runs(self):
+ self.cleanup_calls += 1
+
+ def close(self, **kwargs):
+ self.closed = True
+ self.close_kwargs = kwargs
+ return None
+
+ def apply_forked_child_context(
+ self,
+ bootstrap: BootstrapConfig,
+ *,
+ tool_context: ToolUseContext | None = None,
+ ) -> None:
+ self.apply_fork_calls.append((bootstrap, tool_context))
+ self._bootstrap = bootstrap
+ self.agent._bootstrap = bootstrap
+ self._agent_service._parent_bootstrap = bootstrap
+ if tool_context is not None:
+ self._agent_service._parent_tool_context = tool_context
+ self.agent._tool_abort_controller = tool_context.abort_controller
+
+
+class _FakeAsyncCommand:
+ def __init__(self):
+ self.done = False
+ self.stdout_buffer = []
+ self.stderr_buffer = []
+ self.exit_code = None
+ self.process = SimpleNamespace(terminate=self._terminate, kill=self._kill, wait=self._wait)
+ self.terminated = False
+ self.killed = False
+ self.wait_calls = 0
+
+ def _terminate(self):
+ self.terminated = True
+ self.done = True
+
+ def _kill(self):
+ self.killed = True
+ self.done = True
+
+ async def _wait(self):
+ self.wait_calls += 1
+ return 0
+
+
+def _make_parent_context(tmp_path: Path, model_name: str = "gpt-parent") -> ToolUseContext:
+ parent_state = AppState(turn_count=1)
+ return ToolUseContext(
+ bootstrap=BootstrapConfig(workspace_root=tmp_path, model_name=model_name),
+ get_app_state=parent_state.get_state,
+ set_app_state=parent_state.set_state,
+ set_app_state_for_tasks=parent_state.set_state,
+ read_file_state={"/tmp/readme.md": {"partial": False}},
+ loaded_nested_memory_paths={"/tmp/memory.md"},
+ discovered_skill_names={"skill-a"},
+ nested_memory_attachment_triggers={"turn-a"},
+ messages=["hello"],
+ )
+
+
+def _make_service(tmp_path: Path, **kwargs) -> AgentService:
+ tool_registry = kwargs.pop("tool_registry", None) or _FakeRegistry()
+ agent_registry = kwargs.pop("agent_registry", None) or _FakeAgentRegistry()
+ model_name = kwargs.pop("model_name", "gpt-test")
+ return AgentService(
+ tool_registry=tool_registry,
+ agent_registry=agent_registry,
+ workspace_root=tmp_path,
+ model_name=model_name,
+ **kwargs,
+ )
+
+
+def _agent_tool_json(result) -> dict:
+ content = getattr(result, "content", result)
+ return json.loads(content)
+
+
+async def _sleep_forever():
+ while True:
+ await asyncio.sleep(3600)
+
+
+@pytest.mark.asyncio
+async def test_task_output_reports_running_command_honestly(tmp_path):
+ service = _make_service(tmp_path)
+ async_cmd = _FakeAsyncCommand()
+ service._tasks["cmd_test123"] = _BashBackgroundRun(async_cmd, "echo hello")
+
+ payload = json.loads(await service._handle_task_output("cmd_test123", block=False))
+
+ assert payload == {
+ "task_id": "cmd_test123",
+ "status": "running",
+ "message": "Command is still running.",
+ }
+
+
+@pytest.mark.asyncio
+async def test_task_output_keeps_agent_running_message_for_agent_tasks(tmp_path):
+ service = _make_service(tmp_path)
+ task = asyncio.create_task(_sleep_forever())
+ service._tasks["task_agent123"] = _RunningTask(
+ task=task,
+ agent_id="agent-1",
+ thread_id="thread-1",
+ )
+
+ try:
+ payload = json.loads(await service._handle_task_output("task_agent123", block=False))
+ finally:
+ task.cancel()
+ with pytest.raises(asyncio.CancelledError):
+ await task
+
+ assert payload == {
+ "task_id": "task_agent123",
+ "status": "running",
+ "message": "Agent is still running.",
+ }
+
+
+@pytest.mark.asyncio
+async def test_task_output_times_out_when_blocking_wait_expires(tmp_path):
+ service = _make_service(tmp_path)
+ task = asyncio.create_task(_sleep_forever())
+ service._tasks["task_agent123"] = _RunningTask(
+ task=task,
+ agent_id="agent-1",
+ thread_id="thread-1",
+ )
+
+ try:
+ payload = json.loads(await service._handle_task_output("task_agent123", timeout=1))
+ finally:
+ task.cancel()
+ with pytest.raises(asyncio.CancelledError):
+ await task
+
+ assert payload == {
+ "task_id": "task_agent123",
+ "status": "timeout",
+ "message": "Agent is still running.",
+ }
+
+
+@pytest.mark.asyncio
+async def test_run_agent_applies_forked_bootstrap_to_child_agent(monkeypatch, tmp_path):
+ created: list[_FakeChildAgent] = []
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ child = _FakeChildAgent(Path(workspace_root), model_name)
+ created.append(child)
+ return child
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ service = _make_service(tmp_path)
+ service._parent_bootstrap = BootstrapConfig(
+ workspace_root=Path("/workspace"),
+ original_cwd=Path("/launcher"),
+ project_root=Path("/workspace/project"),
+ cwd=Path("/workspace/project/src"),
+ model_name="gpt-parent",
+ api_key="sk-parent",
+ extra_allowed_paths=["/shared"],
+ total_cost_usd=1.5,
+ total_tool_duration_ms=77,
+ model_provider="openai",
+ base_url="https://api.example.com/v1",
+ context_limit=12345,
+ )
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-1",
+ prompt="do work",
+ subagent_type="general",
+ max_turns=None,
+ fork_context=False,
+ )
+
+ assert result == "(Agent completed with no text output)"
+ child = created[0]
+ assert child._bootstrap.original_cwd == Path("/launcher")
+ assert child._bootstrap.project_root == Path("/workspace/project")
+ assert child._bootstrap.cwd == Path("/workspace/project/src")
+ assert child._bootstrap.extra_allowed_paths == ["/shared"]
+ assert child._bootstrap.parent_session_id == service._parent_bootstrap.session_id
+ assert child._bootstrap.session_id != service._parent_bootstrap.session_id
+ assert child._bootstrap.total_cost_usd == 1.5
+ assert child._bootstrap.total_tool_duration_ms == 77
+ assert child._bootstrap.model_provider == "openai"
+ assert child._bootstrap.base_url == "https://api.example.com/v1"
+ assert child._bootstrap.context_limit == 12345
+
+
+@pytest.mark.asyncio
+async def test_run_agent_applies_isolated_tool_context_to_child_agent_service(monkeypatch, tmp_path):
+ created: list[_FakeChildAgent] = []
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ child = _FakeChildAgent(Path(workspace_root), model_name)
+ created.append(child)
+ return child
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ service = _make_service(tmp_path)
+ parent_context = _make_parent_context(tmp_path)
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-1",
+ prompt="do work",
+ subagent_type="general",
+ max_turns=None,
+ fork_context=False,
+ parent_tool_context=parent_context,
+ )
+
+ assert result == "(Agent completed with no text output)"
+ child_context = created[0]._agent_service._parent_tool_context
+ assert child_context is not None
+ assert child_context is not parent_context
+ assert child_context.bootstrap.parent_session_id == parent_context.bootstrap.session_id
+ child_context.set_app_state(lambda prev: prev.model_copy(update={"turn_count": 9}))
+ assert parent_context.get_app_state().turn_count == 1
+ child_context.set_app_state_for_tasks(lambda prev: prev.model_copy(update={"turn_count": 9}))
+ assert parent_context.get_app_state().turn_count == 9
+
+
+@pytest.mark.asyncio
+async def test_run_agent_uses_explicit_child_fork_wiring_api(monkeypatch, tmp_path):
+ created: list[_FakeChildAgent] = []
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ child = _FakeChildAgent(Path(workspace_root), model_name)
+ created.append(child)
+ return child
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ service = _make_service(tmp_path)
+ parent_context = _make_parent_context(tmp_path)
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-1",
+ prompt="do work",
+ subagent_type="general",
+ max_turns=None,
+ fork_context=False,
+ parent_tool_context=parent_context,
+ )
+
+ assert result == "(Agent completed with no text output)"
+ assert len(created[0].apply_fork_calls) == 1
+ applied_bootstrap, applied_context = created[0].apply_fork_calls[0]
+ assert applied_bootstrap is created[0]._bootstrap
+ assert applied_context is created[0]._agent_service._parent_tool_context
+
+
+@pytest.mark.asyncio
+async def test_run_agent_uses_injected_child_agent_factory(tmp_path):
+ created: list[_FakeChildAgent] = []
+
+ def fake_child_agent_factory(*, model_name, workspace_root, **kwargs):
+ child = _FakeChildAgent(Path(workspace_root), model_name)
+ created.append(child)
+ return child
+
+ service = _make_service(tmp_path, child_agent_factory=fake_child_agent_factory)
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-1",
+ prompt="do work",
+ subagent_type="general",
+ max_turns=None,
+ fork_context=False,
+ )
+
+ assert result == "(Agent completed with no text output)"
+ assert len(created) == 1
+
+
+@pytest.mark.asyncio
+async def test_agent_tool_fork_context_uses_parent_tool_context_messages(monkeypatch, tmp_path):
+ captured: dict[str, object] = {}
+
+ class _CapturingChild(_FakeChildAgent):
+ async def _astream(self, payload, *args, **kwargs):
+ captured["messages"] = payload["messages"]
+ if False:
+ yield None
+ return
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ return _CapturingChild(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ registry = ToolRegistry()
+ _make_service(tmp_path, tool_registry=registry)
+ runner = ToolRunner(registry=registry)
+ request = SimpleNamespace(
+ tool_call={
+ "name": "Agent",
+ "args": {"prompt": "inspect", "description": "inspect workspace", "fork_context": True},
+ "id": "tc-1",
+ },
+ state=_make_parent_context(tmp_path),
+ )
+
+ result = await runner.awrap_tool_call(request, AsyncMock())
+
+ assert result.content == "(Agent completed with no text output)"
+ assert captured["messages"] == [
+ "hello",
+ {
+ "role": "user",
+ "content": (
+ "\n\n### ENTERING SUB-AGENT ROUTINE ###\n"
+ "Messages above are from the parent thread (read-only context).\n"
+ "Only complete the specific task assigned below.\n\n"
+ "inspect"
+ ),
+ },
+ ]
+
+
+@pytest.mark.asyncio
+async def test_agent_tool_fork_context_treats_empty_parent_messages_as_authoritative(monkeypatch, tmp_path):
+ captured: dict[str, object] = {}
+
+ class _CapturingChild(_FakeChildAgent):
+ async def _astream(self, payload, *args, **kwargs):
+ captured["messages"] = payload["messages"]
+ if False:
+ yield None
+ return
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ return _CapturingChild(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+ set_current_messages([{"role": "user", "content": "AMBIENT_LEAK"}])
+
+ registry = ToolRegistry()
+ _make_service(tmp_path, tool_registry=registry)
+ runner = ToolRunner(registry=registry)
+ parent_context = _make_parent_context(tmp_path)
+ parent_context.messages = []
+ request = SimpleNamespace(
+ tool_call={
+ "name": "Agent",
+ "args": {"prompt": "inspect", "description": "inspect workspace", "fork_context": True},
+ "id": "tc-1",
+ },
+ state=parent_context,
+ )
+
+ result = await runner.awrap_tool_call(request, AsyncMock())
+
+ assert result.content == "(Agent completed with no text output)"
+ assert captured["messages"] == [
+ {
+ "role": "user",
+ "content": (
+ "\n\n### ENTERING SUB-AGENT ROUTINE ###\n"
+ "Messages above are from the parent thread (read-only context).\n"
+ "Only complete the specific task assigned below.\n\n"
+ "inspect"
+ ),
+ },
+ ]
+
+
+@pytest.mark.asyncio
+async def test_run_agent_rolls_child_bootstrap_costs_back_into_parent_bootstrap(monkeypatch, tmp_path):
+ created: list[_FakeChildAgent] = []
+
+ class _CostReportingChild(_FakeChildAgent):
+ async def _astream(self, *args, **kwargs):
+ self._bootstrap.total_cost_usd = 9.75
+ self._bootstrap.total_tool_duration_ms = 222
+ if False:
+ yield None
+ return
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ child = _CostReportingChild(Path(workspace_root), model_name)
+ created.append(child)
+ return child
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ service = _make_service(tmp_path)
+ service._parent_bootstrap = BootstrapConfig(
+ workspace_root=Path("/workspace"),
+ model_name="gpt-parent",
+ total_cost_usd=1.5,
+ total_tool_duration_ms=77,
+ )
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-1",
+ prompt="do work",
+ subagent_type="general",
+ max_turns=None,
+ fork_context=False,
+ )
+
+ assert result == "(Agent completed with no text output)"
+ assert created[0]._bootstrap.total_cost_usd == 9.75
+ assert created[0]._bootstrap.total_tool_duration_ms == 222
+ assert service._parent_bootstrap.total_cost_usd == 9.75
+ assert service._parent_bootstrap.total_tool_duration_ms == 222
+
+
+@pytest.mark.asyncio
+async def test_run_agent_preserves_concurrent_parent_and_child_bootstrap_growth(monkeypatch, tmp_path):
+ created: list[_FakeChildAgent] = []
+
+ class _ConcurrentCostChild(_FakeChildAgent):
+ async def _astream(self, *args, **kwargs):
+ service._parent_bootstrap.total_cost_usd = 2.0
+ service._parent_bootstrap.total_tool_duration_ms = 20
+ self._bootstrap.total_cost_usd = 1.5
+ self._bootstrap.total_tool_duration_ms = 15
+ if False:
+ yield None
+ return
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ child = _ConcurrentCostChild(Path(workspace_root), model_name)
+ created.append(child)
+ return child
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ service = _make_service(tmp_path)
+ service._parent_bootstrap = BootstrapConfig(
+ workspace_root=Path("/workspace"),
+ model_name="gpt-parent",
+ total_cost_usd=1.0,
+ total_tool_duration_ms=10,
+ )
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-1",
+ prompt="do work",
+ subagent_type="general",
+ max_turns=None,
+ fork_context=False,
+ )
+
+ assert result == "(Agent completed with no text output)"
+ assert created[0]._bootstrap.total_cost_usd == 1.5
+ assert created[0]._bootstrap.total_tool_duration_ms == 15
+ assert service._parent_bootstrap.total_cost_usd == 2.5
+ assert service._parent_bootstrap.total_tool_duration_ms == 25
+
+
+@pytest.mark.asyncio
+async def test_agent_tool_live_runner_path_passes_isolated_tool_context_to_child(monkeypatch, tmp_path):
+ created: list[_FakeChildAgent] = []
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ child = _FakeChildAgent(Path(workspace_root), model_name)
+ created.append(child)
+ return child
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ registry = ToolRegistry()
+ _make_service(tmp_path, tool_registry=registry)
+ runner = ToolRunner(registry=registry)
+ parent_context = _make_parent_context(tmp_path)
+ request = SimpleNamespace(
+ tool_call={"name": "Agent", "args": {"prompt": "do work", "description": "do work"}, "id": "tc-1"},
+ state=parent_context,
+ )
+
+ result = await runner.awrap_tool_call(request, AsyncMock())
+
+ assert result.content == "(Agent completed with no text output)"
+ child_context = created[0]._agent_service._parent_tool_context
+ assert child_context is not None
+ assert child_context.bootstrap.parent_session_id == parent_context.bootstrap.session_id
+ child_context.set_app_state(lambda prev: prev.model_copy(update={"turn_count": 9}))
+ assert parent_context.get_app_state().turn_count == 1
+
+
+@pytest.mark.asyncio
+async def test_run_agent_without_fork_context_does_not_inject_parent_messages(monkeypatch, tmp_path):
+ captured: dict[str, object] = {}
+
+ class _CapturingChild(_FakeChildAgent):
+ async def _astream(self, payload, *args, **kwargs):
+ captured["messages"] = payload["messages"]
+ if False:
+ yield None
+ return
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ return _CapturingChild(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ service = _make_service(tmp_path)
+ parent_context = _make_parent_context(tmp_path)
+ parent_context.messages = [
+ {
+ "role": "user",
+ "content": "PARENT_CONTROL_PROMPT",
+ }
+ ]
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-1",
+ prompt="child task only",
+ subagent_type="general",
+ max_turns=None,
+ fork_context=False,
+ parent_tool_context=parent_context,
+ )
+
+ assert result == "(Agent completed with no text output)"
+ assert captured["messages"] == [{"role": "user", "content": "child task only"}]
+
+
+@pytest.mark.asyncio
+async def test_run_agent_child_tool_context_deep_clones_read_file_state(monkeypatch, tmp_path):
+ created: list[_FakeChildAgent] = []
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ child = _FakeChildAgent(Path(workspace_root), model_name)
+ created.append(child)
+ return child
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ service = _make_service(tmp_path)
+ parent_context = _make_parent_context(tmp_path)
+ parent_context.read_file_state = {"/tmp/readme.md": {"partial": False, "meta": {"seen": 1}}}
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-1",
+ prompt="do work",
+ subagent_type="general",
+ max_turns=None,
+ fork_context=False,
+ parent_tool_context=parent_context,
+ )
+
+ assert result == "(Agent completed with no text output)"
+ child_context = created[0]._agent_service._parent_tool_context
+ child_context.read_file_state["/tmp/readme.md"]["partial"] = True
+ child_context.read_file_state["/tmp/readme.md"]["meta"]["seen"] = 9
+ assert parent_context.read_file_state["/tmp/readme.md"] == {
+ "partial": False,
+ "meta": {"seen": 1},
+ }
+
+
+@pytest.mark.asyncio
+async def test_agent_tool_live_runner_path_applies_role_specific_tool_filters(monkeypatch, tmp_path):
+ captured: dict[str, object] = {}
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ captured["model_name"] = model_name
+ captured["workspace_root"] = Path(workspace_root)
+ captured["kwargs"] = kwargs
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ registry = ToolRegistry()
+ _make_service(tmp_path, tool_registry=registry, model_name="gpt-parent")
+ runner = ToolRunner(registry=registry)
+ request = SimpleNamespace(
+ tool_call={
+ "name": "Agent",
+ "args": {"prompt": "inspect", "description": "inspect workspace", "subagent_type": "explore"},
+ "id": "tc-1",
+ },
+ state=_make_parent_context(tmp_path, model_name="gpt-parent"),
+ )
+
+ result = await runner.awrap_tool_call(request, AsyncMock())
+
+ assert result.content == "(Agent completed with no text output)"
+ assert captured["model_name"] == "gpt-parent"
+ assert captured["kwargs"]["agent"] == "explore"
+ assert captured["kwargs"]["allowed_tools"] == EXPLORE_ALLOWED
+ assert captured["kwargs"]["extra_blocked_tools"] == AGENT_DISALLOWED
+
+
+@pytest.mark.asyncio
+async def test_agent_tool_model_priority_prefers_env_over_tool_frontmatter_and_parent(monkeypatch, tmp_path):
+ agent_dir = tmp_path / ".leon" / "agents"
+ agent_dir.mkdir(parents=True)
+ (agent_dir / "explore.md").write_text(
+ "---\nname: explore\nmodel: frontmatter-model\ntools:\n - Read\n---\nfrontmatter prompt\n",
+ encoding="utf-8",
+ )
+ captured: dict[str, object] = {}
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ captured["model_name"] = model_name
+ captured["kwargs"] = kwargs
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+ monkeypatch.setenv("CLAUDE_CODE_SUBAGENT_MODEL", "env-model")
+
+ registry = ToolRegistry()
+ _make_service(tmp_path, tool_registry=registry, model_name="parent-model")
+ runner = ToolRunner(registry=registry)
+ request = SimpleNamespace(
+ tool_call={
+ "name": "Agent",
+ "args": {
+ "prompt": "inspect",
+ "description": "inspect workspace",
+ "subagent_type": "explore",
+ "model": "tool-model",
+ },
+ "id": "tc-1",
+ },
+ state=_make_parent_context(tmp_path, model_name="parent-model"),
+ )
+
+ await runner.awrap_tool_call(request, AsyncMock())
+
+ assert captured["model_name"] == "env-model"
+ assert captured["kwargs"]["agent"] == "explore"
+
+
+@pytest.mark.asyncio
+async def test_agent_tool_model_priority_prefers_tool_over_frontmatter_and_parent(monkeypatch, tmp_path):
+ agent_dir = tmp_path / ".leon" / "agents"
+ agent_dir.mkdir(parents=True)
+ (agent_dir / "explore.md").write_text(
+ "---\nname: explore\nmodel: frontmatter-model\ntools:\n - Read\n---\nfrontmatter prompt\n",
+ encoding="utf-8",
+ )
+ captured: dict[str, object] = {}
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ captured["model_name"] = model_name
+ captured["kwargs"] = kwargs
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ registry = ToolRegistry()
+ _make_service(tmp_path, tool_registry=registry, model_name="parent-model")
+ runner = ToolRunner(registry=registry)
+ request = SimpleNamespace(
+ tool_call={
+ "name": "Agent",
+ "args": {
+ "prompt": "inspect",
+ "description": "inspect workspace",
+ "subagent_type": "explore",
+ "model": "tool-model",
+ },
+ "id": "tc-1",
+ },
+ state=_make_parent_context(tmp_path, model_name="parent-model"),
+ )
+
+ await runner.awrap_tool_call(request, AsyncMock())
+
+ assert captured["model_name"] == "tool-model"
+ assert captured["kwargs"]["agent"] == "explore"
+
+
+@pytest.mark.asyncio
+async def test_agent_tool_model_default_literal_inherits_parent_model(monkeypatch, tmp_path):
+ captured: dict[str, object] = {}
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ captured["model_name"] = model_name
+ captured["kwargs"] = kwargs
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ registry = ToolRegistry()
+ _make_service(tmp_path, tool_registry=registry, model_name="parent-model")
+ runner = ToolRunner(registry=registry)
+ request = SimpleNamespace(
+ tool_call={
+ "name": "Agent",
+ "args": {
+ "prompt": "inspect",
+ "description": "inspect workspace",
+ "subagent_type": "explore",
+ "model": "default",
+ },
+ "id": "tc-1",
+ },
+ state=_make_parent_context(tmp_path, model_name="parent-model"),
+ )
+
+ await runner.awrap_tool_call(request, AsyncMock())
+
+ assert captured["model_name"] == "parent-model"
+ assert captured["kwargs"]["agent"] == "explore"
+
+
+@pytest.mark.asyncio
+async def test_agent_tool_model_inherit_literal_inherits_parent_model(monkeypatch, tmp_path):
+ captured: dict[str, object] = {}
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ captured["model_name"] = model_name
+ captured["kwargs"] = kwargs
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ registry = ToolRegistry()
+ _make_service(tmp_path, tool_registry=registry, model_name="parent-model")
+ runner = ToolRunner(registry=registry)
+ request = SimpleNamespace(
+ tool_call={
+ "name": "Agent",
+ "args": {
+ "prompt": "inspect",
+ "description": "inspect workspace",
+ "subagent_type": "explore",
+ "model": "inherit",
+ },
+ "id": "tc-1",
+ },
+ state=_make_parent_context(tmp_path, model_name="parent-model"),
+ )
+
+ await runner.awrap_tool_call(request, AsyncMock())
+
+ assert captured["model_name"] == "parent-model"
+ assert captured["kwargs"]["agent"] == "explore"
+
+
+@pytest.mark.asyncio
+async def test_agent_tool_inherited_default_bootstrap_model_uses_parent_service_model(monkeypatch, tmp_path):
+ captured: dict[str, object] = {}
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ captured["model_name"] = model_name
+ captured["kwargs"] = kwargs
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ registry = ToolRegistry()
+ _make_service(tmp_path, tool_registry=registry, model_name="parent-service-model")
+ runner = ToolRunner(registry=registry)
+ request = SimpleNamespace(
+ tool_call={
+ "name": "Agent",
+ "args": {"prompt": "inspect", "description": "inspect workspace", "subagent_type": "explore"},
+ "id": "tc-1",
+ },
+ state=_make_parent_context(tmp_path, model_name="default"),
+ )
+
+ await runner.awrap_tool_call(request, AsyncMock())
+
+ assert captured["model_name"] == "parent-service-model"
+ assert captured["kwargs"]["agent"] == "explore"
+
+
+@pytest.mark.asyncio
+async def test_agent_tool_model_priority_prefers_frontmatter_over_parent(monkeypatch, tmp_path):
+ agent_dir = tmp_path / ".leon" / "agents"
+ agent_dir.mkdir(parents=True)
+ (agent_dir / "explore.md").write_text(
+ "---\nname: explore\nmodel: frontmatter-model\ntools:\n - Read\n---\nfrontmatter prompt\n",
+ encoding="utf-8",
+ )
+ captured: dict[str, object] = {}
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ captured["model_name"] = model_name
+ captured["kwargs"] = kwargs
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ registry = ToolRegistry()
+ _make_service(tmp_path, tool_registry=registry, model_name="parent-model")
+ runner = ToolRunner(registry=registry)
+ request = SimpleNamespace(
+ tool_call={
+ "name": "Agent",
+ "args": {"prompt": "inspect", "description": "inspect workspace", "subagent_type": "explore"},
+ "id": "tc-1",
+ },
+ state=_make_parent_context(tmp_path, model_name="parent-model"),
+ )
+
+ await runner.awrap_tool_call(request, AsyncMock())
+
+ assert captured["model_name"] == "frontmatter-model"
+ assert captured["kwargs"]["agent"] == "explore"
+
+
+@pytest.mark.asyncio
+async def test_agent_tool_model_priority_inherits_parent_when_no_env_tool_or_frontmatter(monkeypatch, tmp_path):
+ captured: dict[str, object] = {}
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ captured["model_name"] = model_name
+ captured["kwargs"] = kwargs
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ registry = ToolRegistry()
+ _make_service(tmp_path, tool_registry=registry, model_name="service-model")
+ runner = ToolRunner(registry=registry)
+ request = SimpleNamespace(
+ tool_call={
+ "name": "Agent",
+ "args": {"prompt": "inspect", "description": "inspect workspace", "subagent_type": "explore"},
+ "id": "tc-1",
+ },
+ state=_make_parent_context(tmp_path, model_name="parent-model"),
+ )
+
+ await runner.awrap_tool_call(request, AsyncMock())
+
+ assert captured["model_name"] == "parent-model"
+ assert captured["kwargs"]["agent"] == "explore"
+
+
+@pytest.mark.asyncio
+async def test_cleanup_background_runs_cancels_pending_agent_and_shell_runs(tmp_path):
+ service = _make_service(tmp_path)
+ agent_task = asyncio.create_task(_sleep_forever())
+ shell_cmd = _FakeAsyncCommand()
+ service._tasks["agent-task"] = _RunningTask(
+ task=agent_task,
+ agent_id="agent-task",
+ thread_id="subagent-agent-task",
+ description="agent task",
+ )
+ service._tasks["bash-task"] = _BashBackgroundRun(
+ async_cmd=shell_cmd,
+ command="sleep 999",
+ description="bash task",
+ )
+
+ await service.cleanup_background_runs()
+
+ assert agent_task.cancelled() is True
+ assert shell_cmd.terminated is True
+ assert shell_cmd.wait_calls == 1
+ assert service._tasks == {}
+
+
+@pytest.mark.asyncio
+async def test_cleanup_background_runs_does_not_relabel_completed_agent_run(tmp_path):
+ registry = _FakeAgentRegistry()
+ service = _make_service(tmp_path, agent_registry=registry)
+ completed_task = asyncio.create_task(asyncio.sleep(0, result="done"))
+ await completed_task
+ service._tasks["agent-task"] = _RunningTask(
+ task=completed_task,
+ agent_id="agent-task",
+ thread_id="subagent-agent-task",
+ description="agent task",
+ )
+
+ await service.cleanup_background_runs()
+
+ assert getattr(registry, "last_status", None) is None
+ assert service._tasks == {}
+
+
+@pytest.mark.asyncio
+async def test_run_agent_cleans_up_child_background_runs_before_close(monkeypatch, tmp_path):
+ created: list[_FakeChildAgent] = []
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ child = _FakeChildAgent(Path(workspace_root), model_name)
+ created.append(child)
+ return child
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ service = _make_service(tmp_path)
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-task-1",
+ prompt="hello",
+ subagent_type="explore",
+ max_turns=None,
+ )
+
+ assert result == "(Agent completed with no text output)"
+ assert created[0].cleanup_calls == 1
+ assert created[0].closed is True
+
+
+@pytest.mark.asyncio
+async def test_run_agent_links_child_abort_controller_to_parent_tool_context(monkeypatch, tmp_path):
+ created: list[_FakeChildAgent] = []
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ child = _FakeChildAgent(Path(workspace_root), model_name)
+ created.append(child)
+ return child
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ service = _make_service(tmp_path)
+ parent_context = _make_parent_context(tmp_path)
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-task-1",
+ prompt="hello",
+ subagent_type="explore",
+ max_turns=None,
+ parent_tool_context=parent_context,
+ )
+
+ assert result == "(Agent completed with no text output)"
+
+ child_context = created[0]._agent_service._parent_tool_context
+ assert child_context is not None
+ assert getattr(created[0].agent, "_tool_abort_controller", None) is child_context.abort_controller
+
+ parent_context.abort_controller.abort()
+
+ assert child_context.abort_controller.is_aborted() is True
+
+
+@pytest.mark.asyncio
+async def test_run_agent_reuses_parent_lease_for_child_thread_terminal(monkeypatch, tmp_path, temp_db):
+ created: list[_FakeChildAgent] = []
+ observed: dict[str, str] = {}
+ parent_thread_id = "parent-thread"
+ child_thread_id = "subagent-child"
+
+ manager = SandboxManager(
+ provider=LocalSessionProvider(default_cwd=str(tmp_path)),
+ db_path=temp_db,
+ )
+ monkeypatch.setenv("LEON_SANDBOX_DB_PATH", str(temp_db))
+ monkeypatch.setattr(manager, "_setup_mounts", lambda thread_id: {"source": object(), "remote_path": str(tmp_path)})
+ monkeypatch.setattr(manager, "_sync_to_sandbox", lambda *args, **kwargs: None)
+
+ parent_capability = manager.get_sandbox(parent_thread_id)
+ parent_terminal_id = parent_capability._session.terminal.terminal_id
+ parent_lease_id = parent_capability._session.lease.lease_id
+
+ class _LeaseCapturingChild(_FakeChildAgent):
+ async def _astream(self, *args, **kwargs):
+ child_capability = manager.get_sandbox(get_current_thread_id())
+ observed["child_terminal_id"] = child_capability._session.terminal.terminal_id
+ observed["child_lease_id"] = child_capability._session.lease.lease_id
+ if False:
+ yield None
+ return
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ child = _LeaseCapturingChild(Path(workspace_root), model_name)
+ created.append(child)
+ return child
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+ set_current_thread_id(parent_thread_id)
+
+ service = _make_service(tmp_path)
+
+ try:
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id=child_thread_id,
+ prompt="hello",
+ subagent_type="explore",
+ max_turns=None,
+ )
+
+ assert result == "(Agent completed with no text output)"
+ assert created
+ assert observed["child_terminal_id"] != parent_terminal_id
+ assert observed["child_lease_id"] == parent_lease_id
+ finally:
+ manager.close()
+
+
+@pytest.mark.asyncio
+async def test_run_agent_inherits_parent_sandbox_when_forking_child(monkeypatch, tmp_path):
+ captured: dict[str, object] = {}
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ captured["model_name"] = model_name
+ captured["workspace_root"] = Path(workspace_root)
+ captured["sandbox"] = kwargs.get("sandbox")
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ service = _make_service(tmp_path)
+ service._parent_bootstrap = BootstrapConfig(
+ workspace_root=Path("/home/daytona"),
+ original_cwd=Path("/home/daytona"),
+ project_root=Path("/home/daytona"),
+ cwd=Path("/home/daytona"),
+ model_name="gpt-parent",
+ sandbox_type="daytona_selfhost",
+ )
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-1",
+ prompt="do work",
+ subagent_type="general",
+ max_turns=None,
+ fork_context=False,
+ )
+
+ assert result == "(Agent completed with no text output)"
+ assert captured["workspace_root"] == Path("/home/daytona")
+ assert captured["sandbox"] == "daytona_selfhost"
+
+
+@pytest.mark.asyncio
+async def test_run_agent_child_cleanup_skips_sandbox_close(monkeypatch, tmp_path):
+ created: list[_FakeChildAgent] = []
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ child = _FakeChildAgent(Path(workspace_root), model_name)
+ created.append(child)
+ return child
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ service = _make_service(tmp_path)
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-1",
+ prompt="do work",
+ subagent_type="general",
+ max_turns=None,
+ fork_context=False,
+ )
+
+ assert result == "(Agent completed with no text output)"
+ assert created[0].closed is True
+ assert created[0].close_kwargs == {"cleanup_sandbox": False}
+
+
+@pytest.mark.asyncio
+async def test_handle_agent_registers_subagent_thread_metadata_before_return(monkeypatch, tmp_path):
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ thread_repo = _FakeThreadRepo(
+ rows={
+ "parent-thread": {
+ "id": "parent-thread",
+ "member_id": "member-1",
+ "sandbox_type": "daytona_selfhost",
+ "cwd": "/home/daytona",
+ "model": "gpt-parent",
+ "is_main": True,
+ "branch_index": 0,
+ "created_at": 1.0,
+ }
+ }
+ )
+ entity_repo = _FakeEntityRepo()
+ member_repo = _FakeMemberRepo({"member-1": "Toad"})
+ service = _make_service(
+ tmp_path,
+ thread_repo=thread_repo,
+ entity_repo=entity_repo,
+ member_repo=member_repo,
+ )
+
+ set_current_thread_id("parent-thread")
+ try:
+ raw = await service._handle_agent(
+ prompt="do work",
+ name="worker-1",
+ run_in_background=True,
+ )
+ payload = _agent_tool_json(raw)
+ child_thread_id = payload["thread_id"]
+
+ child_thread = thread_repo.get_by_id(child_thread_id)
+ child_entity = entity_repo.get_by_thread_id(child_thread_id)
+
+ assert child_thread is not None
+ assert child_thread["member_id"] == "member-1"
+ assert child_thread["sandbox_type"] == "daytona_selfhost"
+ assert child_thread["cwd"] == "/home/daytona"
+ assert child_thread["is_main"] is False
+ assert child_thread["branch_index"] == 1
+ assert child_entity is not None
+ assert child_entity.id == child_thread_id
+ assert child_entity.member_id == "member-1"
+ assert child_entity.name == "worker-1"
+ finally:
+ await service.cleanup_background_runs()
+ set_current_thread_id("")
+
+
+@pytest.mark.asyncio
+async def test_handle_agent_reuses_existing_completed_child_thread_for_same_parent_and_name(monkeypatch, tmp_path):
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ thread_repo = _FakeThreadRepo(
+ rows={
+ "parent-thread": {
+ "id": "parent-thread",
+ "member_id": "member-1",
+ "sandbox_type": "daytona_selfhost",
+ "cwd": "/home/daytona",
+ "model": "gpt-parent",
+ "is_main": True,
+ "branch_index": 0,
+ "created_at": 1.0,
+ },
+ "subagent-existing": {
+ "id": "subagent-existing",
+ "member_id": "member-1",
+ "sandbox_type": "daytona_selfhost",
+ "cwd": "/home/daytona",
+ "model": "gpt-test",
+ "is_main": False,
+ "branch_index": 1,
+ "created_at": 2.0,
+ },
+ }
+ )
+ entity_repo = _FakeEntityRepo()
+ entity_repo.create(
+ EntityRow(
+ id="subagent-existing",
+ member_id="member-1",
+ thread_id="subagent-existing",
+ name="worker-1",
+ type="agent",
+ created_at=2.0,
+ )
+ )
+ registry = _FakeAgentRegistry()
+ registry._latest_by_name_parent[("worker-1", "parent-thread")] = SimpleNamespace(
+ agent_id="old-agent",
+ name="worker-1",
+ thread_id="subagent-existing",
+ status="completed",
+ parent_agent_id="parent-thread",
+ subagent_type="general",
+ )
+ service = _make_service(
+ tmp_path,
+ agent_registry=registry,
+ thread_repo=thread_repo,
+ entity_repo=entity_repo,
+ member_repo=_FakeMemberRepo({"member-1": "Toad"}),
+ )
+
+ set_current_thread_id("parent-thread")
+ try:
+ raw = await service._handle_agent(
+ prompt="continue work",
+ name="worker-1",
+ run_in_background=True,
+ )
+
+ payload = _agent_tool_json(raw)
+ assert payload["thread_id"] == "subagent-existing"
+ assert len(thread_repo.created) == 0
+ finally:
+ await service.cleanup_background_runs()
+ set_current_thread_id("")
+
+
+@pytest.mark.asyncio
+async def test_agent_tool_blocking_result_preserves_child_identity_metadata(monkeypatch, tmp_path):
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+
+ registry = ToolRegistry()
+ _make_service(tmp_path, tool_registry=registry)
+ runner = ToolRunner(registry=registry)
+ request = SimpleNamespace(
+ tool_call={
+ "name": "Agent",
+ "args": {"prompt": "inspect", "description": "inspect workspace"},
+ "id": "tc-1",
+ },
+ state=_make_parent_context(tmp_path),
+ )
+
+ result = await runner.awrap_tool_call(request, AsyncMock())
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert meta["task_id"]
+ assert meta["subagent_thread_id"].startswith("subagent-")
+
+
+@pytest.mark.asyncio
+async def test_run_agent_uses_live_child_thread_bridge_when_web_app_present(monkeypatch, tmp_path):
+ captured: dict[str, object] = {}
+
+ async def fake_run_child_thread_live(agent, thread_id, prompt, app, *, input_messages):
+ captured["agent"] = agent
+ captured["thread_id"] = thread_id
+ captured["prompt"] = prompt
+ captured["app"] = app
+ captured["input_messages"] = input_messages
+ return "LIVE_CHILD_DONE"
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ captured["child_web_app"] = kwargs.get("web_app")
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+ monkeypatch.setattr("backend.web.services.streaming_service.run_child_thread_live", fake_run_child_thread_live)
+
+ web_app = SimpleNamespace()
+ service = _make_service(tmp_path, web_app=web_app)
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-1",
+ prompt="do work",
+ subagent_type="general",
+ max_turns=None,
+ fork_context=False,
+ )
+
+ assert result == "LIVE_CHILD_DONE"
+ assert captured["thread_id"] == "subagent-1"
+ assert captured["prompt"] == "do work"
+ assert captured["app"] is web_app
+ assert captured["child_web_app"] is web_app
+ assert len(captured["input_messages"]) == 1
+ assert captured["input_messages"][0]["role"] == "user"
+ assert captured["input_messages"][0]["content"] == "do work"
+ assert captured["agent"].cleanup_calls == 1
+ assert captured["agent"].closed is False
+
+
+@pytest.mark.asyncio
+async def test_run_agent_normalizes_workspace_suffix_in_child_prompt(monkeypatch, tmp_path):
+ captured: dict[str, object] = {}
+
+ async def fake_run_child_thread_live(agent, thread_id, prompt, app, *, input_messages):
+ captured["prompt"] = prompt
+ captured["input_messages"] = input_messages
+ return "LIVE_CHILD_DONE"
+
+ def fake_create_leon_agent(*, model_name, workspace_root, **kwargs):
+ return _FakeChildAgent(Path(workspace_root), model_name)
+
+ monkeypatch.setattr("core.runtime.agent.create_leon_agent", fake_create_leon_agent)
+ monkeypatch.setattr("backend.web.services.streaming_service.run_child_thread_live", fake_run_child_thread_live)
+
+ service = _make_service(tmp_path, web_app=SimpleNamespace())
+ raw_prompt = f"Inspect the workspace at {tmp_path}/current working directory. Read-only only. Report existing files."
+
+ result = await service._run_agent(
+ task_id="task-1",
+ agent_name="child",
+ thread_id="subagent-1",
+ prompt=raw_prompt,
+ subagent_type="general",
+ max_turns=None,
+ fork_context=False,
+ )
+
+ assert result == "LIVE_CHILD_DONE"
+ expected_prompt = f"Inspect the workspace at {tmp_path}. Read-only only. Report existing files."
+ assert captured["prompt"] == expected_prompt
+ assert captured["input_messages"][0]["content"] == expected_prompt
+
+
+def test_agent_schema_does_not_claim_general_has_full_tool_access():
+ description = AGENT_SCHEMA["description"]
+
+ assert "general (full tool access)" not in description
+ assert "general (broad tool access except Agent, TaskOutput, and TaskStop)" in description
+
+
+def test_agent_schema_requires_description():
+ assert AGENT_SCHEMA["parameters"]["required"] == ["prompt", "description"]
+
+
+def test_task_output_schema_exposes_block_and_timeout():
+ properties = TASK_OUTPUT_SCHEMA["parameters"]["properties"]
+
+ assert properties["block"]["default"] is True
+ assert properties["timeout"]["default"] == 30000
+ assert properties["timeout"]["maximum"] == 600000
+
+
+@pytest.mark.asyncio
+async def test_ask_user_question_requests_structured_question_payload(tmp_path):
+ registry = ToolRegistry()
+ _make_service(tmp_path, tool_registry=registry)
+ runner = ToolRunner(registry=registry)
+ app_state = AppState()
+ captured: dict[str, object] = {}
+
+ def request_permission(name, args, context, request, message):
+ captured["name"] = name
+ captured["args"] = dict(args)
+ captured["message"] = message
+ return {"request_id": "ask-1"}
+
+ request = SimpleNamespace(
+ tool_call={
+ "name": "AskUserQuestion",
+ "args": {
+ "questions": [
+ {
+ "header": "Color",
+ "question": "Which color should I use?",
+ "options": [
+ {"label": "Blue", "description": "Use blue"},
+ {"label": "Green", "description": "Use green"},
+ ],
+ }
+ ]
+ },
+ "id": "tc-1",
+ },
+ state=ToolUseContext(
+ bootstrap=BootstrapConfig(workspace_root=tmp_path, model_name="gpt-test"),
+ get_app_state=app_state.get_state,
+ set_app_state=app_state.set_state,
+ request_permission=request_permission,
+ ),
+ )
+
+ result = await runner.awrap_tool_call(request, AsyncMock())
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert meta["kind"] == "permission_request"
+ assert meta["request_id"] == "ask-1"
+ assert result.content == "User input required to continue."
+ assert captured["name"] == "AskUserQuestion"
+ assert captured["message"] == "Please answer the following questions so Leon can continue."
+ assert captured["args"] == {
+ "questions": [
+ {
+ "header": "Color",
+ "question": "Which color should I use?",
+ "options": [
+ {"label": "Blue", "description": "Use blue"},
+ {"label": "Green", "description": "Use green"},
+ ],
+ }
+ ]
+ }
+
+
+def test_ask_user_question_schema_requires_questions():
+ assert ASK_USER_QUESTION_SCHEMA["parameters"]["required"] == ["questions"]
diff --git a/tests/Unit/core/test_capability_async.py b/tests/Unit/core/test_capability_async.py
new file mode 100644
index 000000000..d07334c3d
--- /dev/null
+++ b/tests/Unit/core/test_capability_async.py
@@ -0,0 +1,195 @@
+import asyncio
+import uuid
+from pathlib import Path
+from types import SimpleNamespace
+
+from sandbox.base import LocalSandbox
+from sandbox.capability import SandboxCapability
+from sandbox.interfaces.executor import AsyncCommand, ExecuteResult
+from sandbox.thread_context import set_current_thread_id
+
+
+class _DummyState:
+ cwd = "/tmp"
+
+
+class _DummyTerminal:
+ terminal_id = "dummy-term"
+
+ def get_state(self):
+ return _DummyState()
+
+
+class _DummyRuntime:
+ def __init__(self):
+ self.commands: list[str] = []
+ self._async_commands: dict[str, AsyncCommand] = {}
+
+ async def execute(self, command: str, timeout=None):
+ self.commands.append(command)
+ await asyncio.sleep(0.01)
+ return ExecuteResult(exit_code=0, stdout=f"ok:{command}", stderr="")
+
+ async def start_command(self, command: str, cwd: str) -> AsyncCommand:
+ command_id = f"cmd_{uuid.uuid4().hex[:12]}"
+ result = await self.execute(command)
+ async_cmd = AsyncCommand(
+ command_id=command_id,
+ command_line=command,
+ cwd=cwd,
+ exit_code=result.exit_code,
+ done=True,
+ stdout_buffer=[result.stdout],
+ )
+ self._async_commands[command_id] = async_cmd
+ return async_cmd
+
+ async def get_command(self, command_id: str) -> AsyncCommand | None:
+ return self._async_commands.get(command_id)
+
+ async def wait_for_command(self, command_id: str, timeout: float | None = None) -> ExecuteResult | None:
+ cmd = self._async_commands.get(command_id)
+ if cmd is None:
+ return None
+ return ExecuteResult(
+ exit_code=cmd.exit_code or 0,
+ stdout="".join(cmd.stdout_buffer),
+ stderr="".join(cmd.stderr_buffer),
+ )
+
+
+class _DummySession:
+ def __init__(self):
+ self.terminal = _DummyTerminal()
+ self.runtime = _DummyRuntime()
+ self.touches = 0
+
+ def touch(self):
+ self.touches += 1
+
+
+async def _run_async_command_flow():
+ session = _DummySession()
+ capability = SandboxCapability(session)
+
+ async_cmd = await capability.command.execute_async("echo hi", cwd="/tmp/demo", env={"A": "1"})
+ assert async_cmd.command_id.startswith("cmd_")
+
+ status = await capability.command.get_status(async_cmd.command_id)
+ assert status is not None
+
+ result = await capability.command.wait_for(async_cmd.command_id, timeout=1.0)
+ assert result is not None
+ assert result.exit_code == 0
+ assert "echo hi" in result.stdout
+ assert session.touches > 0
+
+
+def test_command_wrapper_supports_execute_async():
+ asyncio.run(_run_async_command_flow())
+
+
+def test_local_sandbox_rebuilds_stale_closed_capability_before_execute_async(tmp_path):
+ root = Path(tmp_path)
+ thread_id = "thread-stale-session"
+ sandbox = LocalSandbox(str(root), db_path=root / "sandbox.db")
+ set_current_thread_id(thread_id)
+ capability = sandbox._get_capability()
+ stale_session_id = capability._session.session_id
+ sandbox.manager.session_manager.delete(stale_session_id, reason="test_close")
+
+ async def run():
+ async_cmd = await sandbox.shell().execute_async("sleep 0.01; echo hi")
+ result = await sandbox.shell().wait_for(async_cmd.command_id, timeout=1.0)
+ return async_cmd, result
+
+ async_cmd, result = asyncio.run(run())
+
+ assert capability._session.status == "closed"
+ refreshed = sandbox._get_capability()
+ assert refreshed._session.session_id != stale_session_id
+ assert async_cmd.command_id.startswith("cmd_")
+ assert result is not None
+ assert result.exit_code == 0
+ assert "hi" in result.stdout
+
+
+def test_filesystem_wrapper_auto_resumes_paused_lease_before_listing():
+ class _PausedLease:
+ def __init__(self):
+ self.observed_state = "paused"
+
+ def ensure_active_instance(self, _provider):
+ if self.observed_state == "paused":
+ raise RuntimeError("Sandbox lease lease-1 is paused. Resume before executing commands.")
+ return SimpleNamespace(instance_id="inst-1")
+
+ class _RemoteProvider:
+ def list_dir(self, instance_id: str, path: str):
+ assert instance_id == "inst-1"
+ assert path == "/home/daytona"
+ return [{"name": "demo.txt", "type": "file", "size": 7}]
+
+ lease = _PausedLease()
+ provider = _RemoteProvider()
+ resume_calls: list[tuple[str, str]] = []
+
+ class _RemoteSession:
+ def __init__(self):
+ self.thread_id = "thread-paused"
+ self.terminal = _DummyTerminal()
+ self.lease = lease
+ self.runtime = SimpleNamespace(provider=provider)
+ self.touches = 0
+
+ def touch(self):
+ self.touches += 1
+
+ session = _RemoteSession()
+ manager = SimpleNamespace(
+ resume_session=lambda thread_id, source="user_resume": (
+ resume_calls.append((thread_id, source)) or setattr(lease, "observed_state", "running") or True
+ )
+ )
+
+ capability = SandboxCapability(session, manager=manager)
+
+ result = capability.fs.list_dir("/home/daytona")
+
+ assert resume_calls == [("thread-paused", "auto_resume")]
+ assert [entry.name for entry in result.entries] == ["demo.txt"]
+ assert result.error is None
+
+
+def test_filesystem_wrapper_derives_remote_file_size_from_parent_listing():
+ class _Lease:
+ observed_state = "running"
+
+ def ensure_active_instance(self, _provider):
+ return SimpleNamespace(instance_id="inst-1")
+
+ class _RemoteProvider:
+ def list_dir(self, instance_id: str, path: str):
+ assert instance_id == "inst-1"
+ assert path == "/home/daytona"
+ return [
+ {"name": "demo.txt", "type": "file", "size": 42},
+ {"name": "nested", "type": "directory", "size": 0},
+ ]
+
+ class _RemoteSession:
+ def __init__(self):
+ self.thread_id = "thread-size"
+ self.terminal = _DummyTerminal()
+ self.lease = _Lease()
+ self.runtime = SimpleNamespace(provider=_RemoteProvider())
+ self.touches = 0
+
+ def touch(self):
+ self.touches += 1
+
+ capability = SandboxCapability(_RemoteSession())
+
+ assert capability.fs.file_size("/home/daytona/demo.txt") == 42
+ assert capability.fs.file_size("/home/daytona/missing.txt") is None
+ assert capability.fs.file_size("/") is None
diff --git a/tests/Unit/core/test_chat_tool_service.py b/tests/Unit/core/test_chat_tool_service.py
new file mode 100644
index 000000000..facf94e15
--- /dev/null
+++ b/tests/Unit/core/test_chat_tool_service.py
@@ -0,0 +1,161 @@
+from types import SimpleNamespace
+
+from langchain_core.messages import HumanMessage
+
+from core.agents.communication.chat_tool_service import ChatToolService
+from core.runtime.agent import LeonAgent
+from core.runtime.registry import ToolRegistry
+from storage.contracts import EntityRow, MemberRow, MemberType
+
+
+class _EntityRepo:
+ def __init__(self, entities: list[EntityRow]) -> None:
+ self._entities = {entity.id: entity for entity in entities}
+
+ def list_all(self) -> list[EntityRow]:
+ return list(self._entities.values())
+
+ def get_by_id(self, entity_id: str) -> EntityRow | None:
+ return self._entities.get(entity_id)
+
+
+class _MemberRepo:
+ def __init__(self, members: list[MemberRow]) -> None:
+ self._members = {member.id: member for member in members}
+
+ def get_by_id(self, member_id: str) -> MemberRow | None:
+ return self._members.get(member_id)
+
+ def list_all(self) -> list[MemberRow]:
+ return list(self._members.values())
+
+
+def test_chat_tool_registry_exposes_only_canonical_chat_surface() -> None:
+ registry = ToolRegistry()
+ ChatToolService(
+ registry,
+ user_id="m_agent",
+ owner_user_id="u_owner",
+ entity_repo=_EntityRepo([]),
+ chat_service=SimpleNamespace(),
+ chat_entity_repo=SimpleNamespace(),
+ chat_message_repo=SimpleNamespace(),
+ member_repo=_MemberRepo([]),
+ chat_event_bus=SimpleNamespace(),
+ runtime_fn=lambda: None,
+ )
+
+ for tool_name in ("list_chats", "read_messages", "send_message", "search_messages"):
+ assert registry.get(tool_name) is not None
+
+ assert registry.get("chats") is None
+ assert registry.get("read_message") is None
+ assert registry.get("search_message") is None
+ assert registry.get("directory") is None
+
+
+def test_compose_system_prompt_hardens_chat_reply_contract() -> None:
+ owner_entity = EntityRow(id="e_owner", type="human", member_id="u_owner", name="Owner", created_at=1.0)
+ agent_entity = EntityRow(id="e_agent", type="agent", member_id="m_agent", name="Helper", created_at=2.0)
+
+ agent = LeonAgent.__new__(LeonAgent)
+ agent._chat_repos = {
+ "user_id": "m_agent",
+ "owner_user_id": "u_owner",
+ "entity_repo": _EntityRepo([owner_entity, agent_entity]),
+ "member_repo": _MemberRepo(
+ [
+ MemberRow(id="u_owner", name="Owner", type=MemberType.HUMAN, created_at=1.0),
+ MemberRow(id="m_agent", name="Helper Member", type=MemberType.MYCEL_AGENT, owner_user_id="u_owner", created_at=2.0),
+ ]
+ ),
+ }
+ agent._build_system_prompt = lambda: "BASE"
+ agent.config = SimpleNamespace(system_prompt=None)
+
+ prompt = agent._compose_system_prompt()
+
+ assert "you MUST read it with read_messages()" in prompt
+ assert "prefer using that exact chat_id directly" in prompt
+ assert "you MUST call send_message()" in prompt
+ assert "Never claim you replied unless send_message() succeeded." in prompt
+ assert "directory" not in prompt
+
+
+def test_read_messages_validate_input_fills_missing_chat_id_from_latest_notification() -> None:
+ registry = ToolRegistry()
+ ChatToolService(
+ registry,
+ user_id="m_agent",
+ owner_user_id="u_owner",
+ entity_repo=_EntityRepo([]),
+ chat_service=SimpleNamespace(),
+ chat_entity_repo=SimpleNamespace(),
+ chat_message_repo=SimpleNamespace(),
+ member_repo=_MemberRepo([]),
+ chat_event_bus=SimpleNamespace(),
+ runtime_fn=lambda: None,
+ )
+ entry = registry.get("read_messages")
+ assert entry is not None
+ assert entry.validate_input is not None
+
+ request = SimpleNamespace(
+ state=SimpleNamespace(
+ messages=[
+ HumanMessage(
+ content=(
+ "\n"
+ "New message from alice in chat chat-123 (1 unread).\n"
+ 'Read it with read_messages(chat_id="chat-123").\n'
+ ""
+ ),
+ metadata={"source": "external", "notification_type": "chat"},
+ )
+ ]
+ )
+ )
+
+ args = entry.validate_input({"chat_id": "", "range": "-10:"}, request)
+
+ assert args == {"chat_id": "chat-123", "range": "-10:"}
+
+
+def test_send_message_validate_input_fills_missing_chat_id_from_latest_notification() -> None:
+ registry = ToolRegistry()
+ ChatToolService(
+ registry,
+ user_id="m_agent",
+ owner_user_id="u_owner",
+ entity_repo=_EntityRepo([]),
+ chat_service=SimpleNamespace(),
+ chat_entity_repo=SimpleNamespace(),
+ chat_message_repo=SimpleNamespace(),
+ member_repo=_MemberRepo([]),
+ chat_event_bus=SimpleNamespace(),
+ runtime_fn=lambda: None,
+ )
+ entry = registry.get("send_message")
+ assert entry is not None
+ assert entry.validate_input is not None
+
+ request = SimpleNamespace(
+ state=SimpleNamespace(
+ messages=[
+ HumanMessage(
+ content=(
+ "\n"
+ "New message from alice in chat chat-456 (1 unread).\n"
+ 'Read it with read_messages(chat_id="chat-456").\n'
+ 'Reply with send_message(chat_id="chat-456", content="...").\n'
+ ""
+ ),
+ metadata={"source": "external", "notification_type": "chat"},
+ )
+ ]
+ )
+ )
+
+ args = entry.validate_input({"content": "hi", "chat_id": ""}, request)
+
+ assert args == {"content": "hi", "chat_id": "chat-456"}
diff --git a/tests/test_command_middleware.py b/tests/Unit/core/test_command_middleware.py
similarity index 73%
rename from tests/test_command_middleware.py
rename to tests/Unit/core/test_command_middleware.py
index 05d64edf1..c48e0b681 100644
--- a/tests/test_command_middleware.py
+++ b/tests/Unit/core/test_command_middleware.py
@@ -5,10 +5,12 @@
import pytest
+from core.runtime.registry import ToolRegistry
from core.tools.command.base import AsyncCommand, BaseExecutor, ExecuteResult
from core.tools.command.dispatcher import get_executor, get_shell_info
from core.tools.command.hooks.dangerous_commands import DangerousCommandsHook
from core.tools.command.middleware import CommandMiddleware
+from core.tools.command.service import CommandService
class TestExecuteResult:
@@ -107,6 +109,36 @@ def test_block_rm_rf(self):
assert not result.allow
assert "SECURITY" in result.error_message
+ def test_allow_dangerous_text_inside_quotes(self):
+ hook = DangerousCommandsHook(verbose=False)
+ result = hook.check_command('echo "rm -rf /"', {})
+ assert result.allow
+
+ def test_allow_dangerous_text_inside_comment(self):
+ hook = DangerousCommandsHook(verbose=False)
+ result = hook.check_command("echo hi # rm -rf /", {})
+ assert result.allow
+
+ def test_block_obfuscated_dangerous_command_name_with_inline_quotes(self):
+ hook = DangerousCommandsHook(verbose=False)
+ result = hook.check_command('s"u"do echo hi', {})
+ assert not result.allow
+
+ def test_block_obfuscated_file_mutation_command_name_with_inline_quotes(self):
+ hook = DangerousCommandsHook(verbose=False)
+ result = hook.check_command('ch"mo"d 777 /tmp/x', {})
+ assert not result.allow
+
+ def test_block_ansi_c_quoted_obfuscation(self):
+ hook = DangerousCommandsHook(verbose=False)
+ result = hook.check_command("s$'udo' echo hi", {})
+ assert not result.allow
+
+ def test_block_locale_quoted_obfuscation(self):
+ hook = DangerousCommandsHook(verbose=False)
+ result = hook.check_command('$"chmod" 777 /tmp/x', {})
+ assert not result.allow
+
def test_block_sudo(self):
hook = DangerousCommandsHook()
result = hook.check_command("sudo apt install", {})
@@ -185,6 +217,29 @@ def store_completed_result(self, command_id: str, command_line: str, cwd: str, r
return None
+class _BlankErrorExecutor(BaseExecutor):
+ runtime_owns_cwd = True
+ shell_name = "bash"
+
+ class BlankCommandError(Exception):
+ pass
+
+ async def execute(self, command: str, cwd: str | None = None, timeout: float | None = None, env=None):
+ raise self.BlankCommandError()
+
+ async def execute_async(self, command: str, cwd: str | None = None, env=None):
+ raise self.BlankCommandError()
+
+ async def get_status(self, command_id: str):
+ return None
+
+ async def wait_for(self, command_id: str, timeout: float | None = None):
+ return None
+
+ def store_completed_result(self, command_id: str, command_line: str, cwd: str, result: ExecuteResult) -> None:
+ return None
+
+
class TestCommandStatusFormatting:
@pytest.mark.asyncio
async def test_running_status_strips_pty_prompt_echo_noise(self, tmp_path):
@@ -224,3 +279,25 @@ async def test_running_status_includes_stderr_chunks(self, tmp_path):
output_block = out.split("Output so far:\n", 1)[1]
assert "out" in output_block
assert "err" in output_block
+
+
+class TestFailLoudBlankExceptions:
+ @pytest.mark.asyncio
+ async def test_command_middleware_surfaces_exception_type_when_message_is_blank(self, tmp_path):
+ middleware = CommandMiddleware(workspace_root=tmp_path, executor=_BlankErrorExecutor(), verbose=False)
+
+ out = await middleware._execute_blocking("pwd", str(tmp_path), timeout=1)
+
+ assert out == "Error executing command: BlankCommandError"
+
+ @pytest.mark.asyncio
+ async def test_command_service_surfaces_exception_type_when_message_is_blank(self, tmp_path):
+ service = CommandService(
+ registry=ToolRegistry(),
+ workspace_root=tmp_path,
+ executor=_BlankErrorExecutor(),
+ )
+
+ out = await service._bash("pwd")
+
+ assert out == "Error executing command: BlankCommandError"
diff --git a/tests/test_event_bus.py b/tests/Unit/core/test_event_bus.py
similarity index 100%
rename from tests/test_event_bus.py
rename to tests/Unit/core/test_event_bus.py
diff --git a/tests/Unit/core/test_loop.py b/tests/Unit/core/test_loop.py
new file mode 100644
index 000000000..15135c05e
--- /dev/null
+++ b/tests/Unit/core/test_loop.py
@@ -0,0 +1,2886 @@
+"""Unit tests for core.runtime.loop QueryLoop."""
+
+import asyncio
+import json
+import tempfile
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage, RemoveMessage, SystemMessage, ToolMessage
+from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
+
+from core.runtime.loop import QueryLoop, _StreamingToolExecutor
+from core.runtime.middleware import AgentMiddleware
+from core.runtime.middleware.memory import MemoryMiddleware
+from core.runtime.middleware.monitor import AgentState
+from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry
+from core.runtime.state import AppState, BootstrapConfig, ToolPermissionState
+from storage.providers.sqlite.kernel import connect_sqlite_async
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def make_registry(*entries):
+ reg = ToolRegistry()
+ for e in entries:
+ reg.register(e)
+ return reg
+
+
+def make_loop(model, registry=None, middleware=None, max_turns=10, app_state=None, runtime=None, bootstrap=None, checkpointer=None):
+ return QueryLoop(
+ model=model,
+ system_prompt=SystemMessage(content="You are a test assistant."),
+ middleware=middleware or [],
+ checkpointer=checkpointer,
+ registry=registry or make_registry(),
+ app_state=app_state,
+ runtime=runtime,
+ bootstrap=bootstrap or BootstrapConfig(workspace_root=Path("/tmp"), model_name="test-model"),
+ max_turns=max_turns,
+ )
+
+
+class _MemoryCheckpointer:
+ def __init__(self):
+ self.store = {}
+
+ async def aget(self, cfg):
+ return self.store.get(cfg["configurable"]["thread_id"])
+
+ async def aput(self, cfg, checkpoint, metadata, new_versions):
+ self.store[cfg["configurable"]["thread_id"]] = checkpoint
+
+
+def mock_model_no_tools(text="Hello!"):
+ """Model that returns a plain AIMessage (no tool calls)."""
+ ai_msg = AIMessage(content=text)
+ model = MagicMock()
+ model.bind_tools.return_value = model
+ model.ainvoke = AsyncMock(return_value=ai_msg)
+ return model
+
+
+def mock_model_with_tool_call(tool_name="echo", args=None, call_id="tc-1", then_text="Done"):
+ """Model that first responds with a tool call, then responds with plain text."""
+ args = args or {"message": "hi"}
+ tool_call_msg = AIMessage(
+ content="",
+ tool_calls=[{"name": tool_name, "args": args, "id": call_id}],
+ )
+ final_msg = AIMessage(content=then_text)
+ model = MagicMock()
+ model.bind_tools.return_value = model
+ model.ainvoke = AsyncMock(side_effect=[tool_call_msg, final_msg])
+ return model
+
+
+def mock_model_with_two_tool_turns():
+ first = AIMessage(content="", tool_calls=[{"name": "echo", "args": {"message": "one"}, "id": "tc-1"}])
+ second = AIMessage(content="", tool_calls=[{"name": "echo", "args": {"message": "two"}, "id": "tc-2"}])
+ final = AIMessage(content="done")
+ model = MagicMock()
+ model.bind_tools.return_value = model
+ model.ainvoke = AsyncMock(side_effect=[first, second, final])
+ return model
+
+
+def _make_summary_memory_middleware(*, context_limit=40, keep_recent_tokens=10, compaction_threshold=0.1):
+ summary_model = MagicMock()
+ summary_model.bind.return_value = summary_model
+ summary_model.ainvoke = AsyncMock(return_value=AIMessage(content="SUMMARY"))
+
+ memory = MemoryMiddleware(
+ context_limit=context_limit,
+ compaction_config=SimpleNamespace(reserve_tokens=0, keep_recent_tokens=keep_recent_tokens),
+ compaction_threshold=compaction_threshold,
+ )
+ memory.set_model(summary_model)
+ return memory, summary_model
+
+
+def _make_prompt_too_long_model(*responses):
+ model = MagicMock()
+ model.bind_tools.return_value = model
+ model.ainvoke = AsyncMock(side_effect=list(responses))
+ return model
+
+
+def make_inline_tool(name, handler, *, schema=None, is_concurrency_safe=True):
+ return ToolEntry(
+ name=name,
+ mode=ToolMode.INLINE,
+ schema=schema or {"name": name, "description": name, "parameters": {}},
+ handler=handler,
+ source="test",
+ is_concurrency_safe=is_concurrency_safe,
+ )
+
+
+def test_tool_use_context_get_app_state_is_live_closure():
+ app_state = AppState(turn_count=1)
+ loop = make_loop(mock_model_no_tools(), app_state=app_state)
+
+ ctx = loop._build_tool_use_context([])
+ assert ctx is not None
+ assert ctx.get_app_state().turn_count == 1
+
+ app_state.set_state(lambda prev: prev.model_copy(update={"turn_count": 7}))
+
+ assert ctx.get_app_state().turn_count == 7
+
+
+def test_tool_use_context_session_refs_persist_across_turns():
+ app_state = AppState()
+ loop = make_loop(mock_model_no_tools(), app_state=app_state)
+
+ ctx1 = loop._build_tool_use_context([HumanMessage(content="one")])
+ ctx2 = loop._build_tool_use_context([HumanMessage(content="two")])
+
+ assert ctx1 is not None
+ assert ctx2 is not None
+
+ ctx1.discovered_skill_names.add("skill-a")
+ ctx1.loaded_nested_memory_paths.add("/tmp/memory.md")
+ ctx1.read_file_state["/tmp/file.py"] = {"partial": False}
+
+ assert ctx2.discovered_skill_names is ctx1.discovered_skill_names
+ assert ctx2.loaded_nested_memory_paths is ctx1.loaded_nested_memory_paths
+ assert ctx2.read_file_state is ctx1.read_file_state
+ assert "skill-a" in ctx2.discovered_skill_names
+ assert "/tmp/memory.md" in ctx2.loaded_nested_memory_paths
+ assert "/tmp/file.py" in ctx2.read_file_state
+
+
+def test_tool_use_context_turn_refs_are_fresh_per_turn():
+ app_state = AppState()
+ loop = make_loop(mock_model_no_tools(), app_state=app_state)
+
+ ctx1 = loop._build_tool_use_context([HumanMessage(content="one")])
+ ctx2 = loop._build_tool_use_context([HumanMessage(content="two")])
+
+ assert ctx1 is not None
+ assert ctx2 is not None
+
+ ctx1.nested_memory_attachment_triggers.add("memo-a")
+
+ assert ctx2.nested_memory_attachment_triggers == set()
+ assert ctx2.nested_memory_attachment_triggers is not ctx1.nested_memory_attachment_triggers
+
+
+def test_tool_use_context_permission_request_surface_tracks_thread_pending_state():
+ app_state = AppState()
+ loop = make_loop(
+ mock_model_no_tools(),
+ app_state=app_state,
+ bootstrap=BootstrapConfig(
+ workspace_root=Path("/tmp"),
+ model_name="test-model",
+ permission_resolver_scope="thread",
+ ),
+ )
+
+ ctx = loop._build_tool_use_context([], thread_id="thread-a")
+ assert ctx is not None
+
+ request_id = ctx.request_permission("Write", {"path": "x"}, None, None, "needs approval")
+
+ assert isinstance(request_id, str)
+ assert app_state.pending_permission_requests[request_id]["thread_id"] == "thread-a"
+ assert app_state.pending_permission_requests[request_id]["tool_name"] == "Write"
+
+
+def test_tool_use_context_consumes_resolved_permission_once():
+ app_state = AppState(
+ resolved_permission_requests={
+ "perm-1": {
+ "thread_id": "thread-a",
+ "tool_name": "Write",
+ "args": {"path": "x"},
+ "decision": "allow",
+ "message": "approved",
+ }
+ }
+ )
+ loop = make_loop(mock_model_no_tools(), app_state=app_state)
+
+ ctx = loop._build_tool_use_context([], thread_id="thread-a")
+ assert ctx is not None
+
+ first = ctx.consume_permission_resolution("Write", {"path": "x"}, None, None)
+ second = ctx.consume_permission_resolution("Write", {"path": "x"}, None, None)
+
+ assert first == {"decision": "allow", "message": "approved"}
+ assert second is None
+ assert app_state.resolved_permission_requests == {}
+
+
+def test_tool_use_context_can_use_tool_reads_app_state_permission_rules():
+ app_state = AppState()
+ app_state.tool_permission_context.alwaysAskRules["session"] = ["Write"]
+ loop = make_loop(
+ mock_model_no_tools(),
+ app_state=app_state,
+ bootstrap=BootstrapConfig(
+ workspace_root=Path("/tmp"),
+ model_name="test-model",
+ permission_resolver_scope="thread",
+ ),
+ )
+
+ ctx = loop._build_tool_use_context([], thread_id="thread-a")
+ assert ctx is not None
+
+ decision = ctx.can_use_tool(
+ "Write",
+ {},
+ SimpleNamespace(is_read_only=False, is_destructive=False),
+ None,
+ )
+
+ assert decision == {
+ "decision": "ask",
+ "message": "Permission required by rule: Write",
+ }
+
+
+def test_tool_use_context_omits_permission_request_surface_without_interactive_resolver():
+ app_state = AppState()
+ loop = make_loop(mock_model_no_tools(), app_state=app_state)
+
+ ctx = loop._build_tool_use_context([], thread_id="thread-a")
+ assert ctx is not None
+
+ assert ctx.request_permission is None
+
+
+def test_tool_use_context_fails_loud_when_ask_has_no_interactive_resolver():
+ app_state = AppState()
+ app_state.tool_permission_context.alwaysAskRules["session"] = ["Write"]
+ loop = make_loop(mock_model_no_tools(), app_state=app_state)
+
+ ctx = loop._build_tool_use_context([], thread_id="thread-a")
+ assert ctx is not None
+
+ decision = ctx.can_use_tool(
+ "Write",
+ {},
+ SimpleNamespace(is_read_only=False, is_destructive=False),
+ None,
+ )
+
+ assert decision == {
+ "decision": "deny",
+ "message": "Permission required by rule: Write. No interactive permission resolver is available for this run.",
+ }
+
+
+class _CaptureTurnLocalStateMiddleware(AgentMiddleware):
+ def __init__(self):
+ self.turn_ids = []
+ self.trigger_snapshots = []
+
+ async def awrap_tool_call(self, request, handler):
+ self.turn_ids.append(request.state.turn_id)
+ self.trigger_snapshots.append(set(request.state.nested_memory_attachment_triggers))
+ if len(self.turn_ids) == 1:
+ request.state.nested_memory_attachment_triggers.add("first-turn-mark")
+ return await handler(request)
+
+
+@pytest.mark.asyncio
+async def test_query_loop_rebuilds_turn_local_tool_context_each_tool_turn():
+ model = mock_model_with_two_tool_turns()
+
+ def echo_handler(message: str) -> str:
+ return f"echo: {message}"
+
+ entry = ToolEntry(
+ name="echo",
+ mode=ToolMode.INLINE,
+ schema={"name": "echo", "description": "echo", "parameters": {}},
+ handler=echo_handler,
+ source="test",
+ is_concurrency_safe=False,
+ )
+ capture = _CaptureTurnLocalStateMiddleware()
+ loop = make_loop(model, registry=make_registry(entry), middleware=[capture], app_state=AppState())
+
+ async for _ in loop.astream({"messages": [{"role": "user", "content": "two turns"}]}):
+ pass
+
+ assert len(capture.turn_ids) == 2
+ assert capture.turn_ids[0] != capture.turn_ids[1]
+ assert capture.trigger_snapshots == [set(), set()]
+
+
+# ---------------------------------------------------------------------------
+# Tests: no tool calls → single agent chunk
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_no_tool_calls_yields_one_agent_chunk():
+ model = mock_model_no_tools("Hello world")
+ loop = make_loop(model)
+
+ chunks = []
+ async for chunk in loop.astream({"messages": [{"role": "user", "content": "hi"}]}):
+ chunks.append(chunk)
+
+ assert len(chunks) == 1
+ assert "agent" in chunks[0]
+ msgs = chunks[0]["agent"]["messages"]
+ assert len(msgs) == 1
+ assert msgs[0].content == "Hello world"
+
+
+@pytest.mark.asyncio
+async def test_no_tool_calls_model_called_once():
+ model = mock_model_no_tools()
+ loop = make_loop(model)
+
+ async for _ in loop.astream({"messages": [{"role": "user", "content": "hi"}]}):
+ pass
+
+ assert model.ainvoke.call_count == 1
+
+
+@pytest.mark.asyncio
+async def test_query_loop_clear_resets_turn_state_but_preserves_accumulators():
+ model = mock_model_no_tools("after clear")
+ checkpointer = _MemoryCheckpointer()
+ app_state = AppState(total_cost=1.25, tool_overrides={"Bash": False})
+ bootstrap = BootstrapConfig(workspace_root=Path("/tmp"), model_name="test-model")
+ loop = make_loop(
+ model=model,
+ checkpointer=checkpointer,
+ app_state=app_state,
+ bootstrap=bootstrap,
+ )
+
+ async for _ in loop.query(
+ {"messages": [{"role": "user", "content": "hi"}]},
+ config={"configurable": {"thread_id": "clear-thread"}},
+ ):
+ pass
+
+ loop._tool_read_file_state["/tmp/file.py"] = {"partial": False}
+ loop._tool_loaded_nested_memory_paths.add("/tmp/memory.md")
+ loop._tool_discovered_skill_names.add("skill-a")
+ old_session_id = bootstrap.session_id
+
+ await loop.aclear("clear-thread")
+
+ assert checkpointer.store["clear-thread"]["channel_values"]["messages"] == []
+ assert app_state.messages == []
+ assert app_state.turn_count == 0
+ assert app_state.compact_boundary_index == 0
+ assert app_state.total_cost == 1.25
+ assert app_state.tool_overrides == {"Bash": False}
+ assert loop._tool_read_file_state == {}
+ assert loop._tool_loaded_nested_memory_paths == set()
+ assert loop._tool_discovered_skill_names == set()
+ assert bootstrap.session_id != old_session_id
+ assert bootstrap.parent_session_id == old_session_id
+
+
+@pytest.mark.asyncio
+async def test_query_loop_replays_messages_with_real_async_sqlite_saver():
+ db_path = Path(tempfile.mkdtemp()) / "checkpoints.db"
+ conn = await connect_sqlite_async(db_path)
+ saver = AsyncSqliteSaver(conn)
+ await saver.setup()
+
+ try:
+ model = mock_model_no_tools("persist me")
+ loop = make_loop(
+ model=model,
+ checkpointer=saver,
+ app_state=AppState(),
+ )
+
+ async for _ in loop.query(
+ {"messages": [{"role": "user", "content": "first"}]},
+ config={"configurable": {"thread_id": "persist-thread"}},
+ ):
+ pass
+
+ reloaded = await loop._load_messages("persist-thread")
+ assert [msg.content for msg in reloaded] == ["first", "persist me"]
+ finally:
+ await conn.close()
+
+
+@pytest.mark.asyncio
+async def test_query_loop_aclear_wipes_real_async_sqlite_saver_history():
+ db_path = Path(tempfile.mkdtemp()) / "checkpoints.db"
+ conn = await connect_sqlite_async(db_path)
+ saver = AsyncSqliteSaver(conn)
+ await saver.setup()
+
+ try:
+ model = mock_model_no_tools("persist me")
+ loop = make_loop(
+ model=model,
+ checkpointer=saver,
+ app_state=AppState(total_cost=1.25),
+ bootstrap=BootstrapConfig(workspace_root=Path("/tmp"), model_name="test-model", total_cost_usd=1.25),
+ )
+
+ async for _ in loop.query(
+ {"messages": [{"role": "user", "content": "first"}]},
+ config={"configurable": {"thread_id": "clear-real-thread"}},
+ ):
+ pass
+
+ assert [msg.content for msg in await loop._load_messages("clear-real-thread")] == ["first", "persist me"]
+
+ await loop.aclear("clear-real-thread")
+
+ assert await loop._load_messages("clear-real-thread") == []
+ assert loop._app_state is not None
+ assert loop._app_state.total_cost == 1.25
+ finally:
+ await conn.close()
+
+
+@pytest.mark.asyncio
+async def test_query_loop_aget_state_exposes_messages_for_backend_callers():
+ model = mock_model_no_tools("state me")
+ checkpointer = _MemoryCheckpointer()
+ loop = make_loop(
+ model=model,
+ checkpointer=checkpointer,
+ app_state=AppState(),
+ )
+ config = {"configurable": {"thread_id": "state-thread"}}
+
+ async for _ in loop.query(
+ {"messages": [{"role": "user", "content": "hello"}]},
+ config=config,
+ ):
+ pass
+
+ state = await loop.aget_state(config)
+
+ assert state.values is not None
+ assert [msg.content for msg in state.values["messages"]] == ["hello", "state me"]
+
+
+@pytest.mark.asyncio
+async def test_query_loop_aget_state_exposes_persisted_permission_state_for_backend_callers():
+ checkpointer = _MemoryCheckpointer()
+ pending = {
+ "perm-1": {
+ "request_id": "perm-1",
+ "thread_id": "perm-thread",
+ "tool_name": "Write",
+ "args": {"path": "/tmp/a.txt"},
+ "message": "needs approval",
+ }
+ }
+ resolved = {
+ "perm-2": {
+ "request_id": "perm-2",
+ "thread_id": "perm-thread",
+ "tool_name": "Edit",
+ "args": {"path": "/tmp/b.txt"},
+ "decision": "allow",
+ "message": "approved",
+ }
+ }
+ loop = make_loop(
+ model=mock_model_no_tools("persist permissions"),
+ checkpointer=checkpointer,
+ app_state=AppState(
+ tool_permission_context=ToolPermissionState(
+ alwaysAllowRules={"session": ["Write"]},
+ alwaysDenyRules={"session": ["Bash"]},
+ alwaysAskRules={"session": ["Edit"]},
+ ),
+ pending_permission_requests=pending,
+ resolved_permission_requests=resolved,
+ ),
+ )
+ config = {"configurable": {"thread_id": "perm-thread"}}
+
+ await loop._save_messages("perm-thread", [HumanMessage(content="hello")])
+
+ reloaded = make_loop(
+ model=mock_model_no_tools("unused"),
+ checkpointer=checkpointer,
+ app_state=AppState(),
+ )
+
+ state = await reloaded.aget_state(config)
+
+ assert state.values["pending_permission_requests"] == pending
+ assert state.values["resolved_permission_requests"] == resolved
+ assert state.values["tool_permission_context"] == {
+ "alwaysAllowRules": {"session": ["Write"]},
+ "alwaysDenyRules": {"session": ["Bash"]},
+ "alwaysAskRules": {"session": ["Edit"]},
+ "allowManagedPermissionRulesOnly": False,
+ }
+
+
+@pytest.mark.asyncio
+async def test_query_loop_aget_state_uses_live_permission_state_while_active():
+ checkpointer = _MemoryCheckpointer()
+ app_state = AppState(
+ messages=[HumanMessage(content="live human")],
+ tool_permission_context=ToolPermissionState(alwaysAskRules={"session": ["Bash"]}),
+ pending_permission_requests={
+ "perm-live": {
+ "request_id": "perm-live",
+ "thread_id": "perm-thread",
+ "tool_name": "Bash",
+ "args": {"command": "echo hi"},
+ "message": "Permission required by rule: Bash",
+ }
+ },
+ )
+ loop = make_loop(
+ model=mock_model_no_tools("unused"),
+ checkpointer=checkpointer,
+ app_state=app_state,
+ runtime=SimpleNamespace(current_state=AgentState.ACTIVE),
+ )
+ config = {"configurable": {"thread_id": "perm-thread"}}
+
+ state = await loop.aget_state(config)
+
+ assert [msg.content for msg in state.values["messages"]] == ["live human"]
+ assert state.values["pending_permission_requests"] == {
+ "perm-live": {
+ "request_id": "perm-live",
+ "thread_id": "perm-thread",
+ "tool_name": "Bash",
+ "args": {"command": "echo hi"},
+ "message": "Permission required by rule: Bash",
+ }
+ }
+ assert state.values["tool_permission_context"] == {
+ "alwaysAllowRules": {},
+ "alwaysDenyRules": {},
+ "alwaysAskRules": {"session": ["Bash"]},
+ "allowManagedPermissionRulesOnly": False,
+ }
+
+
+@pytest.mark.asyncio
+async def test_query_loop_restores_persisted_permission_state_into_live_app_state():
+ checkpointer = _MemoryCheckpointer()
+ pending = {
+ "perm-1": {
+ "request_id": "perm-1",
+ "thread_id": "perm-thread",
+ "tool_name": "Write",
+ "args": {"path": "/tmp/a.txt"},
+ "message": "needs approval",
+ }
+ }
+ resolved = {
+ "perm-2": {
+ "request_id": "perm-2",
+ "thread_id": "perm-thread",
+ "tool_name": "Edit",
+ "args": {"path": "/tmp/b.txt"},
+ "decision": "allow",
+ "message": "approved",
+ }
+ }
+ seed_loop = make_loop(
+ model=mock_model_no_tools("seed"),
+ checkpointer=checkpointer,
+ app_state=AppState(
+ tool_permission_context=ToolPermissionState(
+ alwaysAllowRules={"session": ["Write"]},
+ alwaysDenyRules={"session": ["Bash"]},
+ alwaysAskRules={"session": ["Edit"]},
+ ),
+ pending_permission_requests=pending,
+ resolved_permission_requests=resolved,
+ ),
+ )
+ await seed_loop._save_messages("perm-thread", [HumanMessage(content="existing")])
+
+ app_state = AppState()
+ reloaded = make_loop(
+ model=mock_model_no_tools("after restore"),
+ checkpointer=checkpointer,
+ app_state=app_state,
+ )
+
+ async for _ in reloaded.query(
+ {"messages": [{"role": "user", "content": "continue"}]},
+ config={"configurable": {"thread_id": "perm-thread"}},
+ ):
+ pass
+
+ assert app_state.pending_permission_requests == pending
+ assert app_state.resolved_permission_requests == resolved
+ assert app_state.tool_permission_context.alwaysAllowRules == {"session": ["Write"]}
+ assert app_state.tool_permission_context.alwaysDenyRules == {"session": ["Bash"]}
+ assert app_state.tool_permission_context.alwaysAskRules == {"session": ["Edit"]}
+
+
+@pytest.mark.asyncio
+async def test_query_loop_persists_cleared_permission_state_after_resolution_consumed():
+ checkpointer = _MemoryCheckpointer()
+ request_id = "perm-ask"
+ thread_id = "perm-thread"
+ args = {
+ "questions": [
+ {
+ "header": "Choice",
+ "question": "Pick one.",
+ "multiSelect": False,
+ "options": [{"label": "Alpha", "description": "Alpha"}],
+ }
+ ]
+ }
+ app_state = AppState(
+ messages=[HumanMessage(content="existing")],
+ pending_permission_requests={
+ request_id: {
+ "request_id": request_id,
+ "thread_id": thread_id,
+ "tool_name": "AskUserQuestion",
+ "args": args,
+ "message": "Answer questions?",
+ }
+ },
+ )
+ loop = make_loop(
+ model=mock_model_no_tools("seed"),
+ checkpointer=checkpointer,
+ app_state=app_state,
+ )
+
+ resolved_payload = {
+ "request_id": request_id,
+ "thread_id": thread_id,
+ "tool_name": "AskUserQuestion",
+ "args": args,
+ "decision": "allow",
+ "message": "Answer questions?",
+ "answers": [
+ {
+ "header": "Choice",
+ "question": "Pick one.",
+ "selected_options": ["Alpha"],
+ }
+ ],
+ }
+ app_state.set_state(
+ lambda prev: prev.model_copy(
+ update={
+ "pending_permission_requests": {},
+ "resolved_permission_requests": {request_id: resolved_payload},
+ }
+ )
+ )
+
+ await loop.apersist_state(thread_id)
+ persisted = await loop._load_checkpoint_channel_values(thread_id)
+ assert persisted["pending_permission_requests"] == {}
+ assert persisted["resolved_permission_requests"] == {request_id: resolved_payload}
+
+ ctx = loop._build_tool_use_context([], thread_id=thread_id)
+ assert ctx is not None
+ assert ctx.consume_permission_resolution("AskUserQuestion", args, None, None) == {
+ "decision": "allow",
+ "message": "Answer questions?",
+ }
+ assert app_state.pending_permission_requests == {}
+ assert app_state.resolved_permission_requests == {}
+
+ await loop.apersist_state(thread_id)
+ persisted = await loop._load_checkpoint_channel_values(thread_id)
+ assert persisted["pending_permission_requests"] == {}
+ assert persisted["resolved_permission_requests"] == {}
+
+
+@pytest.mark.asyncio
+async def test_query_loop_aupdate_state_appends_start_messages_for_resume():
+ model = mock_model_no_tools("after resume")
+ checkpointer = _MemoryCheckpointer()
+ loop = make_loop(
+ model=model,
+ checkpointer=checkpointer,
+ app_state=AppState(),
+ )
+ config = {"configurable": {"thread_id": "resume-thread"}}
+
+ async for _ in loop.query(
+ {"messages": [{"role": "user", "content": "first"}]},
+ config=config,
+ ):
+ pass
+
+ await loop.aupdate_state(
+ config,
+ {"messages": [HumanMessage(content="second")]},
+ as_node="__start__",
+ )
+
+ state = await loop.aget_state(config)
+ assert [msg.content for msg in state.values["messages"]] == ["first", "after resume", "second"]
+
+
+@pytest.mark.asyncio
+async def test_query_loop_aupdate_state_applies_remove_and_insert_message_repairs():
+ checkpointer = _MemoryCheckpointer()
+ broken_ai = AIMessage(
+ content="",
+ tool_calls=[{"name": "Read", "args": {"file_path": "/tmp/a.txt"}, "id": "tc-1"}],
+ )
+ tool_reply = ToolMessage(content="old", tool_call_id="tc-1", name="Read")
+ trailing = HumanMessage(content="after tool")
+ tool_reply.id = "tool-old"
+ trailing.id = "human-after"
+ checkpointer.store["repair-thread"] = {"channel_values": {"messages": [broken_ai, tool_reply, trailing]}}
+
+ loop = make_loop(
+ model=mock_model_no_tools("unused"),
+ checkpointer=checkpointer,
+ app_state=AppState(),
+ )
+ config = {"configurable": {"thread_id": "repair-thread"}}
+
+ await loop.aupdate_state(
+ config,
+ {
+ "messages": [
+ RemoveMessage(id="tool-old"),
+ RemoveMessage(id="human-after"),
+ ToolMessage(content="repaired", tool_call_id="tc-1", name="Read"),
+ HumanMessage(content="after tool"),
+ ]
+ },
+ )
+
+ state = await loop.aget_state(config)
+ contents = [getattr(msg, "content", None) for msg in state.values["messages"]]
+ assert contents == ["", "repaired", "after tool"]
+
+
+@pytest.mark.asyncio
+async def test_query_loop_astream_none_resumes_after_state_injection():
+ model = MagicMock()
+ model.bind_tools.return_value = model
+ model.ainvoke = AsyncMock(
+ side_effect=[
+ AIMessage(content="first answer"),
+ AIMessage(content="resumed answer"),
+ ]
+ )
+ checkpointer = _MemoryCheckpointer()
+ loop = QueryLoop(
+ model=model,
+ system_prompt=SystemMessage(content="You are a test assistant."),
+ middleware=[],
+ checkpointer=checkpointer,
+ registry=make_registry(),
+ app_state=AppState(),
+ runtime=None,
+ bootstrap=BootstrapConfig(workspace_root=Path("/tmp"), model_name="test-model"),
+ max_turns=10,
+ )
+ config = {"configurable": {"thread_id": "resume-stream-thread"}}
+
+ async for _ in loop.query(
+ {"messages": [{"role": "user", "content": "first"}]},
+ config=config,
+ ):
+ pass
+
+ await loop.aupdate_state(
+ config,
+ {"messages": [HumanMessage(content="followup")]},
+ as_node="__start__",
+ )
+
+ events = []
+ async for event in loop.astream(None, config=config):
+ events.append(event)
+
+ assert any(msg.content == "resumed answer" for event in events for msg in event.get("agent", {}).get("messages", []))
+
+
+@pytest.mark.asyncio
+async def test_query_loop_aclear_deletes_persisted_summary_for_thread():
+ db_path = Path(tempfile.mkdtemp()) / "memory.db"
+ mm = MemoryMiddleware(db_path=db_path)
+ mm.summary_store.save_summary(
+ thread_id="clear-summary-thread",
+ summary_text="STALE SUMMARY",
+ compact_up_to_index=2,
+ compacted_at=2,
+ )
+
+ loop = QueryLoop(
+ model=mock_model_no_tools("done"),
+ system_prompt=SystemMessage(content="You are a test assistant."),
+ middleware=[mm],
+ checkpointer=None,
+ registry=make_registry(),
+ app_state=AppState(total_cost=1.25),
+ runtime=None,
+ bootstrap=BootstrapConfig(workspace_root=Path("/tmp"), model_name="test-model", total_cost_usd=1.25),
+ max_turns=10,
+ )
+
+ await loop.aclear("clear-summary-thread")
+
+ assert mm.summary_store.get_latest_summary("clear-summary-thread") is None
+
+
+# ---------------------------------------------------------------------------
+# Tests: with tool calls → agent chunk + tools chunk
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_tool_call_yields_agent_then_tools():
+ model = mock_model_with_tool_call()
+
+ # Register a simple echo tool
+ def echo_handler(message: str) -> str:
+ return f"echo: {message}"
+
+ entry = ToolEntry(
+ name="echo",
+ mode=ToolMode.INLINE,
+ schema={"name": "echo", "description": "echo", "parameters": {"type": "object", "properties": {}}},
+ handler=echo_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ registry = make_registry(entry)
+ loop = make_loop(model, registry=registry)
+
+ chunks = []
+ async for chunk in loop.astream({"messages": [{"role": "user", "content": "call echo"}]}):
+ chunks.append(chunk)
+
+ # First chunk: agent (with tool_calls)
+ # Second chunk: tools (ToolMessage results)
+ # Third chunk: agent (final text response)
+ agent_chunks = [c for c in chunks if "agent" in c]
+ tools_chunks = [c for c in chunks if "tools" in c]
+
+ assert len(agent_chunks) >= 1
+ assert len(tools_chunks) >= 1
+
+ # Tool result should be a ToolMessage
+ tool_msgs = tools_chunks[0]["tools"]["messages"]
+ assert len(tool_msgs) == 1
+ assert isinstance(tool_msgs[0], ToolMessage)
+
+
+@pytest.mark.asyncio
+async def test_tool_call_result_content():
+ model = mock_model_with_tool_call(tool_name="echo", args={"message": "test-val"})
+
+ def echo_handler(message: str) -> str:
+ return f"echo: {message}"
+
+ entry = ToolEntry(
+ name="echo",
+ mode=ToolMode.INLINE,
+ schema={"name": "echo", "description": "d", "parameters": {}},
+ handler=echo_handler,
+ source="test",
+ is_concurrency_safe=False,
+ )
+ loop = make_loop(model, registry=make_registry(entry))
+
+ tool_results = []
+ async for chunk in loop.astream({"messages": [{"role": "user", "content": "x"}]}):
+ if "tools" in chunk:
+ tool_results.extend(chunk["tools"]["messages"])
+
+ assert len(tool_results) == 1
+ assert "echo: test-val" in tool_results[0].content
+
+
+def test_tool_concurrency_safety_does_not_infer_from_read_only():
+ entry = ToolEntry(
+ name="readonly_serial",
+ mode=ToolMode.INLINE,
+ schema={"name": "readonly_serial", "description": "d", "parameters": {}},
+ handler=lambda: "ok",
+ source="test",
+ is_read_only=True,
+ is_concurrency_safe=False,
+ )
+ loop = make_loop(mock_model_no_tools(), registry=make_registry(entry))
+
+ assert loop._tool_is_concurrency_safe({"name": "readonly_serial", "args": {}}) is False
+
+
+# ---------------------------------------------------------------------------
+# Tests: max_turns guard
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_max_turns_stops_loop():
+ """Agent that hits max_turns should fail loudly on the caller-facing astream surface."""
+
+ def noop_handler() -> str:
+ return "ok"
+
+ entry = ToolEntry(
+ name="noop",
+ mode=ToolMode.INLINE,
+ schema={"name": "noop", "description": "d", "parameters": {}},
+ handler=noop_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+
+ # Build a model that always returns a tool call
+ tool_call_msg = AIMessage(
+ content="",
+ tool_calls=[{"name": "noop", "args": {}, "id": "tc-1"}],
+ )
+ model = MagicMock()
+ model.bind_tools.return_value = model
+ model.ainvoke = AsyncMock(return_value=tool_call_msg)
+
+ loop = make_loop(model, registry=make_registry(entry), max_turns=3)
+
+ with pytest.raises(RuntimeError, match="max_turns"):
+ async for _ in loop.astream({"messages": [{"role": "user", "content": "go"}]}):
+ pass
+
+ assert model.ainvoke.call_count == 3
+
+
+# ---------------------------------------------------------------------------
+# Tests: input parsing
+# ---------------------------------------------------------------------------
+
+
+def test_parse_input_dict_messages():
+ msgs = QueryLoop._parse_input({"messages": [{"role": "user", "content": "hello"}]})
+ assert len(msgs) == 1
+ assert isinstance(msgs[0], HumanMessage)
+ assert msgs[0].content == "hello"
+
+
+def test_parse_input_langchain_messages():
+ human = HumanMessage(content="hi")
+ msgs = QueryLoop._parse_input({"messages": [human]})
+ assert msgs[0] is human
+
+
+def test_parse_input_empty():
+ assert QueryLoop._parse_input({}) == []
+ assert QueryLoop._parse_input({"messages": []}) == []
+
+
+@pytest.mark.asyncio
+async def test_query_loop_syncs_app_state_on_completion():
+ model = mock_model_no_tools("AppState wired")
+ app_state = AppState(compact_boundary_index=99)
+ loop = make_loop(model, app_state=app_state, runtime=SimpleNamespace(cost=1.25))
+
+ async for _ in loop.query({"messages": [{"role": "user", "content": "sync"}]}):
+ pass
+
+ assert app_state.turn_count == 1
+ assert app_state.total_cost == 1.25
+ assert app_state.compact_boundary_index == 0
+ assert len(app_state.messages) == 2
+ assert app_state.messages[0].content == "sync"
+ assert app_state.messages[1].content == "AppState wired"
+
+
+@pytest.mark.asyncio
+async def test_query_loop_does_not_decrease_total_cost_when_runtime_reports_less():
+ model = mock_model_no_tools("cost stays monotonic")
+ app_state = AppState(total_cost=1.25)
+ bootstrap = BootstrapConfig(workspace_root=Path("/tmp"), model_name="test-model", total_cost_usd=1.25)
+ loop = QueryLoop(
+ model=model,
+ system_prompt=SystemMessage(content="You are a test assistant."),
+ middleware=[],
+ checkpointer=None,
+ registry=make_registry(),
+ app_state=app_state,
+ runtime=SimpleNamespace(cost=0.0),
+ bootstrap=bootstrap,
+ max_turns=10,
+ )
+
+ async for _ in loop.query({"messages": [{"role": "user", "content": "sync"}]}):
+ pass
+
+ assert app_state.total_cost == 1.25
+ assert bootstrap.total_cost_usd == 1.25
+
+
+@pytest.mark.asyncio
+async def test_query_loop_resets_dirty_app_state_turn_count_between_runs():
+ model = mock_model_no_tools("fresh")
+ app_state = AppState(turn_count=99, compact_boundary_index=7)
+ loop = make_loop(model, app_state=app_state, runtime=SimpleNamespace(cost=0.0))
+
+ first = await loop.ainvoke({"messages": [{"role": "user", "content": "hi"}]})
+ second = await loop.ainvoke({"messages": [{"role": "user", "content": "again"}]})
+
+ assert first["reason"] == "completed"
+ assert second["reason"] == "completed"
+ assert app_state.turn_count == 1
+ assert app_state.compact_boundary_index == 0
+ assert len(app_state.messages) == 2
+
+
+@pytest.mark.asyncio
+async def test_query_loop_refreshes_tools_between_tool_turns():
+ events: list[str] = []
+
+ async def refresh_tools() -> None:
+ events.append("refresh")
+
+ def echo_handler(message: str) -> str:
+ events.append("tool")
+ return f"echo: {message}"
+
+ tool_call_msg = AIMessage(
+ content="",
+ tool_calls=[{"name": "echo", "args": {"message": "hi"}, "id": "tc-1"}],
+ )
+ final_msg = AIMessage(content="done")
+ model = MagicMock()
+ model.bind_tools.return_value = model
+
+ async def ainvoke_side_effect(*args, **kwargs):
+ if not events:
+ events.append("model-1")
+ return tool_call_msg
+ assert events == ["model-1", "tool", "refresh"]
+ events.append("model-2")
+ return final_msg
+
+ model.ainvoke = AsyncMock(side_effect=ainvoke_side_effect)
+
+ entry = ToolEntry(
+ name="echo",
+ mode=ToolMode.INLINE,
+ schema={"name": "echo", "description": "echo", "parameters": {"type": "object", "properties": {}}},
+ handler=echo_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(model, registry=make_registry(entry))
+ loop._refresh_tools = refresh_tools
+
+ async for _ in loop.query({"messages": [{"role": "user", "content": "call echo"}]}):
+ pass
+
+ assert events == ["model-1", "tool", "refresh", "model-2"]
+
+
+@pytest.mark.asyncio
+async def test_streaming_overlap_snapshots_reused_live_chunks_before_final_aggregation():
+ class ReusedChunkModel:
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ chunk = AIMessageChunk(
+ content="",
+ response_metadata={"model_provider": "openai"},
+ id="shared-chunk",
+ tool_calls=[],
+ invalid_tool_calls=[],
+ tool_call_chunks=[],
+ )
+ yield chunk
+ chunk.content = "HEL"
+ yield chunk
+ chunk.content = "LO"
+ yield chunk
+ chunk.content = ""
+ chunk.usage_metadata = {"input_tokens": 10, "output_tokens": 2, "total_tokens": 12}
+ yield chunk
+ chunk.chunk_position = "last"
+ yield chunk
+
+ loop = make_loop(ReusedChunkModel())
+
+ agent_messages = []
+ async for event in loop.query({"messages": [{"role": "user", "content": "hi"}]}):
+ if "agent" in event:
+ agent_messages.extend(event["agent"]["messages"])
+
+ assert len(agent_messages) == 1
+ assert agent_messages[0].content == "HELLO"
+ assert agent_messages[0].usage_metadata == {
+ "input_tokens": 10,
+ "output_tokens": 2,
+ "total_tokens": 12,
+ }
+
+
+class _CaptureToolContextMiddleware:
+ def __init__(self):
+ self.messages = None
+ self.boundary = None
+
+ async def awrap_tool_call(self, request, handler):
+ self.messages = list(request.state.messages)
+ self.boundary = request.state.get_app_state().compact_boundary_index
+ return await handler(request)
+
+
+@pytest.mark.asyncio
+async def test_query_loop_syncs_tool_context_messages_to_query_time_array():
+ capture = _CaptureToolContextMiddleware()
+ model = mock_model_with_tool_call(tool_name="echo", args={"message": "ctx"}, then_text="done")
+
+ def echo_handler(message: str) -> str:
+ return f"echo: {message}"
+
+ entry = make_inline_tool("echo", echo_handler)
+ loop = make_loop(
+ model,
+ registry=make_registry(entry),
+ middleware=[capture],
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ async for _ in loop.query({"messages": [{"role": "user", "content": "call echo"}]}):
+ pass
+
+ assert capture.messages is not None
+ assert len(capture.messages) == 1
+ assert capture.messages[0].content == "call echo"
+
+
+class _SummaryBoundaryMiddleware:
+ def __init__(self, boundary_index: int):
+ self.boundary_index = boundary_index
+ self.compact_boundary_index = boundary_index
+
+ async def awrap_model_call(self, request, handler):
+ rewritten = [SystemMessage(content="summary")] + list(request.messages[self.boundary_index :])
+ return await handler(request.override(messages=rewritten))
+
+
+class _ReactiveCompactMiddleware:
+ compact_boundary_index = 2
+
+ async def compact_messages_for_recovery(self, messages):
+ return [SystemMessage(content="[Conversation Summary]\nSUMMARY")] + list(messages[-1:])
+
+
+class _CollapseDrainMiddleware:
+ def __init__(self):
+ self.calls = 0
+
+ async def recover_from_overflow(self, messages):
+ self.calls += 1
+ return {
+ "committed": 1,
+ "messages": [SystemMessage(content="[Collapsed Context]\nDRAINED")] + list(messages[-1:]),
+ }
+
+
+class _EscalationModel:
+ def __init__(self):
+ self.max_tokens_values = []
+ self.calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ def bind(self, **kwargs):
+ self.max_tokens_values.append(kwargs.get("max_tokens"))
+ return self
+
+ async def ainvoke(self, messages):
+ self.calls += 1
+ if self.calls == 1:
+ raise RuntimeError("max_output_tokens")
+ return AIMessage(content="after escalate")
+
+
+class _EscalationThenRecoveryModel:
+ def __init__(self):
+ self.max_tokens_values = []
+ self.calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ def bind(self, **kwargs):
+ self.max_tokens_values.append(kwargs.get("max_tokens"))
+ return self
+
+ async def ainvoke(self, messages):
+ self.calls += 1
+ if self.calls in (1, 2):
+ raise RuntimeError("max_output_tokens")
+ return AIMessage(content="after recovery")
+
+
+class _ContextOverflowModel:
+ def __init__(self):
+ self.calls = 0
+ self.max_tokens_values = []
+
+ def bind_tools(self, tools):
+ return self
+
+ def bind(self, **kwargs):
+ self.max_tokens_values.append(kwargs.get("max_tokens"))
+ return self
+
+ async def ainvoke(self, messages):
+ self.calls += 1
+ if self.calls == 1:
+ raise RuntimeError("input length and `max_tokens` exceed context limit: 188059 + 20000 > 200000")
+ return AIMessage(content="after parsed overflow")
+
+
+class _TransientAPIError(Exception):
+ def __init__(self, status: int, message: str, headers: dict[str, str] | None = None):
+ super().__init__(message)
+ self.status = status
+ self.headers = headers or {}
+
+
+class _RetryOnceModel:
+ def __init__(self, status: int, headers: dict[str, str] | None = None):
+ self.calls = 0
+ self.status = status
+ self.headers = headers or {}
+
+ def bind_tools(self, tools):
+ return self
+
+ async def ainvoke(self, messages):
+ self.calls += 1
+ if self.calls == 1:
+ raise _TransientAPIError(self.status, f"transient {self.status}", self.headers)
+ return AIMessage(content=f"after retry {self.status}")
+
+
+class _EmptyStreamModel:
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ if False:
+ yield AIMessageChunk(content="")
+
+
+class _TruncatedResponseModel:
+ def __init__(self, responses):
+ self.responses = list(responses)
+ self.calls = 0
+ self.max_tokens_values = []
+
+ def bind_tools(self, tools):
+ return self
+
+ def bind(self, **kwargs):
+ self.max_tokens_values.append(kwargs.get("max_tokens"))
+ return self
+
+ async def ainvoke(self, messages):
+ response = self.responses[self.calls]
+ self.calls += 1
+ return response
+
+
+class _QueryOkWithFailingCompactorModel:
+ def __init__(self):
+ self.query_calls = 0
+ self.compact_calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ def bind(self, **kwargs):
+ return self
+
+ async def ainvoke(self, messages):
+ system_text = ""
+ if messages and messages[0].__class__.__name__ == "SystemMessage":
+ system_text = getattr(messages[0], "content", "") or ""
+ if "tasked with summarizing conversations" in system_text or "split turn" in system_text.lower():
+ self.compact_calls += 1
+ raise RuntimeError("compaction failed")
+ self.query_calls += 1
+ return AIMessage(content="OK")
+
+
+class _StreamingToolModel:
+ def __init__(self):
+ self.calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ self.calls += 1
+ if self.calls == 1:
+ yield AIMessageChunk(content="thinking")
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "echo", "args": '{"message":"hi"}', "id": "tc-1", "index": 0}],
+ )
+ await asyncio.sleep(0.05)
+ yield AIMessageChunk(content="done")
+ return
+ yield AIMessageChunk(content="final answer")
+
+
+class _SplitArgsStreamingToolModel:
+ def __init__(self):
+ self.calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ self.calls += 1
+ if self.calls == 1:
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "Read", "args": "", "id": "tc-read", "index": 0}],
+ )
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": None, "args": '{"file_path":"/tmp/a.txt"}', "id": "tc-read", "index": 0}],
+ )
+ await asyncio.sleep(0.01)
+ yield AIMessageChunk(content="done")
+ return
+ yield AIMessageChunk(content="final answer")
+
+
+class _SplitStringValueStreamingToolModel:
+ def __init__(self):
+ self.calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ self.calls += 1
+ if self.calls == 1:
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "Read", "args": '{"file_path":"/', "id": "tc-read", "index": 0}],
+ )
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": None, "args": 'tmp/a.txt"}', "id": "tc-read", "index": 0}],
+ )
+ await asyncio.sleep(0.01)
+ yield AIMessageChunk(content="done")
+ return
+ yield AIMessageChunk(content="final answer")
+
+
+class _SplitAnyOfStreamingToolModel:
+ def __init__(self):
+ self.calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ self.calls += 1
+ if self.calls == 1:
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "read_messages", "args": "", "id": "tc-chat-read", "index": 0}],
+ )
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": None, "args": '{"chat_id":"chat-1"}', "id": "tc-chat-read", "index": 0}],
+ )
+ await asyncio.sleep(0.01)
+ yield AIMessageChunk(content="done")
+ return
+ yield AIMessageChunk(content="final answer")
+
+
+class _TwoToolStreamingModel:
+ def __init__(self):
+ self.calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ self.calls += 1
+ if self.calls == 1:
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "unsafe", "args": '{"message":"u"}', "id": "tc-unsafe", "index": 0}],
+ )
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "safe", "args": '{"message":"s"}', "id": "tc-safe", "index": 1}],
+ )
+ await asyncio.sleep(0.05)
+ yield AIMessageChunk(content="done")
+ return
+ yield AIMessageChunk(content="final answer")
+
+
+class _FailingStreamingToolModel:
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "echo", "args": '{"message":"boom"}', "id": "tc-1", "index": 0}],
+ )
+ await asyncio.sleep(0.005)
+ raise RuntimeError("stream exploded")
+
+
+class _FailingQueuedStreamingToolModel:
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "unsafe", "args": '{"message":"u"}', "id": "tc-unsafe", "index": 0}],
+ )
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "safe", "args": '{"message":"s"}', "id": "tc-safe", "index": 1}],
+ )
+ await asyncio.sleep(0.005)
+ raise RuntimeError("stream exploded")
+
+
+class _ToolThenFinalStreamingModel:
+ def __init__(self):
+ self.calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ self.calls += 1
+ if self.calls == 1:
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "echo", "args": '{"message":"boom"}', "id": "tc-1", "index": 0}],
+ )
+ await asyncio.sleep(0.01)
+ yield AIMessageChunk(content="tool turn")
+ return
+ yield AIMessageChunk(content="final answer")
+
+
+class _UnsafeThenSafeGapStreamingModel:
+ def __init__(self):
+ self.calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ self.calls += 1
+ if self.calls == 1:
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "unsafe", "args": '{"message":"u"}', "id": "tc-unsafe", "index": 0}],
+ )
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "safe", "args": '{"message":"s"}', "id": "tc-safe", "index": 1}],
+ )
+ await asyncio.sleep(0.08)
+ yield AIMessageChunk(content="done")
+ return
+ yield AIMessageChunk(content="final answer")
+
+
+class _BashAndSafeStreamingModel:
+ def __init__(self):
+ self.calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ self.calls += 1
+ if self.calls == 1:
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "bash", "args": '{"command":"boom"}', "id": "tc-bash", "index": 0}],
+ )
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "safe", "args": '{"message":"s"}', "id": "tc-safe", "index": 1}],
+ )
+ await asyncio.sleep(0.05)
+ yield AIMessageChunk(content="done")
+ return
+ yield AIMessageChunk(content="final answer")
+
+
+class _ExplodingToolMiddleware:
+ async def awrap_tool_call(self, request, handler):
+ raise RuntimeError("middleware boom")
+
+
+@pytest.mark.asyncio
+async def test_query_loop_does_not_double_apply_compact_boundary_before_memory_middleware():
+ capture = _CaptureToolContextMiddleware()
+ memory = _SummaryBoundaryMiddleware(boundary_index=3)
+ model = mock_model_with_tool_call(tool_name="echo", args={"message": "ctx"}, then_text="done")
+
+ def echo_handler(message: str) -> str:
+ return f"echo: {message}"
+
+ entry = make_inline_tool("echo", echo_handler)
+ history = [
+ HumanMessage(content="h0"),
+ AIMessage(content="a1"),
+ HumanMessage(content="h2"),
+ HumanMessage(content="call echo"),
+ ]
+ loop = make_loop(
+ model,
+ registry=make_registry(entry),
+ middleware=[memory, capture],
+ app_state=AppState(compact_boundary_index=3),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ async for _ in loop.query({"messages": history}):
+ pass
+
+ assert capture.messages is not None
+ assert len(capture.messages) == 2
+ assert capture.messages[0].content == "summary"
+ assert capture.messages[1].content == "call echo"
+
+
+@pytest.mark.asyncio
+async def test_query_loop_syncs_compact_boundary_index_from_memory_middleware():
+ memory = _SummaryBoundaryMiddleware(boundary_index=3)
+ model = mock_model_no_tools("done")
+ app_state = AppState()
+ loop = make_loop(
+ model,
+ middleware=[memory],
+ app_state=app_state,
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ async for _ in loop.query({"messages": [{"role": "user", "content": "hello"}]}):
+ pass
+
+ assert app_state.compact_boundary_index == 3
+
+
+@pytest.mark.asyncio
+async def test_query_loop_syncs_tool_context_after_real_memory_compaction():
+ capture = _CaptureToolContextMiddleware()
+ memory, _summary_model = _make_summary_memory_middleware()
+
+ model = mock_model_with_tool_call(tool_name="echo", args={"message": "ctx"}, then_text="done")
+
+ def echo_handler(message: str) -> str:
+ return f"echo: {message}"
+
+ entry = make_inline_tool("echo", echo_handler)
+
+ history = [
+ HumanMessage(content="A" * 80),
+ AIMessage(content="B" * 80),
+ HumanMessage(content="C" * 80),
+ HumanMessage(content="call echo"),
+ ]
+ app_state = AppState()
+ loop = make_loop(
+ model,
+ registry=make_registry(entry),
+ middleware=[memory, capture],
+ app_state=app_state,
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ async for _ in loop.query({"messages": history}):
+ pass
+
+ assert capture.messages is not None
+ assert isinstance(capture.messages[0], SystemMessage)
+ assert "Conversation Summary" in capture.messages[0].content
+ assert capture.messages[-1].content == "call echo"
+ assert app_state.compact_boundary_index > 0
+
+
+@pytest.mark.asyncio
+async def test_query_loop_syncs_compact_boundary_before_tool_execution():
+ capture = _CaptureToolContextMiddleware()
+ memory, _summary_model = _make_summary_memory_middleware()
+
+ model = mock_model_with_tool_call(tool_name="echo", args={"message": "ctx"}, then_text="done")
+
+ def echo_handler(message: str) -> str:
+ return f"echo: {message}"
+
+ entry = ToolEntry(
+ name="echo",
+ mode=ToolMode.INLINE,
+ schema={"name": "echo", "description": "echo", "parameters": {}},
+ handler=echo_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+
+ history = [
+ HumanMessage(content="A" * 80),
+ AIMessage(content="B" * 80),
+ HumanMessage(content="C" * 80),
+ HumanMessage(content="call echo"),
+ ]
+ app_state = AppState()
+ loop = make_loop(
+ model,
+ registry=make_registry(entry),
+ middleware=[memory, capture],
+ app_state=app_state,
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ async for _ in loop.query({"messages": history}):
+ pass
+
+ assert capture.messages is not None
+ assert capture.boundary == app_state.compact_boundary_index
+ assert capture.boundary > 0
+
+
+@pytest.mark.asyncio
+async def test_query_loop_persists_compaction_notice_when_boundary_advances():
+ memory, _summary_model = _make_summary_memory_middleware()
+
+ app_state = AppState()
+ loop = make_loop(
+ mock_model_no_tools("after compact"),
+ middleware=[memory],
+ app_state=app_state,
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ history = [
+ HumanMessage(content="A" * 80),
+ AIMessage(content="B" * 80),
+ HumanMessage(content="C" * 80),
+ HumanMessage(content="hello after compact"),
+ ]
+
+ async for _ in loop.query({"messages": history}):
+ pass
+
+ compact_notices = [
+ msg
+ for msg in app_state.messages
+ if msg.__class__.__name__ == "HumanMessage" and ((getattr(msg, "metadata", None) or {}).get("notification_type") == "compact")
+ ]
+
+ assert len(compact_notices) == 1
+ assert "Conversation compacted" in compact_notices[0].content
+ assert compact_notices[0].metadata["source"] == "system"
+ assert compact_notices[0].metadata["compact_boundary_index"] == app_state.compact_boundary_index
+ assert app_state.compact_boundary_index > 0
+
+
+@pytest.mark.asyncio
+async def test_memory_middleware_emits_runtime_compaction_notice():
+ memory, _summary_model = _make_summary_memory_middleware()
+ runtime = SimpleNamespace(cost=0.0, events=[], set_flag=lambda *_args, **_kwargs: None)
+ runtime.emit_activity_event = lambda event: runtime.events.append(event)
+ memory.set_runtime(runtime)
+
+ loop = make_loop(
+ mock_model_no_tools("after compact"),
+ middleware=[memory],
+ app_state=AppState(),
+ runtime=runtime,
+ )
+
+ history = [
+ HumanMessage(content="A" * 80),
+ AIMessage(content="B" * 80),
+ HumanMessage(content="C" * 80),
+ HumanMessage(content="hello after compact"),
+ ]
+
+ async for _ in loop.query({"messages": history}):
+ pass
+
+ compact_events = [event for event in runtime.events if event.get("event") == "notice"]
+
+ assert len(compact_events) == 1
+ payload = json.loads(compact_events[0]["data"])
+ assert payload["notification_type"] == "compact"
+ assert "Conversation compacted" in payload["content"]
+
+
+@pytest.mark.asyncio
+async def test_query_loop_recovers_from_max_output_tokens_with_explicit_continuation():
+ model = _EscalationThenRecoveryModel()
+ app_state = AppState()
+ loop = make_loop(model, app_state=app_state, runtime=SimpleNamespace(cost=0.0))
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "start"}]})
+
+ assert result["reason"] == "completed"
+ assert result["transition"].reason.value == "max_output_tokens_recovery"
+ assert model.calls == 3
+ assert model.max_tokens_values == [64000, 64000]
+ assert any(
+ getattr(msg, "content", "") == "Output token limit hit. Resume directly with no apology or recap." for msg in app_state.messages
+ )
+
+
+@pytest.mark.asyncio
+async def test_query_loop_escalates_max_output_tokens_before_continuation_recovery():
+ model = _EscalationModel()
+ app_state = AppState()
+ loop = make_loop(model, app_state=app_state, runtime=SimpleNamespace(cost=0.0))
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "start"}]})
+
+ assert result["reason"] == "completed"
+ assert result["transition"].reason.value == "max_output_tokens_escalate"
+ assert model.max_tokens_values == [64000]
+
+
+@pytest.mark.asyncio
+async def test_query_loop_parses_context_overflow_error_into_targeted_max_tokens_override():
+ model = _ContextOverflowModel()
+ app_state = AppState()
+ loop = make_loop(model, app_state=app_state, runtime=SimpleNamespace(cost=0.0))
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "start"}]})
+
+ assert result["reason"] == "completed"
+ assert result["messages"][-1].content == "after parsed overflow"
+ assert model.max_tokens_values == [10941]
+
+
+@pytest.mark.asyncio
+async def test_query_loop_retries_once_after_529_capacity_error():
+ model = _RetryOnceModel(529)
+ app_state = AppState()
+ loop = make_loop(model, app_state=app_state, runtime=SimpleNamespace(cost=0.0))
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "start"}]})
+
+ assert result["reason"] == "completed"
+ assert result["messages"][-1].content == "after retry 529"
+ assert model.calls == 2
+
+
+@pytest.mark.asyncio
+async def test_query_loop_retries_once_after_429_rate_limit_error():
+ model = _RetryOnceModel(429, headers={"retry-after": "0"})
+ app_state = AppState()
+ loop = make_loop(model, app_state=app_state, runtime=SimpleNamespace(cost=0.0))
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "start"}]})
+
+ assert result["reason"] == "completed"
+ assert result["messages"][-1].content == "after retry 429"
+ assert model.calls == 2
+
+
+@pytest.mark.asyncio
+async def test_query_loop_astream_raises_loudly_on_empty_stream():
+ loop = make_loop(_EmptyStreamModel(), app_state=AppState(), runtime=SimpleNamespace(cost=0.0))
+
+ with pytest.raises(RuntimeError, match="streaming model returned no AIMessageChunk"):
+ async for _ in loop.astream({"messages": [{"role": "user", "content": "hi"}]}, stream_mode=["messages", "updates"]):
+ pass
+
+
+@pytest.mark.asyncio
+async def test_query_loop_detects_truncated_response_and_escalates_without_yielding_partial():
+ model = _TruncatedResponseModel(
+ [
+ AIMessage(content="partial", response_metadata={"finish_reason": "length"}),
+ AIMessage(content="after escalate"),
+ ]
+ )
+ app_state = AppState()
+ loop = make_loop(model, app_state=app_state, runtime=SimpleNamespace(cost=0.0))
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "start"}]})
+
+ assert result["reason"] == "completed"
+ assert result["transition"].reason.value == "max_output_tokens_escalate"
+ assert [msg.content for msg in result["messages"]] == ["after escalate"]
+ assert model.max_tokens_values == [64000]
+
+
+@pytest.mark.asyncio
+async def test_query_loop_recovers_from_truncated_response_with_withheld_message_pattern():
+ model = _TruncatedResponseModel(
+ [
+ AIMessage(content="partial-1", response_metadata={"finish_reason": "length"}),
+ AIMessage(content="partial-2", response_metadata={"stop_reason": "max_tokens"}),
+ AIMessage(content="after recovery"),
+ ]
+ )
+ app_state = AppState()
+ loop = make_loop(model, app_state=app_state, runtime=SimpleNamespace(cost=0.0))
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "start"}]})
+
+ assert result["reason"] == "completed"
+ assert result["transition"].reason.value == "max_output_tokens_recovery"
+ assert any(getattr(msg, "content", "") == "partial-2" for msg in app_state.messages)
+ assert any(
+ getattr(msg, "content", "") == "Output token limit hit. Resume directly with no apology or recap." for msg in app_state.messages
+ )
+
+
+@pytest.mark.asyncio
+async def test_query_loop_surfaces_withheld_truncated_message_after_recovery_exhausts():
+ model = _TruncatedResponseModel(
+ [
+ AIMessage(content="partial-1", response_metadata={"finish_reason": "length"}),
+ AIMessage(content="partial-2", response_metadata={"finish_reason": "length"}),
+ AIMessage(content="partial-3", response_metadata={"finish_reason": "length"}),
+ AIMessage(content="partial-4", response_metadata={"finish_reason": "length"}),
+ AIMessage(content="partial-5", response_metadata={"finish_reason": "length"}),
+ ]
+ )
+ app_state = AppState()
+ loop = make_loop(model, app_state=app_state, runtime=SimpleNamespace(cost=0.0))
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "start"}]})
+
+ assert result["reason"] == "model_error"
+ assert result["messages"][-1].content == "partial-5"
+
+
+@pytest.mark.asyncio
+async def test_query_loop_retries_prompt_too_long_via_reactive_compact():
+ model = _make_prompt_too_long_model(
+ RuntimeError("prompt is too long"),
+ AIMessage(content="after compact"),
+ )
+ app_state = AppState()
+ loop = make_loop(
+ model,
+ middleware=[_ReactiveCompactMiddleware()],
+ app_state=app_state,
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "start"}]})
+
+ assert result["reason"] == "completed"
+ assert result["transition"].reason.value == "reactive_compact_retry"
+ assert model.ainvoke.call_count == 2
+ assert isinstance(app_state.messages[0], SystemMessage)
+ assert "Conversation Summary" in app_state.messages[0].content
+
+
+@pytest.mark.asyncio
+async def test_handle_model_error_recovery_returns_typed_result_object():
+ loop = make_loop(mock_model_no_tools(), app_state=AppState(), runtime=SimpleNamespace(cost=0.0))
+
+ result = await loop._handle_model_error_recovery(
+ exc=RuntimeError("max_output_tokens exceeded"),
+ thread_id="thread-a",
+ messages=[HumanMessage(content="start")],
+ turn=1,
+ transition=None,
+ max_output_tokens_recovery_count=0,
+ has_attempted_reactive_compact=False,
+ max_output_tokens_override=None,
+ transient_api_retry_count=0,
+ )
+
+ assert result is not None
+ assert not isinstance(result, dict)
+ assert result.transition.reason.value == "max_output_tokens_escalate"
+ assert result.max_output_tokens_override == 64000
+
+
+@pytest.mark.asyncio
+async def test_query_loop_retries_prompt_too_long_via_collapse_drain_before_compact():
+ collapse = _CollapseDrainMiddleware()
+ model = _make_prompt_too_long_model(
+ RuntimeError("prompt is too long"),
+ AIMessage(content="after drain"),
+ )
+ app_state = AppState()
+ loop = make_loop(
+ model,
+ middleware=[collapse],
+ app_state=app_state,
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "start"}]})
+
+ assert result["reason"] == "completed"
+ assert result["transition"].reason.value == "collapse_drain_retry"
+ assert collapse.calls == 1
+ assert model.ainvoke.call_count == 2
+ assert isinstance(app_state.messages[0], SystemMessage)
+ assert "Collapsed Context" in app_state.messages[0].content
+
+
+@pytest.mark.asyncio
+async def test_query_loop_collapse_drain_is_single_shot_before_reactive_compact():
+ collapse = _CollapseDrainMiddleware()
+ model = _make_prompt_too_long_model(
+ RuntimeError("prompt is too long"),
+ RuntimeError("prompt is too long"),
+ AIMessage(content="after compact"),
+ )
+ app_state = AppState()
+ loop = make_loop(
+ model,
+ middleware=[collapse, _ReactiveCompactMiddleware()],
+ app_state=app_state,
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "start"}]})
+
+ assert result["reason"] == "completed"
+ assert result["transition"].reason.value == "reactive_compact_retry"
+ assert collapse.calls == 1
+ assert model.ainvoke.call_count == 3
+ assert isinstance(app_state.messages[0], SystemMessage)
+ assert "Conversation Summary" in app_state.messages[0].content
+
+
+@pytest.mark.asyncio
+async def test_query_loop_persists_prompt_too_long_notice_after_recovery_exhausts():
+ model = _make_prompt_too_long_model(
+ RuntimeError("prompt is too long"),
+ RuntimeError("prompt is too long"),
+ )
+ app_state = AppState()
+ loop = make_loop(
+ model,
+ middleware=[_ReactiveCompactMiddleware()],
+ app_state=app_state,
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "start"}]})
+
+ assert result["reason"] == "prompt_too_long"
+ notices = [
+ msg
+ for msg in app_state.messages
+ if msg.__class__.__name__ == "HumanMessage" and ((getattr(msg, "metadata", None) or {}).get("source") == "system")
+ ]
+ assert notices
+ assert notices[-1].content == "Prompt is too long. Automatic recovery exhausted. Clear the thread or start a new one."
+
+
+@pytest.mark.asyncio
+async def test_query_loop_astream_raises_prompt_too_long_notice_text_after_recovery_exhausts():
+ model = _make_prompt_too_long_model(
+ RuntimeError("prompt is too long"),
+ RuntimeError("prompt is too long"),
+ )
+ loop = make_loop(
+ model,
+ middleware=[_ReactiveCompactMiddleware()],
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ with pytest.raises(
+ RuntimeError,
+ match="Prompt is too long. Automatic recovery exhausted. Clear the thread or start a new one.",
+ ):
+ async for _ in loop.astream({"messages": [{"role": "user", "content": "start"}]}, stream_mode=["updates"]):
+ pass
+
+
+@pytest.mark.asyncio
+async def test_query_loop_opens_and_clears_thread_scoped_compaction_breaker(tmp_path):
+ thread_id = "compact-breaker-thread"
+ checkpointer = _MemoryCheckpointer()
+ model = _QueryOkWithFailingCompactorModel()
+
+ def make_breaker_loop():
+ memory = MemoryMiddleware(
+ context_limit=10000,
+ compaction_threshold=0.5,
+ db_path=tmp_path / "compact-breaker.db",
+ compaction_config=SimpleNamespace(reserve_tokens=0, keep_recent_tokens=10),
+ )
+ memory.set_model(model)
+ return QueryLoop(
+ model=model,
+ system_prompt=SystemMessage(content="You are a test assistant."),
+ middleware=[memory],
+ checkpointer=checkpointer,
+ registry=make_registry(),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ bootstrap=BootstrapConfig(workspace_root=Path("/tmp"), model_name="test-model"),
+ max_turns=10,
+ )
+
+ loop = make_breaker_loop()
+ config = {"configurable": {"thread_id": thread_id}}
+
+ for attempt in range(1, 4):
+ result = await loop.ainvoke(
+ {
+ "messages": [
+ {"role": "user", "content": "A" * 8000},
+ {"role": "assistant", "content": "B" * 8000},
+ {"role": "user", "content": f"start {attempt} " + ("C" * 8000)},
+ ]
+ },
+ config=config,
+ )
+ assert result["reason"] == "completed"
+ assert model.compact_calls == attempt
+
+ state = await loop.aget_state(config)
+ breaker_notices = [
+ msg
+ for msg in state.values["messages"]
+ if msg.__class__.__name__ == "HumanMessage"
+ and ((getattr(msg, "metadata", None) or {}).get("notification_type") == "compact_breaker")
+ ]
+ assert len(breaker_notices) == 1
+ assert "Automatic compaction disabled for this thread after repeated failures." in breaker_notices[0].content
+
+ reloaded = make_breaker_loop()
+ result = await reloaded.ainvoke(
+ {
+ "messages": [
+ {"role": "user", "content": "A" * 8000},
+ {"role": "assistant", "content": "B" * 8000},
+ {"role": "user", "content": "after breaker " + ("C" * 8000)},
+ ]
+ },
+ config=config,
+ )
+ assert result["reason"] == "completed"
+ assert model.compact_calls == 3
+
+ await reloaded.aclear(thread_id)
+
+ post_clear = make_breaker_loop()
+ result = await post_clear.ainvoke(
+ {
+ "messages": [
+ {"role": "user", "content": "A" * 8000},
+ {"role": "assistant", "content": "B" * 8000},
+ {"role": "user", "content": "after clear " + ("C" * 8000)},
+ ]
+ },
+ config=config,
+ )
+ assert result["reason"] == "completed"
+ assert model.compact_calls == 4
+
+
+@pytest.mark.asyncio
+async def test_query_loop_can_emit_tool_results_before_final_agent_message():
+ model = _StreamingToolModel()
+
+ async def echo_handler(message: str) -> str:
+ await asyncio.sleep(0.01)
+ return f"echo: {message}"
+
+ entry = ToolEntry(
+ name="echo",
+ mode=ToolMode.INLINE,
+ schema={"name": "echo", "description": "echo", "parameters": {}},
+ handler=echo_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(
+ model,
+ registry=make_registry(entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ event_order: list[str] = []
+ async for chunk in loop.astream({"messages": [{"role": "user", "content": "call echo"}]}):
+ if "tools" in chunk:
+ event_order.append("tools")
+ if "agent" in chunk:
+ event_order.append("agent")
+
+ assert "tools" in event_order
+ assert "agent" in event_order
+ assert event_order.index("tools") < event_order.index("agent")
+
+
+@pytest.mark.asyncio
+async def test_streaming_executor_blocks_safe_tool_behind_running_unsafe_tool():
+ model = _TwoToolStreamingModel()
+ starts: list[str] = []
+
+ async def unsafe_handler(message: str) -> str:
+ starts.append(f"start-unsafe-{message}")
+ await asyncio.sleep(0.03)
+ starts.append(f"end-unsafe-{message}")
+ return f"unsafe: {message}"
+
+ async def safe_handler(message: str) -> str:
+ starts.append(f"start-safe-{message}")
+ await asyncio.sleep(0.001)
+ starts.append(f"end-safe-{message}")
+ return f"safe: {message}"
+
+ unsafe_entry = ToolEntry(
+ name="unsafe",
+ mode=ToolMode.INLINE,
+ schema={"name": "unsafe", "description": "unsafe", "parameters": {}},
+ handler=unsafe_handler,
+ source="test",
+ is_concurrency_safe=False,
+ )
+ safe_entry = ToolEntry(
+ name="safe",
+ mode=ToolMode.INLINE,
+ schema={"name": "safe", "description": "safe", "parameters": {}},
+ handler=safe_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(
+ model,
+ registry=make_registry(unsafe_entry, safe_entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ async for _ in loop.astream({"messages": [{"role": "user", "content": "call both"}]}):
+ pass
+
+ assert starts == [
+ "start-unsafe-u",
+ "end-unsafe-u",
+ "start-safe-s",
+ "end-safe-s",
+ ]
+
+
+@pytest.mark.asyncio
+async def test_streaming_executor_discards_running_tasks_on_stream_failure():
+ model = _FailingStreamingToolModel()
+ events: list[str] = []
+
+ async def echo_handler(message: str) -> str:
+ events.append(f"start-{message}")
+ try:
+ await asyncio.sleep(0.05)
+ except asyncio.CancelledError:
+ events.append(f"cancel-{message}")
+ raise
+ events.append(f"finish-{message}")
+ return f"echo: {message}"
+
+ entry = ToolEntry(
+ name="echo",
+ mode=ToolMode.INLINE,
+ schema={"name": "echo", "description": "echo", "parameters": {}},
+ handler=echo_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(
+ model,
+ registry=make_registry(entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "call echo"}]})
+ await asyncio.sleep(0.06)
+
+ assert result["reason"] == "model_error"
+ assert "start-boom" in events
+ assert "cancel-boom" in events
+ assert "finish-boom" not in events
+ assert any("streaming discarded: streaming_error" in msg.content for msg in result["messages"])
+
+
+@pytest.mark.asyncio
+async def test_streaming_executor_discards_queued_tools_without_starting_them():
+ model = _FailingQueuedStreamingToolModel()
+ events: list[str] = []
+
+ async def unsafe_handler(message: str) -> str:
+ events.append(f"start-unsafe-{message}")
+ try:
+ await asyncio.sleep(0.05)
+ except asyncio.CancelledError:
+ events.append(f"cancel-unsafe-{message}")
+ raise
+ events.append(f"finish-unsafe-{message}")
+ return f"unsafe: {message}"
+
+ async def safe_handler(message: str) -> str:
+ events.append(f"start-safe-{message}")
+ await asyncio.sleep(0.001)
+ events.append(f"finish-safe-{message}")
+ return f"safe: {message}"
+
+ unsafe_entry = ToolEntry(
+ name="unsafe",
+ mode=ToolMode.INLINE,
+ schema={"name": "unsafe", "description": "unsafe", "parameters": {}},
+ handler=unsafe_handler,
+ source="test",
+ is_concurrency_safe=False,
+ )
+ safe_entry = ToolEntry(
+ name="safe",
+ mode=ToolMode.INLINE,
+ schema={"name": "safe", "description": "safe", "parameters": {}},
+ handler=safe_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(
+ model,
+ registry=make_registry(unsafe_entry, safe_entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "call both"}]})
+ await asyncio.sleep(0.06)
+
+ assert result["reason"] == "model_error"
+ assert "start-unsafe-u" in events
+ assert "cancel-unsafe-u" in events
+ assert "finish-unsafe-u" not in events
+ assert "start-safe-s" not in events
+ tool_errors = [msg for msg in result["messages"] if isinstance(msg, ToolMessage)]
+ assert {msg.tool_call_id for msg in tool_errors} == {"tc-unsafe", "tc-safe"}
+ assert all("streaming discarded: streaming_error" in msg.content for msg in tool_errors)
+
+
+@pytest.mark.asyncio
+async def test_streaming_executor_uses_per_call_concurrency_safety():
+ class _DynamicConcurrencyStreamingModel:
+ def __init__(self):
+ self.calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ self.calls += 1
+ if self.calls == 1:
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "maybe_parallel", "args": '{"message":"u","parallel":false}', "id": "tc-maybe", "index": 0}],
+ )
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "safe", "args": '{"message":"s"}', "id": "tc-safe", "index": 1}],
+ )
+ await asyncio.sleep(0.05)
+ yield AIMessageChunk(content="done")
+ return
+ yield AIMessageChunk(content="final answer")
+
+ model = _DynamicConcurrencyStreamingModel()
+ starts: list[str] = []
+
+ async def maybe_parallel_handler(message: str, parallel: bool) -> str:
+ starts.append(f"start-maybe-{message}")
+ await asyncio.sleep(0.02)
+ starts.append(f"end-maybe-{message}")
+ return f"maybe: {message}"
+
+ async def safe_handler(message: str) -> str:
+ starts.append(f"start-safe-{message}")
+ await asyncio.sleep(0.001)
+ starts.append(f"end-safe-{message}")
+ return f"safe: {message}"
+
+ maybe_entry = ToolEntry(
+ name="maybe_parallel",
+ mode=ToolMode.INLINE,
+ schema={"name": "maybe_parallel", "description": "maybe", "parameters": {}},
+ handler=maybe_parallel_handler,
+ source="test",
+ is_concurrency_safe=lambda parsed: bool(parsed.get("parallel")),
+ )
+ safe_entry = ToolEntry(
+ name="safe",
+ mode=ToolMode.INLINE,
+ schema={"name": "safe", "description": "safe", "parameters": {}},
+ handler=safe_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(
+ model,
+ registry=make_registry(maybe_entry, safe_entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ async for _ in loop.astream({"messages": [{"role": "user", "content": "call both"}]}):
+ pass
+
+ assert starts == [
+ "start-maybe-u",
+ "end-maybe-u",
+ "start-safe-s",
+ "end-safe-s",
+ ]
+
+
+@pytest.mark.asyncio
+async def test_streaming_executor_missing_tool_completes_without_blocking_next_safe_tool():
+ class _MissingThenSafeStreamingModel:
+ def __init__(self):
+ self.calls = 0
+
+ def bind_tools(self, tools):
+ return self
+
+ async def astream(self, messages):
+ self.calls += 1
+ if self.calls == 1:
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "missing_tool", "args": "{}", "id": "tc-missing", "index": 0}],
+ )
+ yield AIMessageChunk(
+ content="",
+ tool_call_chunks=[{"name": "safe", "args": '{"message":"s"}', "id": "tc-safe", "index": 1}],
+ )
+ await asyncio.sleep(0.02)
+ yield AIMessageChunk(content="done")
+ return
+ yield AIMessageChunk(content="final answer")
+
+ model = _MissingThenSafeStreamingModel()
+ starts: list[str] = []
+
+ async def safe_handler(message: str) -> str:
+ starts.append(f"start-safe-{message}")
+ await asyncio.sleep(0.001)
+ starts.append(f"end-safe-{message}")
+ return f"safe: {message}"
+
+ safe_entry = ToolEntry(
+ name="safe",
+ mode=ToolMode.INLINE,
+ schema={"name": "safe", "description": "safe", "parameters": {}},
+ handler=safe_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(
+ model,
+ registry=make_registry(safe_entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ pre_agent_tool_ids = []
+ async for chunk in loop.astream({"messages": [{"role": "user", "content": "call missing then safe"}]}):
+ if "tools" in chunk:
+ pre_agent_tool_ids.extend(msg.tool_call_id for msg in chunk["tools"]["messages"])
+ if "agent" in chunk:
+ break
+
+ assert pre_agent_tool_ids == ["tc-missing", "tc-safe"]
+ assert starts == ["start-safe-s", "end-safe-s"]
+
+
+@pytest.mark.asyncio
+async def test_streaming_executor_missing_tool_is_immediately_completed():
+ async def safe_handler(message: str) -> str:
+ return f"safe:{message}"
+
+ safe_entry = ToolEntry(
+ name="safe",
+ mode=ToolMode.INLINE,
+ schema={"name": "safe", "description": "safe", "parameters": {}},
+ handler=safe_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(
+ mock_model_no_tools(),
+ registry=make_registry(safe_entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+ executor = _StreamingToolExecutor(loop=loop, tool_context=None)
+
+ await executor.add_tool({"name": "missing_tool", "args": {}, "id": "tc-missing"})
+ await executor.add_tool({"name": "safe", "args": {"message": "s"}, "id": "tc-safe"})
+
+ assert [(tracked.tool_call.get("id"), tracked.status) for tracked in executor._tracked] == [
+ ("tc-missing", "completed"),
+ ("tc-safe", "executing"),
+ ]
+ assert executor._tracked[0].result is not None
+ assert "Tool 'missing_tool' not found" in executor._tracked[0].result.content
+
+
+@pytest.mark.asyncio
+async def test_execute_tools_preserves_order_blocking_for_safe_after_unsafe():
+ model = MagicMock()
+ model.bind_tools.return_value = model
+ model.ainvoke = AsyncMock(
+ side_effect=[
+ AIMessage(
+ content="",
+ tool_calls=[
+ {"name": "safe_a", "args": {"message": "a"}, "id": "tc-safe-a"},
+ {"name": "unsafe_b", "args": {"message": "b"}, "id": "tc-unsafe-b"},
+ {"name": "safe_c", "args": {"message": "c"}, "id": "tc-safe-c"},
+ ],
+ ),
+ AIMessage(content="done"),
+ ]
+ )
+ starts: list[str] = []
+
+ async def safe_a_handler(message: str) -> str:
+ starts.append(f"start-safe-a-{message}")
+ await asyncio.sleep(0.001)
+ starts.append(f"end-safe-a-{message}")
+ return f"safe-a: {message}"
+
+ async def unsafe_b_handler(message: str) -> str:
+ starts.append(f"start-unsafe-b-{message}")
+ await asyncio.sleep(0.02)
+ starts.append(f"end-unsafe-b-{message}")
+ return f"unsafe-b: {message}"
+
+ async def safe_c_handler(message: str) -> str:
+ starts.append(f"start-safe-c-{message}")
+ await asyncio.sleep(0.001)
+ starts.append(f"end-safe-c-{message}")
+ return f"safe-c: {message}"
+
+ loop = make_loop(
+ model,
+ registry=make_registry(
+ ToolEntry(
+ name="safe_a",
+ mode=ToolMode.INLINE,
+ schema={"name": "safe_a", "description": "safe_a", "parameters": {}},
+ handler=safe_a_handler,
+ source="test",
+ is_concurrency_safe=True,
+ ),
+ ToolEntry(
+ name="unsafe_b",
+ mode=ToolMode.INLINE,
+ schema={"name": "unsafe_b", "description": "unsafe_b", "parameters": {}},
+ handler=unsafe_b_handler,
+ source="test",
+ is_concurrency_safe=False,
+ ),
+ ToolEntry(
+ name="safe_c",
+ mode=ToolMode.INLINE,
+ schema={"name": "safe_c", "description": "safe_c", "parameters": {}},
+ handler=safe_c_handler,
+ source="test",
+ is_concurrency_safe=True,
+ ),
+ ),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ async for _ in loop.astream({"messages": [{"role": "user", "content": "call ordered tools"}]}):
+ pass
+
+ assert starts == [
+ "start-safe-a-a",
+ "end-safe-a-a",
+ "start-unsafe-b-b",
+ "end-unsafe-b-b",
+ "start-safe-c-c",
+ "end-safe-c-c",
+ ]
+
+
+@pytest.mark.asyncio
+async def test_streaming_executor_surfaces_middleware_exception_as_tool_error():
+ model = _ToolThenFinalStreamingModel()
+
+ async def echo_handler(message: str) -> str:
+ return f"echo: {message}"
+
+ entry = ToolEntry(
+ name="echo",
+ mode=ToolMode.INLINE,
+ schema={"name": "echo", "description": "echo", "parameters": {}},
+ handler=echo_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(
+ model,
+ registry=make_registry(entry),
+ middleware=[_ExplodingToolMiddleware()],
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "call echo"}]})
+
+ assert result["reason"] == "completed"
+ assert any(
+ isinstance(msg, ToolMessage) and msg.tool_call_id == "tc-1" and "middleware boom" in msg.content for msg in result["messages"]
+ )
+ assert any(isinstance(msg, AIMessage) and msg.content == "final answer" for msg in result["messages"])
+
+
+@pytest.mark.asyncio
+async def test_streaming_executor_restarts_queue_after_unsafe_completion_before_final_chunk():
+ model = _UnsafeThenSafeGapStreamingModel()
+ starts: list[str] = []
+
+ async def unsafe_handler(message: str) -> str:
+ starts.append(f"start-unsafe-{message}")
+ await asyncio.sleep(0.01)
+ starts.append(f"end-unsafe-{message}")
+ return f"unsafe: {message}"
+
+ async def safe_handler(message: str) -> str:
+ starts.append(f"start-safe-{message}")
+ await asyncio.sleep(0.001)
+ starts.append(f"end-safe-{message}")
+ return f"safe: {message}"
+
+ unsafe_entry = ToolEntry(
+ name="unsafe",
+ mode=ToolMode.INLINE,
+ schema={"name": "unsafe", "description": "unsafe", "parameters": {}},
+ handler=unsafe_handler,
+ source="test",
+ is_concurrency_safe=False,
+ )
+ safe_entry = ToolEntry(
+ name="safe",
+ mode=ToolMode.INLINE,
+ schema={"name": "safe", "description": "safe", "parameters": {}},
+ handler=safe_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(
+ model,
+ registry=make_registry(unsafe_entry, safe_entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ chunks = []
+ async for chunk in loop.astream({"messages": [{"role": "user", "content": "call both"}]}):
+ chunks.append(chunk)
+
+ first_agent_index = next(i for i, chunk in enumerate(chunks) if "agent" in chunk)
+ pre_agent_tool_ids = [msg.tool_call_id for chunk in chunks[:first_agent_index] for msg in chunk.get("tools", {}).get("messages", [])]
+
+ assert starts == [
+ "start-unsafe-u",
+ "end-unsafe-u",
+ "start-safe-s",
+ "end-safe-s",
+ ]
+ assert pre_agent_tool_ids == ["tc-unsafe", "tc-safe"]
+
+
+@pytest.mark.asyncio
+async def test_streaming_executor_bash_error_cancels_siblings_without_killing_parent():
+ model = _BashAndSafeStreamingModel()
+ events: list[str] = []
+
+ async def bash_handler(command: str) -> str:
+ events.append(f"start-bash-{command}")
+ await asyncio.sleep(0.005)
+ raise RuntimeError("bash exploded")
+
+ async def safe_handler(message: str) -> str:
+ events.append(f"start-safe-{message}")
+ try:
+ await asyncio.sleep(0.05)
+ except asyncio.CancelledError:
+ events.append(f"cancel-safe-{message}")
+ raise
+ events.append(f"finish-safe-{message}")
+ return f"safe: {message}"
+
+ bash_entry = make_inline_tool("bash", bash_handler)
+ safe_entry = make_inline_tool("safe", safe_handler)
+ loop = make_loop(
+ model,
+ registry=make_registry(bash_entry, safe_entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "call bash and safe"}]})
+
+ assert result["reason"] == "completed"
+ assert "start-bash-boom" in events
+ assert "start-safe-s" in events
+ assert "cancel-safe-s" in events
+ assert "finish-safe-s" not in events
+ tool_messages = [msg for msg in result["messages"] if isinstance(msg, ToolMessage)]
+ assert {msg.tool_call_id for msg in tool_messages} == {"tc-bash", "tc-safe"}
+ assert any(msg.tool_call_id == "tc-bash" and "bash exploded" in msg.content for msg in tool_messages)
+ assert any(msg.tool_call_id == "tc-safe" and "sibling" in msg.content for msg in tool_messages)
+
+
+@pytest.mark.asyncio
+async def test_query_loop_messages_updates_mode_forwards_live_stream_chunks():
+ model = _StreamingToolModel()
+
+ async def echo_handler(message: str) -> str:
+ await asyncio.sleep(0.01)
+ return f"echo: {message}"
+
+ entry = make_inline_tool("echo", echo_handler)
+ loop = make_loop(
+ model,
+ registry=make_registry(entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ events = []
+ async for chunk in loop.astream(
+ {"messages": [{"role": "user", "content": "call echo"}]},
+ stream_mode=["messages", "updates"],
+ ):
+ events.append(chunk)
+
+ message_events = [data for mode, data in events if mode == "messages"]
+ texts = [msg.content for msg, _ in message_events if getattr(msg, "content", "")]
+ tool_update_index = next(i for i, item in enumerate(events) if item[0] == "updates" and "tools" in item[1])
+ thinking_index = next(i for i, item in enumerate(events) if item[0] == "messages" and item[1][0].content == "thinking")
+ tool_chunk_index = next(
+ i
+ for i, item in enumerate(events)
+ if item[0] == "messages" and getattr(item[1][0], "tool_call_chunks", None) and item[1][0].tool_call_chunks[0]["id"] == "tc-1"
+ )
+
+ assert thinking_index < tool_update_index
+ assert tool_chunk_index < tool_update_index
+ assert any(msg.content == "thinking" for msg, _ in message_events)
+ assert any(getattr(msg, "tool_call_chunks", None) and msg.tool_call_chunks[0]["id"] == "tc-1" for msg, _ in message_events)
+ assert texts == ["thinking", "done", "final answer"]
+
+
+@pytest.mark.asyncio
+async def test_streaming_overlap_waits_for_split_tool_call_args_before_execution():
+ model = _SplitArgsStreamingToolModel()
+ seen_args = []
+
+ def read_handler(file_path: str) -> str:
+ seen_args.append(file_path)
+ return f"read:{file_path}"
+
+ entry = ToolEntry(
+ name="Read",
+ mode=ToolMode.INLINE,
+ schema={
+ "name": "Read",
+ "description": "read",
+ "parameters": {
+ "type": "object",
+ "required": ["file_path"],
+ "properties": {"file_path": {"type": "string"}},
+ },
+ },
+ handler=read_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(
+ model,
+ registry=make_registry(entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "call read"}]})
+
+ tool_messages = [msg for msg in result["messages"] if isinstance(msg, ToolMessage)]
+ assert seen_args == ["/tmp/a.txt"]
+ assert any(msg.tool_call_id == "tc-read" and msg.content == "read:/tmp/a.txt" for msg in tool_messages)
+ assert not any("InputValidationError" in msg.content for msg in tool_messages)
+
+
+@pytest.mark.asyncio
+async def test_streaming_overlap_waits_for_split_string_value_before_execution():
+ model = _SplitStringValueStreamingToolModel()
+ seen_args = []
+
+ def read_handler(file_path: str) -> str:
+ seen_args.append(file_path)
+ return f"read:{file_path}"
+
+ entry = ToolEntry(
+ name="Read",
+ mode=ToolMode.INLINE,
+ schema={
+ "name": "Read",
+ "description": "read",
+ "parameters": {
+ "type": "object",
+ "required": ["file_path"],
+ "properties": {"file_path": {"type": "string"}},
+ },
+ },
+ handler=read_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(
+ model,
+ registry=make_registry(entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "call read"}]})
+
+ tool_messages = [msg for msg in result["messages"] if isinstance(msg, ToolMessage)]
+ assert seen_args == ["/tmp/a.txt"]
+ assert any(msg.tool_call_id == "tc-read" and msg.content == "read:/tmp/a.txt" for msg in tool_messages)
+ assert not any("InputValidationError" in msg.content for msg in tool_messages)
+
+
+@pytest.mark.asyncio
+async def test_streaming_overlap_waits_for_anyof_tool_args_before_execution():
+ model = _SplitAnyOfStreamingToolModel()
+ seen_calls = []
+
+ def read_messages_handler(entity_id: str | None = None, chat_id: str | None = None) -> str:
+ seen_calls.append({"entity_id": entity_id, "chat_id": chat_id})
+ if chat_id:
+ return f"chat:{chat_id}"
+ if entity_id:
+ return f"entity:{entity_id}"
+ return "Provide entity_id or chat_id."
+
+ entry = ToolEntry(
+ name="read_messages",
+ mode=ToolMode.INLINE,
+ schema={
+ "name": "read_messages",
+ "description": "read chat",
+ "parameters": {
+ "type": "object",
+ "required": [],
+ "properties": {
+ "entity_id": {"type": "string"},
+ "chat_id": {"type": "string"},
+ },
+ "x-leon-required-any-of": [
+ ["entity_id"],
+ ["chat_id"],
+ ],
+ },
+ },
+ handler=read_messages_handler,
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(
+ model,
+ registry=make_registry(entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ result = await loop.ainvoke({"messages": [{"role": "user", "content": "read chat"}]})
+
+ tool_messages = [msg for msg in result["messages"] if isinstance(msg, ToolMessage)]
+ assert seen_calls == [{"entity_id": None, "chat_id": "chat-1"}]
+ assert any(msg.tool_call_id == "tc-chat-read" and msg.content == "chat:chat-1" for msg in tool_messages)
+ assert not any(msg.content == "Provide entity_id or chat_id." for msg in tool_messages)
+
+
+def test_normalize_stream_tool_call_keeps_aggregate_args_when_chunk_args_are_empty():
+ entry = ToolEntry(
+ name="read_messages",
+ mode=ToolMode.INLINE,
+ schema={
+ "name": "read_messages",
+ "description": "read chat",
+ "parameters": {
+ "type": "object",
+ "required": [],
+ "properties": {
+ "entity_id": {"type": "string"},
+ "chat_id": {"type": "string"},
+ },
+ "x-leon-required-any-of": [
+ ["entity_id"],
+ ["chat_id"],
+ ],
+ },
+ },
+ handler=lambda **_kwargs: "ok",
+ source="test",
+ is_concurrency_safe=True,
+ )
+ loop = make_loop(
+ mock_model_no_tools(),
+ registry=make_registry(entry),
+ app_state=AppState(),
+ runtime=SimpleNamespace(cost=0.0),
+ )
+
+ normalized = loop._normalize_stream_tool_call(
+ {"name": "read_messages", "args": {"chat_id": "chat-1"}, "id": "tc-chat-read"},
+ [{"name": "read_messages", "args": "", "id": "tc-chat-read", "index": 0}],
+ )
+
+ assert normalized == {
+ "name": "read_messages",
+ "args": {"chat_id": "chat-1"},
+ "id": "tc-chat-read",
+ }
diff --git a/tests/test_queue_formatters.py b/tests/Unit/core/test_queue_formatters.py
similarity index 85%
rename from tests/test_queue_formatters.py
rename to tests/Unit/core/test_queue_formatters.py
index 9d2e0982a..8ec57d72c 100644
--- a/tests/test_queue_formatters.py
+++ b/tests/Unit/core/test_queue_formatters.py
@@ -2,7 +2,21 @@
import xml.etree.ElementTree as ET
-from core.runtime.middleware.queue.formatters import format_command_notification
+from core.runtime.middleware.queue.formatters import format_chat_notification, format_command_notification
+
+
+class TestFormatChatNotification:
+ def test_includes_explicit_read_messages_and_send_message_instructions(self):
+ result = format_chat_notification(
+ sender_name="alice",
+ chat_id="chat-123",
+ unread_count=2,
+ )
+
+ assert 'read_messages(chat_id="chat-123")' in result
+ assert 'send_message(chat_id="chat-123", content="...")' in result
+ assert "Prefer using this exact chat_id directly" in result
+ assert "Do not treat your normal assistant text as a chat reply." in result
class TestFormatCommandNotification:
diff --git a/tests/test_runtime.py b/tests/Unit/core/test_runtime.py
similarity index 94%
rename from tests/test_runtime.py
rename to tests/Unit/core/test_runtime.py
index ef168ebbe..74ce15441 100644
--- a/tests/test_runtime.py
+++ b/tests/Unit/core/test_runtime.py
@@ -18,6 +18,7 @@
RemoteWrappedRuntime,
_extract_state_from_output,
_normalize_pty_result,
+ _RemoteRuntimeBase,
)
from sandbox.terminal import TerminalState, terminal_from_row
from storage.providers.sqlite.lease_repo import SQLiteLeaseRepo
@@ -89,6 +90,15 @@ def _wrap_remote_state_output(
return "\n".join(lines) + "\n"
+def test_remote_runtime_treats_daytona_pty_1011_as_infra_error():
+ text = 'Failed to send input to PTY: received 1011 (internal error) {"exitCode":1}'
+ assert _RemoteRuntimeBase._looks_like_infra_error(text) is True
+
+
+def test_remote_runtime_treats_broken_pipe_as_infra_error():
+ assert _RemoteRuntimeBase._looks_like_infra_error("[Errno 32] Broken pipe") is True
+
+
# TODO(windows-compat): LocalPersistentShellRuntime uses Unix PTY + /tmp paths.
# Tracked in: https://github.com/OpenDCAI/Mycel/issues — Windows shell support needed.
@pytest.mark.skipif(sys.platform == "win32", reason="LocalPersistentShellRuntime requires a Unix shell")
@@ -639,6 +649,36 @@ def _fake_run(handle, command: str, timeout: float | None, on_stdout_chunk=None)
await runtime.close()
+@pytest.mark.asyncio
+async def test_daytona_runtime_retries_once_after_broken_pipe(terminal_store, lease_store):
+ terminal = terminal_from_row(terminal_store.create("term-3b", "thread-3b", "lease-3b", "/tmp"), terminal_store.db_path)
+ lease = lease_store.create("lease-3b", "daytona")
+ provider = MagicMock()
+ from sandbox.providers.daytona import DaytonaSessionRuntime
+
+ runtime = DaytonaSessionRuntime(terminal, lease, provider)
+ calls: list[str] = []
+ recover_events: list[str] = []
+
+ def _fake_execute_once_sync(command: str, timeout: float | None = None, on_stdout_chunk=None):
+ calls.append(command)
+ if len(calls) == 1:
+ raise RuntimeError("[Errno 32] Broken pipe")
+ return ExecuteResult(exit_code=0, stdout="ok\n", stderr="")
+
+ runtime._execute_once_sync = _fake_execute_once_sync # type: ignore[attr-defined]
+ runtime._recover_infra = lambda: recover_events.append("recover") # type: ignore[attr-defined]
+ runtime._close_shell_sync = lambda: recover_events.append("close") # type: ignore[attr-defined]
+ runtime._schedule_snapshot = lambda generation, timeout: None # type: ignore[attr-defined]
+
+ result = await runtime.execute("echo ok")
+
+ assert result.exit_code == 0
+ assert result.stdout == "ok\n"
+ assert calls == ["echo ok", "echo ok"]
+ assert recover_events == ["recover", "close"]
+
+
def test_extract_state_from_output_ignores_prompt_noise():
start = "__LEON_STATE_START_deadbeef__"
end = "__LEON_STATE_END_deadbeef__"
diff --git a/tests/Unit/core/test_runtime_agent.py b/tests/Unit/core/test_runtime_agent.py
new file mode 100644
index 000000000..4999719e5
--- /dev/null
+++ b/tests/Unit/core/test_runtime_agent.py
@@ -0,0 +1,44 @@
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+
+from core.runtime.abort import AbortController
+from core.runtime.agent import LeonAgent
+from core.runtime.state import BootstrapConfig
+
+
+def test_apply_forked_child_context_updates_agent_and_service_seams():
+ agent = object.__new__(LeonAgent)
+ agent.agent = SimpleNamespace(_bootstrap=None, _tool_abort_controller=None)
+ agent._agent_service = SimpleNamespace(_parent_bootstrap=None, _parent_tool_context=None)
+
+ bootstrap = BootstrapConfig(workspace_root=Path("/tmp"), model_name="test-model")
+ tool_context = SimpleNamespace(abort_controller=AbortController())
+
+ LeonAgent.apply_forked_child_context(agent, bootstrap, tool_context=tool_context)
+
+ assert agent._bootstrap is bootstrap
+ assert agent.agent._bootstrap is bootstrap
+ assert agent._agent_service._parent_bootstrap is bootstrap
+ assert agent._agent_service._parent_tool_context is tool_context
+ assert agent.agent._tool_abort_controller is tool_context.abort_controller
+
+
+def test_close_skips_sandbox_cleanup_and_stays_idempotent():
+ agent = object.__new__(LeonAgent)
+ agent._session_started = False
+ agent._session_ended = False
+ agent._closing = False
+ agent._closed = False
+ agent._cleanup_sandbox = MagicMock()
+ agent._mark_terminated = MagicMock()
+ agent._cleanup_mcp_client = MagicMock()
+ agent._cleanup_sqlite_connection = MagicMock()
+
+ LeonAgent.close(agent, cleanup_sandbox=False)
+ LeonAgent.close(agent, cleanup_sandbox=True)
+
+ agent._cleanup_sandbox.assert_not_called()
+ agent._mark_terminated.assert_called_once()
+ agent._cleanup_mcp_client.assert_called_once()
+ agent._cleanup_sqlite_connection.assert_called_once()
diff --git a/tests/Unit/core/test_runtime_support.py b/tests/Unit/core/test_runtime_support.py
new file mode 100644
index 000000000..1fb809a10
--- /dev/null
+++ b/tests/Unit/core/test_runtime_support.py
@@ -0,0 +1,251 @@
+"""Focused runtime support tests for cleanup, fork, and state helpers."""
+
+import asyncio
+import signal
+from pathlib import Path
+from typing import Any, get_type_hints
+
+import pytest
+
+import core.runtime.state as runtime_state
+from core.runtime.abort import AbortController
+from core.runtime.cleanup import CleanupRegistry
+from core.runtime.fork import create_subagent_context, fork_context
+from core.runtime.state import AppState, BootstrapConfig, ToolUseContext
+
+
+@pytest.fixture
+def runtime_parent_bootstrap():
+ return BootstrapConfig(
+ workspace_root=Path("/workspace"),
+ original_cwd=Path("/launcher"),
+ project_root=Path("/workspace/project"),
+ cwd=Path("/workspace/project/src"),
+ model_name="claude-opus-4-5",
+ api_key="sk-parent",
+ block_dangerous_commands=True,
+ block_network_commands=True,
+ enable_audit_log=False,
+ enable_web_tools=True,
+ allowed_file_extensions=[".py"],
+ extra_allowed_paths=["/shared"],
+ max_turns=20,
+ model_provider="anthropic",
+ base_url="https://api.anthropic.com",
+ context_limit=200000,
+ total_cost_usd=1.25,
+ total_tool_duration_ms=42,
+ )
+
+
+@pytest.fixture
+def runtime_parent_tool_context(runtime_parent_bootstrap):
+ app_state = AppState(turn_count=1, tool_overrides={"Bash": True})
+
+ def set_app_state_for_tasks(updater):
+ app_state.set_state(updater)
+
+ return ToolUseContext(
+ bootstrap=runtime_parent_bootstrap,
+ get_app_state=app_state.get_state,
+ set_app_state=app_state.set_state,
+ set_app_state_for_tasks=set_app_state_for_tasks,
+ refresh_tools=None,
+ read_file_state={"/tmp/file.py": {"partial": False}},
+ loaded_nested_memory_paths={"/tmp/memory.md"},
+ discovered_skill_names={"skill-a"},
+ nested_memory_attachment_triggers={"turn-a"},
+ messages=["msg-1"],
+ )
+
+
+def test_bootstrap_config_minimal_creation():
+ bc = BootstrapConfig(workspace_root=Path("/tmp"), model_name="claude-3-5-sonnet-20241022")
+ assert bc.workspace_root == Path("/tmp")
+ assert bc.project_root == Path("/tmp")
+ assert bc.cwd == Path("/tmp")
+ assert bc.model_name == "claude-3-5-sonnet-20241022"
+ assert bc.api_key is None
+
+
+def test_bootstrap_config_directory_lifetimes_can_be_distinct():
+ bc = BootstrapConfig(
+ workspace_root=Path("/workspace"),
+ original_cwd=Path("/launcher"),
+ project_root=Path("/workspace/project"),
+ cwd=Path("/workspace/project/src"),
+ model_name="test",
+ )
+ assert bc.original_cwd == Path("/launcher")
+ assert bc.project_root == Path("/workspace/project")
+ assert bc.cwd == Path("/workspace/project/src")
+ assert bc.workspace_root == Path("/workspace")
+
+
+def test_app_state_defaults_cover_permission_tracks():
+ s = AppState()
+ assert s.messages == []
+ assert s.turn_count == 0
+ assert s.total_cost == 0.0
+ assert s.compact_boundary_index == 0
+ assert s.tool_permission_context.alwaysAllowRules == {}
+ assert s.tool_permission_context.alwaysDenyRules == {}
+ assert s.tool_permission_context.alwaysAskRules == {}
+ assert s.pending_permission_requests == {}
+ assert s.resolved_permission_requests == {}
+
+
+def test_app_state_session_hooks_can_be_added_and_removed_per_event():
+ seen = []
+
+ def start_hook(payload):
+ seen.append(payload["event"])
+
+ s = AppState()
+ s.add_session_hook("SessionStart", start_hook)
+
+ hooks = s.get_session_hooks("SessionStart")
+ assert hooks == [start_hook]
+
+ hooks[0]({"event": "SessionStart"})
+ assert seen == ["SessionStart"]
+
+ s.remove_session_hook("SessionStart", start_hook)
+ assert s.get_session_hooks("SessionStart") == []
+
+
+def test_tool_use_context_subagent_noop_set_state():
+ bc = BootstrapConfig(workspace_root=Path("/tmp"), model_name="test")
+ app_state = AppState(turn_count=5)
+ calls = []
+
+ def noop(_value):
+ calls.append("called")
+
+ ctx = ToolUseContext(bootstrap=bc, get_app_state=lambda: app_state, set_app_state=noop)
+ ctx.set_app_state(AppState(turn_count=99))
+ assert len(calls) == 1
+ assert app_state.turn_count == 5
+
+
+def test_tool_use_context_core_callable_fields_are_not_typed_as_any():
+ hints = get_type_hints(ToolUseContext, globalns=vars(runtime_state))
+
+ assert hints["get_app_state"] is not Any
+ assert hints["set_app_state"] is not Any
+ assert hints["set_app_state_for_tasks"] is not Any
+ assert hints["refresh_tools"] is not Any
+ assert hints["can_use_tool"] is not Any
+ assert hints["request_permission"] is not Any
+ assert hints["consume_permission_resolution"] is not Any
+ assert hints["abort_controller"] is not Any
+
+
+def test_fork_context_copies_bootstrap_and_generates_new_session_id(runtime_parent_bootstrap):
+ child = fork_context(runtime_parent_bootstrap)
+ assert child.workspace_root == runtime_parent_bootstrap.workspace_root
+ assert child.original_cwd == runtime_parent_bootstrap.original_cwd
+ assert child.project_root == runtime_parent_bootstrap.project_root
+ assert child.cwd == runtime_parent_bootstrap.cwd
+ assert child.model_name == runtime_parent_bootstrap.model_name
+ assert child.api_key == runtime_parent_bootstrap.api_key
+ assert child.session_id != runtime_parent_bootstrap.session_id
+ assert child.parent_session_id == runtime_parent_bootstrap.session_id
+
+
+def test_create_subagent_context_keeps_parent_state_isolation(runtime_parent_tool_context):
+ child = create_subagent_context(runtime_parent_tool_context)
+
+ child.set_app_state(lambda prev: prev.model_copy(update={"turn_count": 9}))
+ assert runtime_parent_tool_context.get_app_state().turn_count == 1
+
+ child.set_app_state_for_tasks(lambda prev: prev.model_copy(update={"turn_count": 9}))
+ assert runtime_parent_tool_context.get_app_state().turn_count == 9
+
+
+def test_create_subagent_context_copies_read_state_and_abort_link(runtime_parent_tool_context):
+ runtime_parent_tool_context.read_file_state = {"/tmp/readme.md": {"partial": False, "meta": {"seen": 1}}}
+ runtime_parent_tool_context.abort_controller = AbortController()
+
+ child = create_subagent_context(runtime_parent_tool_context)
+ child.read_file_state["/tmp/readme.md"]["partial"] = True
+ child.read_file_state["/tmp/readme.md"]["meta"]["seen"] = 9
+ child.abort_controller.abort()
+
+ assert runtime_parent_tool_context.read_file_state["/tmp/readme.md"] == {
+ "partial": False,
+ "meta": {"seen": 1},
+ }
+ assert runtime_parent_tool_context.abort_controller.is_aborted() is False
+
+
+@pytest.mark.asyncio
+async def test_cleanup_registry_runs_in_priority_order_and_survives_failures():
+ order = []
+ reg = CleanupRegistry()
+
+ def failing():
+ raise RuntimeError("boom")
+
+ reg.register(lambda: order.append(3), priority=3)
+ reg.register(failing, priority=1)
+ reg.register(lambda: order.append(2), priority=2)
+ await reg.run_cleanup()
+ assert order == [2, 3]
+
+
+@pytest.mark.asyncio
+async def test_cleanup_registry_reuses_first_inflight_run():
+ order = []
+ release = asyncio.Event()
+ reg = CleanupRegistry()
+
+ async def slow():
+ order.append("start")
+ await release.wait()
+ order.append("done")
+
+ reg.register(slow, priority=1)
+
+ first = asyncio.create_task(reg.run_cleanup())
+ for _ in range(10):
+ if order == ["start"]:
+ break
+ await asyncio.sleep(0)
+
+ second = asyncio.create_task(reg.run_cleanup())
+ await asyncio.sleep(0)
+ release.set()
+ await asyncio.gather(first, second)
+
+ assert order == ["start", "done"]
+
+
+def test_cleanup_registry_register_returns_deregister_handle():
+ order = []
+ reg = CleanupRegistry()
+
+ unregister = reg.register(lambda: order.append("gone"), priority=1)
+ reg.register(lambda: order.append("kept"), priority=2)
+ unregister()
+
+ asyncio.run(reg.run_cleanup())
+ assert order == ["kept"]
+
+
+def test_cleanup_registry_installs_signal_handlers(monkeypatch):
+ registered = []
+
+ class _FakeLoop:
+ def add_signal_handler(self, sig, handler):
+ registered.append(sig)
+
+ monkeypatch.setattr(asyncio, "get_event_loop", lambda: _FakeLoop())
+
+ CleanupRegistry()
+
+ expected = {signal.SIGINT, signal.SIGTERM}
+ if hasattr(signal, "SIGHUP"):
+ expected.add(signal.SIGHUP)
+
+ assert set(registered) == expected
diff --git a/tests/test_spill_buffer.py b/tests/Unit/core/test_spill_buffer.py
similarity index 75%
rename from tests/test_spill_buffer.py
rename to tests/Unit/core/test_spill_buffer.py
index 553011a24..caf07bc5f 100644
--- a/tests/test_spill_buffer.py
+++ b/tests/Unit/core/test_spill_buffer.py
@@ -1,6 +1,6 @@
"""Tests for core.spill_buffer: spill_if_needed() and SpillBufferMiddleware."""
-import os
+import posixpath
from types import SimpleNamespace
from unittest.mock import MagicMock
@@ -61,12 +61,12 @@ def test_large_output_triggers_spill_and_preview(self):
)
# Verify write_file was called with the correct spill path.
- expected_path = os.path.join("/workspace", ".leon", "tool-results", "call_big.txt")
+ expected_path = posixpath.join("/workspace", ".leon", "tool-results", "call_big.txt")
fs.write_file.assert_called_once_with(expected_path, large)
# Result must mention the file path and include a preview.
assert expected_path in result
- assert "Output too large" in result
+ assert result.startswith("" in result
+ assert 'path="/workspace/.leon/tool-results/call_wrapped.txt"' in result
+ assert f'bytes="{len(large.encode("utf-8"))}"' in result
+
+ def test_image_block_content_bypasses_spill(self):
+ """Image-containing blocks should bypass persistence logic."""
+ fs = _make_fs_backend()
+ content = [
+ {"type": "text", "text": "caption"},
+ {"type": "image_url", "image_url": {"url": "https://example.com/a.png"}},
+ ]
+
+ result = spill_if_needed(
+ content=content,
+ threshold_bytes=1,
+ tool_call_id="call_image",
+ fs_backend=fs,
+ workspace_root="/workspace",
+ )
+
+ assert result is content
+ fs.write_file.assert_not_called()
+
+ def test_mcp_binary_blocks_are_saved_and_rewritten(self):
+ fs = _make_fs_backend()
+ mw = SpillBufferMiddleware(
+ fs_backend=fs,
+ workspace_root="/workspace",
+ default_threshold=50_000,
+ )
+ request = _make_request("mcp__server__image_tool", "call_mcp")
+ original_msg = ToolMessage(
+ content=[
+ {"type": "text", "text": "caption"},
+ {"type": "image", "base64": "QUJD", "mime_type": "image/png"},
+ ],
+ tool_call_id="call_mcp",
+ additional_kwargs={"tool_result_meta": {"source": "mcp"}},
+ )
+
+ result = mw._maybe_spill(request, original_msg)
+
+ expected_path = posixpath.join(
+ "/workspace",
+ ".leon",
+ "tool-results",
+ "call_mcp-1.png.base64",
+ )
+ fs.write_file.assert_called_once_with(expected_path, "QUJD")
+ assert isinstance(result.content, str)
+ assert "caption" in result.content
+ assert expected_path in result.content
+ assert "QUJD" not in result.content
+
# ===========================================================================
# SpillBufferMiddleware
@@ -236,7 +304,7 @@ def test_large_output_gets_spilled(self):
handler.assert_called_once_with(request)
assert result.content != large_content
- assert "Output too large" in result.content
+ assert result.content.startswith("done",
+ source="system",
+ notification_type="agent",
+ )
+ is True
+ )
+ assert (
+ is_terminal_background_notification(
+ "done",
+ source="system",
+ notification_type="command",
+ )
+ is True
+ )
+
+
+def test_is_terminal_background_notification_rejects_non_system_or_non_terminal_messages():
+ assert (
+ is_terminal_background_notification(
+ "done",
+ source="owner",
+ notification_type="agent",
+ )
+ is False
+ )
+ assert (
+ is_terminal_background_notification(
+ "plain reminder",
+ source="system",
+ notification_type="agent",
+ )
+ is False
+ )
diff --git a/tests/Unit/core/test_tool_registry_runner.py b/tests/Unit/core/test_tool_registry_runner.py
new file mode 100644
index 000000000..017f750a0
--- /dev/null
+++ b/tests/Unit/core/test_tool_registry_runner.py
@@ -0,0 +1,2371 @@
+"""Tests for ToolRegistry, ToolRunner, and ToolValidator (P0/P1 verification).
+
+Covers:
+- P0: Three-tier error normalization (Layer 1: validation, Layer 2: execution, Layer 3: soft)
+- P1: ToolRegistry inline/deferred mode
+- P1: ToolRunner dispatches registered tools and normalizes errors
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import time
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+from langchain_core.tools import tool
+
+from core.runtime.agent import _make_mcp_tool_entry
+from core.runtime.errors import InputValidationError
+from core.runtime.middleware import AgentMiddleware, ToolCallRequest
+from core.runtime.permissions import ToolPermissionContext, can_auto_approve
+from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry
+from core.runtime.runner import ToolRunner
+from core.runtime.state import AppState, BootstrapConfig, ToolUseContext
+from core.runtime.tool_result import ToolResultEnvelope, tool_permission_denied
+from core.runtime.validator import ToolValidator
+from core.tools.command.hooks.dangerous_commands import DangerousCommandsHook
+from core.tools.command.service import CommandService
+from core.tools.filesystem.read import ReadLimits
+from core.tools.filesystem.read import read_file as read_file_dispatch
+from core.tools.filesystem.read.readers.pdf import read_pdf
+from core.tools.filesystem.service import FileSystemService
+from core.tools.tool_search.service import ToolSearchService
+from core.tools.web.service import WebService
+from sandbox.interfaces.filesystem import DirListResult, FileReadResult, FileSystemBackend, FileWriteResult
+
+# ---------------------------------------------------------------------------
+# ToolRegistry
+# ---------------------------------------------------------------------------
+
+
+class TestToolRegistry:
+ def _make_entry(self, name: str, mode: ToolMode = ToolMode.INLINE) -> ToolEntry:
+ return ToolEntry(
+ name=name,
+ mode=mode,
+ schema={"name": name, "description": f"{name} tool"},
+ handler=lambda: f"result:{name}",
+ source="test",
+ )
+
+ def test_register_and_get(self):
+ reg = ToolRegistry()
+ entry = self._make_entry("Read")
+ reg.register(entry)
+ assert reg.get("Read") is entry
+
+ def test_get_unknown_returns_none(self):
+ reg = ToolRegistry()
+ assert reg.get("NonExistent") is None
+
+ def test_inline_tools_appear_in_get_inline_schemas(self):
+ reg = ToolRegistry()
+ reg.register(self._make_entry("Read", ToolMode.INLINE))
+ reg.register(self._make_entry("TaskCreate", ToolMode.DEFERRED))
+ schemas = reg.get_inline_schemas()
+ names = [s["name"] for s in schemas]
+ assert "Read" in names
+ assert "TaskCreate" not in names # P1: deferred not in inline
+
+ def test_deferred_tools_not_in_inline_schemas(self):
+ reg = ToolRegistry()
+ reg.register(self._make_entry("TaskCreate", ToolMode.DEFERRED))
+ reg.register(self._make_entry("TaskUpdate", ToolMode.DEFERRED))
+ assert reg.get_inline_schemas() == []
+
+ def test_search_finds_by_name(self):
+ reg = ToolRegistry()
+ reg.register(self._make_entry("TaskCreate", ToolMode.DEFERRED))
+ reg.register(self._make_entry("Read", ToolMode.INLINE))
+ results = reg.search("task")
+ names = [e.name for e in results]
+ assert "TaskCreate" in names
+
+ def test_search_includes_deferred_tools(self):
+ """tool_search must discover deferred tools too."""
+ reg = ToolRegistry()
+ reg.register(self._make_entry("TaskCreate", ToolMode.DEFERRED))
+ results = reg.search("TaskCreate")
+ assert any(e.name == "TaskCreate" for e in results)
+
+ def test_search_no_match_returns_empty_results(self):
+ reg = ToolRegistry()
+ reg.register(self._make_entry("Read", ToolMode.INLINE))
+ reg.register(self._make_entry("TaskCreate", ToolMode.DEFERRED))
+ assert reg.search("nonesuch") == []
+
+ def test_allowed_tools_filter(self):
+ reg = ToolRegistry(allowed_tools={"Read", "Grep"})
+ reg.register(self._make_entry("Read"))
+ reg.register(self._make_entry("Grep"))
+ reg.register(self._make_entry("Bash"))
+ assert reg.get("Read") is not None
+ assert reg.get("Grep") is not None
+ assert reg.get("Bash") is None # filtered out
+
+ def test_dynamic_schema_callable(self):
+ call_count = 0
+
+ def schema_fn() -> dict:
+ nonlocal call_count
+ call_count += 1
+ return {"name": "DynTool", "description": "dynamic"}
+
+ reg = ToolRegistry()
+ entry = ToolEntry(
+ name="DynTool",
+ mode=ToolMode.INLINE,
+ schema=schema_fn,
+ handler=lambda: "ok",
+ source="test",
+ )
+ reg.register(entry)
+ schemas = reg.get_inline_schemas()
+ assert call_count >= 1
+ assert any(s["name"] == "DynTool" for s in schemas)
+
+
+    def test_inline_schemas_strip_runtime_only_schema_metadata(self):
+        reg = ToolRegistry()
+        reg.register(
+            ToolEntry(
+                name="ChatRead",
+                mode=ToolMode.INLINE,
+                schema={
+                    "name": "ChatRead",
+                    "description": "chat read",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "chat_id": {"type": "string"},
+                        },
+                        "x-leon-required-any-of": [["chat_id"]],
+                    },
+                },
+                handler=lambda **_kwargs: "ok",
+                source="test",
+            )
+        )
+
+        [schema] = reg.get_inline_schemas()
+
+        assert "x-leon-required-any-of" not in schema["parameters"]
+
+
+def test_agent_middleware_tools_are_not_shared_mutable_state():
+    first = AgentMiddleware()
+    second = AgentMiddleware()
+
+    first.tools = ["x"]
+
+    assert second.tools == ()
+
+# ---------------------------------------------------------------------------
+# ToolValidator
+# ---------------------------------------------------------------------------
+
+
+class TestToolValidator:
+ def _schema(self, required: list[str], props: dict) -> dict:
+ return {
+ "name": "TestTool",
+ "parameters": {
+ "type": "object",
+ "required": required,
+ "properties": {k: {"type": v} for k, v in props.items()},
+ },
+ }
+
+ def test_valid_args_pass(self):
+ v = ToolValidator()
+ schema = self._schema(["file_path"], {"file_path": "string"})
+ result = v.validate(schema, {"file_path": "/tmp/x"})
+ assert result.ok
+
+ def test_missing_required_raises_layer1(self):
+ v = ToolValidator()
+ schema = self._schema(["file_path"], {"file_path": "string"})
+ with pytest.raises(InputValidationError) as exc_info:
+ v.validate(schema, {})
+ assert "file_path" in str(exc_info.value)
+ assert "missing" in str(exc_info.value)
+ assert exc_info.value.error_code == "REQUIRED_FIELD_MISSING"
+ assert exc_info.value.details[0]["field"] == "file_path"
+
+ def test_wrong_type_raises_layer1(self):
+ v = ToolValidator()
+ schema = self._schema(["count"], {"count": "integer"})
+ with pytest.raises(InputValidationError) as exc_info:
+ v.validate(schema, {"count": "not-an-int"})
+ assert exc_info.value.error_code == "INVALID_TYPE"
+ assert exc_info.value.details[0]["field"] == "count"
+
+ def test_extra_params_allowed(self):
+ v = ToolValidator()
+ schema = self._schema(["a"], {"a": "string"})
+ result = v.validate(schema, {"a": "hello", "extra": "ok"})
+ assert result.ok
+
+ def test_required_any_of_requires_one_alternative(self):
+ v = ToolValidator()
+ schema = {
+ "name": "ChatRead",
+ "parameters": {
+ "type": "object",
+ "required": [],
+ "properties": {
+ "entity_id": {"type": "string"},
+ "chat_id": {"type": "string"},
+ },
+ "x-leon-required-any-of": [
+ ["entity_id"],
+ ["chat_id"],
+ ],
+ },
+ }
+
+ with pytest.raises(InputValidationError) as exc_info:
+ v.validate(schema, {})
+
+ assert "entity_id" in str(exc_info.value)
+ assert "chat_id" in str(exc_info.value)
+
+ def test_required_any_of_accepts_present_alternative(self):
+ v = ToolValidator()
+ schema = {
+ "name": "ChatRead",
+ "parameters": {
+ "type": "object",
+ "required": [],
+ "properties": {
+ "entity_id": {"type": "string"},
+ "chat_id": {"type": "string"},
+ },
+ "x-leon-required-any-of": [
+ ["entity_id"],
+ ["chat_id"],
+ ],
+ },
+ }
+
+ result = v.validate(schema, {"chat_id": "chat-1"})
+ assert result.ok
+
+ def test_string_constraints_raise_layer1(self):
+ v = ToolValidator()
+ schema = {
+ "name": "Read",
+ "parameters": {
+ "type": "object",
+ "required": ["file_path"],
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "minLength": 1,
+ "pattern": "^/",
+ }
+ },
+ },
+ }
+
+ with pytest.raises(InputValidationError) as exc_info:
+ v.validate(schema, {"file_path": "relative/path.txt"})
+
+ assert "file_path" in str(exc_info.value)
+ assert "match pattern" in str(exc_info.value)
+ assert exc_info.value.error_code == "PATTERN_MISMATCH"
+ assert exc_info.value.details[0]["error_code"] == "PATTERN_MISMATCH"
+
+ def test_absolute_path_pattern_accepts_windows_drive_paths(self):
+ v = ToolValidator()
+ schema = {
+ "name": "Read",
+ "parameters": {
+ "type": "object",
+ "required": ["file_path"],
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "minLength": 1,
+ "pattern": r"^(?:/|[A-Za-z]:[\\/])",
+ }
+ },
+ },
+ }
+
+ result = v.validate(schema, {"file_path": r"C:\tmp\file.txt"})
+
+ assert result.ok
+
+ def test_numeric_maximum_raises_layer1(self):
+ v = ToolValidator()
+ schema = {
+ "name": "TaskOutput",
+ "parameters": {
+ "type": "object",
+ "required": ["timeout"],
+ "properties": {
+ "timeout": {
+ "type": "integer",
+ "maximum": 600000,
+ }
+ },
+ },
+ }
+
+ with pytest.raises(InputValidationError) as exc_info:
+ v.validate(schema, {"timeout": 600001})
+
+ assert "timeout" in str(exc_info.value)
+ assert "at most" in str(exc_info.value)
+ assert exc_info.value.error_code == "NUMBER_TOO_LARGE"
+ assert exc_info.value.details[0]["field"] == "timeout"
+
+
+# ---------------------------------------------------------------------------
+# ToolRunner — P0 error normalization
+# ---------------------------------------------------------------------------
+
+
+def _make_runner(entries: list[ToolEntry]) -> ToolRunner:
+ reg = ToolRegistry()
+ for e in entries:
+ reg.register(e)
+ return ToolRunner(registry=reg)
+
+
+def _make_tool_call_request(name: str, args: dict, call_id: str = "tc-1"):
+ req = MagicMock()
+ req.tool_call = {"name": name, "args": args, "id": call_id}
+ return req
+
+
+class TestToolRunnerErrorNormalization:
+ """P0: three-tier error normalization."""
+
+ def test_layer1_missing_param_returns_input_validation_error(self):
+ entry = ToolEntry(
+ name="Read",
+ mode=ToolMode.INLINE,
+ schema={
+ "name": "Read",
+ "parameters": {
+ "type": "object",
+ "required": ["file_path"],
+ "properties": {"file_path": {"type": "string"}},
+ },
+ },
+ handler=lambda file_path: "content",
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Read", {}) # missing file_path
+
+ called_upstream = []
+
+ def upstream(r):
+ called_upstream.append(r)
+ return MagicMock()
+
+ result = runner.wrap_tool_call(req, upstream)
+ # Layer 1 error format: InputValidationError: {name} failed due to...
+ assert "InputValidationError" in result.content
+ assert "Read" in result.content
+ assert result.additional_kwargs["tool_result_meta"]["error_code"] == "REQUIRED_FIELD_MISSING"
+ assert not called_upstream # must not fall through to upstream
+
+ def test_layer1_schema_failure_returns_structured_error_details(self):
+ entry = ToolEntry(
+ name="Bash",
+ mode=ToolMode.INLINE,
+ schema={
+ "name": "Bash",
+ "parameters": {
+ "type": "object",
+ "required": ["timeout"],
+ "properties": {
+ "timeout": {"type": "integer", "maximum": 600000},
+ },
+ },
+ },
+ handler=lambda timeout: timeout,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Bash", {"timeout": 600001})
+
+ result = runner.wrap_tool_call(req, lambda r: MagicMock())
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert meta["error_type"] == "input_validation"
+ assert meta["error_code"] == "NUMBER_TOO_LARGE"
+ assert meta["error_details"][0]["field"] == "timeout"
+
+ def test_layer2_handler_exception_returns_tool_use_error(self):
+ def bad_handler(**kwargs):
+ raise ValueError("disk full")
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={
+ "name": "Write",
+ "parameters": {
+ "type": "object",
+ "required": [],
+ "properties": {},
+ },
+ },
+ handler=bad_handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ result = runner.wrap_tool_call(req, lambda r: MagicMock())
+        # Layer 2 error format: <tool_use_error>...</tool_use_error>
+        assert "<tool_use_error>" in result.content
+ assert "disk full" in result.content
+
+ @pytest.mark.asyncio
+ async def test_filesystem_service_read_preserves_image_blocks_on_local_path(self, tmp_path):
+ registry = ToolRegistry()
+ FileSystemService(
+ registry=registry,
+ workspace_root=tmp_path,
+ )
+ image = tmp_path / "tiny.png"
+ image.write_bytes(b"fake-png-payload")
+
+ runner = _make_runner(registry.list_all())
+ req = _make_tool_call_request("Read", {"file_path": str(image)})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert isinstance(result.content, list)
+ assert any(block.get("type") == "image" for block in result.content)
+ assert result.additional_kwargs["tool_result_meta"]["source"] == "local"
+
+ @pytest.mark.asyncio
+ async def test_filesystem_service_read_preserves_image_blocks_on_remote_path(self, tmp_path):
+ class RemoteImageBackend(FileSystemBackend):
+ is_remote = True
+
+ def __init__(self):
+ self._raw = b"remote-png-payload"
+
+ def read_file(self, path: str) -> FileReadResult:
+ return FileReadResult(content="opaque-binary-placeholder", size=len(self._raw))
+
+ def write_file(self, path: str, content: str) -> FileWriteResult:
+ return FileWriteResult(success=True)
+
+ def file_exists(self, path: str) -> bool:
+ return True
+
+ def file_mtime(self, path: str) -> float | None:
+ return None
+
+ def file_size(self, path: str) -> int | None:
+ return len(self._raw)
+
+ def is_dir(self, path: str) -> bool:
+ return False
+
+ def list_dir(self, path: str) -> DirListResult:
+ return DirListResult(entries=[])
+
+ def download_bytes(self, path: str) -> bytes:
+ return self._raw
+
+ registry = ToolRegistry()
+ FileSystemService(
+ registry=registry,
+ workspace_root="/workspace",
+ backend=RemoteImageBackend(),
+ )
+
+ runner = _make_runner(registry.list_all())
+ req = _make_tool_call_request("Read", {"file_path": "/workspace/tiny.png"})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert isinstance(result.content, list)
+ assert any(block.get("type") == "image" for block in result.content)
+ assert result.additional_kwargs["tool_result_meta"]["source"] == "local"
+
+ @pytest.mark.asyncio
+ async def test_filesystem_service_read_remote_pdf_uses_special_reader_path(self, tmp_path):
+ pdf_bytes = b"%PDF-1.4\nnot-a-real-pdf\n"
+ local_pdf = tmp_path / "sample.pdf"
+ local_pdf.write_bytes(pdf_bytes)
+ expected = read_file_dispatch(path=local_pdf, limits=ReadLimits()).format_output()
+ expected = expected.replace(str(local_pdf), "/workspace/sample.pdf")
+
+ class RemotePdfBackend(FileSystemBackend):
+ is_remote = True
+
+ def read_file(self, path: str) -> FileReadResult:
+ return FileReadResult(content="opaque-pdf-placeholder", size=len(pdf_bytes))
+
+ def write_file(self, path: str, content: str) -> FileWriteResult:
+ return FileWriteResult(success=True)
+
+ def file_exists(self, path: str) -> bool:
+ return True
+
+ def file_mtime(self, path: str) -> float | None:
+ return None
+
+ def file_size(self, path: str) -> int | None:
+ return len(pdf_bytes)
+
+ def is_dir(self, path: str) -> bool:
+ return False
+
+ def list_dir(self, path: str) -> DirListResult:
+ return DirListResult(entries=[])
+
+ def download_bytes(self, path: str) -> bytes:
+ return pdf_bytes
+
+ registry = ToolRegistry()
+ FileSystemService(
+ registry=registry,
+ workspace_root="/workspace",
+ backend=RemotePdfBackend(),
+ )
+
+ runner = _make_runner(registry.list_all())
+ req = _make_tool_call_request("Read", {"file_path": "/workspace/sample.pdf"})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert result.content == expected
+
+ @pytest.mark.asyncio
+ async def test_filesystem_service_remote_special_file_fails_before_download_when_size_known(self):
+ class RemoteLargePdfBackend(FileSystemBackend):
+ is_remote = True
+
+ def read_file(self, path: str) -> FileReadResult:
+ raise AssertionError("read_file should not run for oversize remote preflight")
+
+ def write_file(self, path: str, content: str) -> FileWriteResult:
+ return FileWriteResult(success=True)
+
+ def file_exists(self, path: str) -> bool:
+ return True
+
+ def file_mtime(self, path: str) -> float | None:
+ return None
+
+ def file_size(self, path: str) -> int | None:
+ return 11 * 1024 * 1024
+
+ def is_dir(self, path: str) -> bool:
+ return False
+
+ def list_dir(self, path: str) -> DirListResult:
+ return DirListResult(entries=[])
+
+ def download_bytes(self, path: str) -> bytes:
+ raise AssertionError("download_bytes should not run for oversize remote preflight")
+
+ registry = ToolRegistry()
+ FileSystemService(
+ registry=registry,
+ workspace_root="/workspace",
+ backend=RemoteLargePdfBackend(),
+ )
+
+ runner = _make_runner(registry.list_all())
+ req = _make_tool_call_request("Read", {"file_path": "/workspace/huge.pdf"})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert "ToolValidationError" in result.content
+ assert "too large" in result.content.lower()
+ assert result.additional_kwargs["tool_result_meta"]["error_code"] == "FILE_TOO_LARGE"
+
+ @pytest.mark.asyncio
+ async def test_filesystem_service_read_accepts_pdf_pages_argument(self, tmp_path):
+ pdf_bytes = b"%PDF-1.4\nnot-a-real-pdf\n"
+ local_pdf = tmp_path / "paged.pdf"
+ local_pdf.write_bytes(pdf_bytes)
+ expected = read_pdf(local_pdf, ReadLimits(), start_page=1, limit_pages=1).format_output()
+
+ registry = ToolRegistry()
+ FileSystemService(
+ registry=registry,
+ workspace_root=tmp_path,
+ )
+ runner = _make_runner(registry.list_all())
+ req = _make_tool_call_request("Read", {"file_path": str(local_pdf), "pages": "1"})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert result.content == expected
+
+ def test_layer3_handler_returns_soft_failure_text(self):
+ def soft_fail(**kwargs):
+ return "No files found"
+
+ entry = ToolEntry(
+ name="Glob",
+ mode=ToolMode.INLINE,
+ schema={
+ "name": "Glob",
+ "parameters": {
+ "type": "object",
+ "required": ["pattern"],
+ "properties": {"pattern": {"type": "string"}},
+ },
+ },
+ handler=soft_fail,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Glob", {"pattern": "**/*.xyz"})
+ result = runner.wrap_tool_call(req, lambda r: MagicMock())
+ # Layer 3: plain text, no tags
+ assert result.content == "No files found"
+        assert "<tool_use_error>" not in result.content
+ assert "InputValidationError" not in result.content
+
+ def test_unknown_tool_falls_through_to_upstream(self):
+ runner = _make_runner([]) # empty registry
+ req = _make_tool_call_request("UnknownMCPTool", {})
+ upstream_called = []
+
+ def upstream(r):
+ upstream_called.append(r)
+ msg = MagicMock()
+ msg.content = "mcp result"
+ return msg
+
+ result = runner.wrap_tool_call(req, upstream)
+ assert upstream_called
+ assert result.content == "mcp result"
+
+ @pytest.mark.asyncio
+ async def test_non_mcp_post_tool_use_hook_sees_materialized_tool_message(self):
+ events = []
+
+ def local_handler(**kwargs):
+ return "plain success"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=local_handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ def post_tool_use(message, request):
+ events.append((type(message).__name__, message.content, message.additional_kwargs["tool_result_meta"]["source"]))
+ return message
+
+ req.state.post_tool_use = post_tool_use
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert result.content == "plain success"
+ assert events == [("ToolMessage", "plain success", "local")]
+
+ @pytest.mark.asyncio
+ async def test_async_post_tool_use_hooks_run_in_parallel(self):
+ def local_handler(**kwargs):
+ return "plain success"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=local_handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ async def post_hook_one(message, request):
+ await asyncio.sleep(0.05)
+ return None
+
+ async def post_hook_two(message, request):
+ await asyncio.sleep(0.05)
+ return None
+
+ req.state.post_tool_use = [post_hook_one, post_hook_two]
+
+ started = time.perf_counter()
+ result = await runner.awrap_tool_call(req, AsyncMock())
+ elapsed = time.perf_counter() - started
+
+ assert result.content == "plain success"
+ assert elapsed < 0.09
+
+ @pytest.mark.asyncio
+ async def test_async_post_tool_use_hook_timeout_cancels_hook_and_preserves_result(self):
+ events = []
+
+ def local_handler(**kwargs):
+ return "plain success"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=local_handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+ req.state.hook_timeout_ms = 50
+
+ async def stuck_hook(message, request):
+ try:
+ await asyncio.Future()
+ except asyncio.CancelledError:
+ events.append("post-cancelled")
+ raise
+
+ req.state.post_tool_use = stuck_hook
+
+ started = time.perf_counter()
+ result = await runner.awrap_tool_call(req, AsyncMock())
+ elapsed = time.perf_counter() - started
+
+ assert result.content == "plain success"
+ assert elapsed < 0.2
+ assert events == ["post-cancelled"]
+
+ @pytest.mark.asyncio
+ async def test_async_pre_tool_use_hook_timeout_cancels_hook_and_preserves_execution(self):
+ events = []
+
+ def local_handler(**kwargs):
+ events.append("handler")
+ return "plain success"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=local_handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+ req.state.hook_timeout_ms = 50
+
+ async def stuck_hook(payload, request):
+ try:
+ await asyncio.Future()
+ except asyncio.CancelledError:
+ events.append("pre-cancelled")
+ raise
+
+ req.state.pre_tool_use = stuck_hook
+
+ started = time.perf_counter()
+ result = await runner.awrap_tool_call(req, AsyncMock())
+ elapsed = time.perf_counter() - started
+
+ assert result.content == "plain success"
+ assert elapsed < 0.2
+ assert events == ["pre-cancelled", "handler"]
+
+ @pytest.mark.asyncio
+ async def test_post_tool_use_failure_hook_runs_on_materialized_error_message(self):
+ seen = []
+
+ def bad_handler(**kwargs):
+ raise ValueError("disk full")
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=bad_handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ def post_tool_use_failure(message, request):
+ seen.append((type(message).__name__, message.additional_kwargs["tool_result_meta"]["kind"]))
+ return message
+
+ req.state.post_tool_use_failure = post_tool_use_failure
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+        assert "<tool_use_error>" in result.content
+ assert seen == [("ToolMessage", "error")]
+
+ @pytest.mark.asyncio
+ async def test_permission_denied_result_keeps_distinct_metadata(self):
+ def denied_handler(**kwargs):
+ return tool_permission_denied(
+ "permission denied",
+ top_level_blocks=[{"type": "text", "text": "extra-block"}],
+ metadata={"policy": "workspace"},
+ )
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=denied_handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "permission denied"
+ assert meta["kind"] == "permission_denied"
+ assert meta["source"] == "local"
+ assert meta["top_level_blocks"] == [{"type": "text", "text": "extra-block"}]
+ assert meta["policy"] == "workspace"
+
+ @pytest.mark.asyncio
+ async def test_mcp_post_tool_use_hook_can_modify_result_before_materialization(self):
+ runner = _make_runner([]) # unknown tool => upstream/MCP path
+ req = _make_tool_call_request("mcp__server__tool", {})
+ req.state = MagicMock()
+ seen = []
+
+ def post_tool_use(payload, request):
+ seen.append(type(payload).__name__)
+ assert isinstance(payload, ToolResultEnvelope)
+ return ToolResultEnvelope(
+ kind=payload.kind,
+ content="hooked mcp result",
+ is_error=payload.is_error,
+ top_level_blocks=payload.top_level_blocks,
+ metadata={**payload.metadata, "hooked": True},
+ )
+
+ req.state.post_tool_use = post_tool_use
+
+ async def upstream(_request):
+ return ToolResultEnvelope(kind="success", content="raw mcp result")
+
+ result = await runner.awrap_tool_call(req, upstream)
+
+ assert seen == ["ToolResultEnvelope"]
+ assert result.content == "hooked mcp result"
+ assert result.additional_kwargs["tool_result_meta"]["source"] == "mcp"
+ assert result.additional_kwargs["tool_result_meta"]["hooked"] is True
+
+ @pytest.mark.asyncio
+ async def test_command_hook_denial_uses_permission_denied_result_path(self, tmp_path):
+ registry = ToolRegistry()
+ CommandService(
+ registry=registry,
+ workspace_root=tmp_path,
+ hooks=[DangerousCommandsHook()],
+ )
+ runner = ToolRunner(registry=registry)
+ req = _make_tool_call_request("Bash", {"command": "rm -rf /"})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert "SECURITY" in result.content
+ assert meta["kind"] == "permission_denied"
+ assert meta["source"] == "local"
+ assert meta["policy"] == "command_hook"
+
+ @pytest.mark.asyncio
+ async def test_command_hook_does_not_block_quoted_dangerous_text(self, tmp_path):
+ registry = ToolRegistry()
+ CommandService(
+ registry=registry,
+ workspace_root=tmp_path,
+ hooks=[DangerousCommandsHook(verbose=False)],
+ )
+ runner = ToolRunner(registry=registry)
+ req = _make_tool_call_request("Bash", {"command": 'echo "rm -rf /"'})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert "SECURITY ERROR" not in result.content
+ assert "rm -rf /" in result.content
+
+ @pytest.mark.asyncio
+ async def test_command_hook_does_not_block_commented_dangerous_text(self, tmp_path):
+ registry = ToolRegistry()
+ CommandService(
+ registry=registry,
+ workspace_root=tmp_path,
+ hooks=[DangerousCommandsHook(verbose=False)],
+ )
+ runner = ToolRunner(registry=registry)
+ req = _make_tool_call_request("Bash", {"command": "echo hi # rm -rf /"})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert "SECURITY ERROR" not in result.content
+ assert "hi" in result.content
+
+ @pytest.mark.asyncio
+ async def test_command_hook_blocks_obfuscated_dangerous_command_name_with_inline_quotes(self, tmp_path):
+ registry = ToolRegistry()
+ CommandService(
+ registry=registry,
+ workspace_root=tmp_path,
+ hooks=[DangerousCommandsHook(verbose=False)],
+ )
+ runner = ToolRunner(registry=registry)
+ req = _make_tool_call_request("Bash", {"command": 's"u"do echo hi'})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert "SECURITY ERROR" in result.content
+ assert result.additional_kwargs["tool_result_meta"]["kind"] == "permission_denied"
+
+ @pytest.mark.asyncio
+ async def test_command_hook_blocks_ansi_c_quoted_obfuscation(self, tmp_path):
+ registry = ToolRegistry()
+ CommandService(
+ registry=registry,
+ workspace_root=tmp_path,
+ hooks=[DangerousCommandsHook(verbose=False)],
+ )
+ runner = ToolRunner(registry=registry)
+ req = _make_tool_call_request("Bash", {"command": "s$'udo' echo hi"})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert "SECURITY ERROR" in result.content
+ assert result.additional_kwargs["tool_result_meta"]["kind"] == "permission_denied"
+
+ @pytest.mark.asyncio
+ async def test_registered_mcp_tool_executes_through_runner_with_mcp_source(self):
+ @tool
+ async def sample_mcp_tool(x: int) -> str:
+ """sample mcp"""
+ return f"mcp:{x}"
+
+ registry = ToolRegistry()
+ registry.register(_make_mcp_tool_entry(sample_mcp_tool))
+ runner = ToolRunner(registry=registry)
+ req = _make_tool_call_request("sample_mcp_tool", {"x": 3})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "mcp:3"
+ assert meta["source"] == "mcp"
+ assert meta["kind"] == "success"
+
+ @pytest.mark.asyncio
+ async def test_registered_mcp_tool_post_hook_sees_envelope_before_materialization(self):
+ @tool
+ async def sample_mcp_tool(x: int) -> str:
+ """sample mcp"""
+ return f"mcp:{x}"
+
+ registry = ToolRegistry()
+ registry.register(_make_mcp_tool_entry(sample_mcp_tool))
+ runner = ToolRunner(registry=registry)
+ req = _make_tool_call_request("sample_mcp_tool", {"x": 3})
+ req.state = MagicMock()
+ seen = []
+
+ def post_tool_use(payload, request):
+ seen.append(type(payload).__name__)
+ assert isinstance(payload, ToolResultEnvelope)
+ return payload
+
+ req.state.post_tool_use = post_tool_use
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert seen == ["ToolResultEnvelope"]
+ assert result.content == "mcp:3"
+ assert result.additional_kwargs["tool_result_meta"]["source"] == "mcp"
+
+ @pytest.mark.asyncio
+ async def test_registered_mcp_tool_preserves_content_blocks_before_spill(self):
+ @tool
+ async def sample_mcp_tool(x: int) -> list[dict[str, str]]:
+ """sample mcp"""
+ return [
+ {"type": "text", "text": f"mcp:{x}"},
+ {"type": "image", "base64": "QUJD", "mime_type": "image/png"},
+ ]
+
+ registry = ToolRegistry()
+ registry.register(_make_mcp_tool_entry(sample_mcp_tool))
+ runner = ToolRunner(registry=registry)
+ req = _make_tool_call_request("sample_mcp_tool", {"x": 3})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert result.content == [
+ {"type": "text", "text": "mcp:3"},
+ {"type": "image", "base64": "QUJD", "mime_type": "image/png"},
+ ]
+ assert result.additional_kwargs["tool_result_meta"]["source"] == "mcp"
+
+ @pytest.mark.asyncio
+ async def test_registered_mcp_hook_rematerialization_keeps_mcp_source(self):
+ @tool
+ async def sample_mcp_tool(x: int) -> str:
+ """sample mcp"""
+ return f"mcp:{x}"
+
+ registry = ToolRegistry()
+ registry.register(_make_mcp_tool_entry(sample_mcp_tool))
+ runner = ToolRunner(registry=registry)
+ req = _make_tool_call_request("sample_mcp_tool", {"x": 3})
+ req.state = MagicMock()
+
+ def post_tool_use(payload, request):
+ return ToolResultEnvelope(
+ kind="success",
+ content="hooked-remat",
+ metadata={"hooked": True},
+ )
+
+ req.state.post_tool_use = post_tool_use
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "hooked-remat"
+ assert meta["source"] == "mcp"
+ assert meta["hooked"] is True
+
+ @pytest.mark.asyncio
+ async def test_pre_tool_use_does_not_run_before_schema_validation(self):
+ events = []
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={
+ "name": "Write",
+ "parameters": {
+ "type": "object",
+ "required": ["path"],
+ "properties": {"path": {"type": "string"}},
+ },
+ },
+ handler=lambda path: f"ok:{path}",
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ def pre_tool_use(payload, request):
+ events.append("pre")
+ return payload
+
+ req.state.pre_tool_use = pre_tool_use
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert "InputValidationError" in result.content
+ assert events == []
+
+ @pytest.mark.asyncio
+ async def test_tool_specific_validation_runs_before_pre_tool_use_and_handler(self):
+ events = []
+
+ def validate_input(args, request):
+ events.append("tool-validate")
+ return {"path": args["path"], "normalized": True}
+
+ def handler(path, normalized=False):
+ events.append(("handler", path, normalized))
+ return "ok"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={
+ "name": "Write",
+ "parameters": {
+ "type": "object",
+ "required": ["path"],
+ "properties": {"path": {"type": "string"}},
+ },
+ },
+ handler=handler,
+ source="test",
+ validate_input=validate_input,
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {"path": "/tmp/a"})
+ req.state = MagicMock()
+
+ def pre_tool_use(payload, request):
+ events.append(("pre", dict(payload["args"])))
+ return payload
+
+ req.state.pre_tool_use = pre_tool_use
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert result.content == "ok"
+ assert events == [
+ "tool-validate",
+ ("pre", {"path": "/tmp/a", "normalized": True}),
+ ("handler", "/tmp/a", True),
+ ]
+
+ @pytest.mark.asyncio
+ async def test_tool_specific_validation_failure_object_stops_before_handler(self):
+ events = []
+
+ def validate_input(args, request):
+ events.append("tool-validate")
+ return {"result": False, "message": "tool says no", "errorCode": "E_NO"}
+
+ def handler(**kwargs):
+ events.append(("handler", kwargs))
+ return "should-not-run"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={
+ "name": "Write",
+ "parameters": {
+ "type": "object",
+ "required": [],
+ "properties": {},
+ },
+ },
+ handler=handler,
+ source="test",
+ validate_input=validate_input,
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert "ToolValidationError" in result.content
+ assert "tool says no" in result.content
+ assert result.additional_kwargs["tool_result_meta"]["error_type"] == "tool_input_validation"
+ assert result.additional_kwargs["tool_result_meta"]["error_code"] == "E_NO"
+ assert events == ["tool-validate"]
+
+ @pytest.mark.asyncio
+ async def test_filesystem_list_dir_outside_workspace_fails_with_structured_error_code(self, tmp_path):
+ registry = ToolRegistry()
+ FileSystemService(
+ registry=registry,
+ workspace_root=tmp_path,
+ )
+ runner = _make_runner(registry.list_all())
+ outside = (tmp_path.parent / "outside").resolve()
+ req = _make_tool_call_request("list_dir", {"path": str(outside)})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert "ToolValidationError" in result.content
+ assert "outside workspace" in result.content.lower()
+ assert result.additional_kwargs["tool_result_meta"]["error_type"] == "tool_input_validation"
+ assert result.additional_kwargs["tool_result_meta"]["error_code"] == "PATH_OUTSIDE_WORKSPACE"
+
+ @pytest.mark.asyncio
+ async def test_filesystem_read_large_file_fails_before_handler_as_tool_validation(self, tmp_path):
+ class LargeFileBackend(FileSystemBackend):
+ is_remote = False
+
+ def __init__(self):
+ self.read_calls = 0
+
+ def read_file(self, path: str) -> FileReadResult:
+ self.read_calls += 1
+ raise AssertionError("read_file should not run for oversize preflight")
+
+ def write_file(self, path: str, content: str) -> FileWriteResult:
+ return FileWriteResult(success=True)
+
+ def file_exists(self, path: str) -> bool:
+ return True
+
+ def file_mtime(self, path: str) -> float | None:
+ return None
+
+ def file_size(self, path: str) -> int | None:
+ return 11 * 1024 * 1024
+
+ def is_dir(self, path: str) -> bool:
+ return False
+
+ def list_dir(self, path: str) -> DirListResult:
+ return DirListResult(entries=[])
+
+ backend = LargeFileBackend()
+ registry = ToolRegistry()
+ FileSystemService(
+ registry=registry,
+ workspace_root=tmp_path,
+ backend=backend,
+ )
+ runner = _make_runner(registry.list_all())
+ target = (tmp_path / "too-large.txt").resolve()
+ req = _make_tool_call_request("Read", {"file_path": str(target)})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert "ToolValidationError" in result.content
+ assert "too large" in result.content.lower()
+ assert result.additional_kwargs["tool_result_meta"]["error_type"] == "tool_input_validation"
+ assert result.additional_kwargs["tool_result_meta"]["error_code"] == "FILE_TOO_LARGE"
+ assert backend.read_calls == 0
+
+ @pytest.mark.asyncio
+ async def test_hook_allow_cannot_bypass_permission_deny_rule(self):
+ def handler(**kwargs):
+ raise AssertionError("handler should not run when permission denies")
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ def pre_tool_use(payload, request):
+ return {"permission": "allow"}
+
+ def can_use_tool(name, args, context, request):
+ return {"decision": "deny", "message": "settings deny"}
+
+ req.state.pre_tool_use = pre_tool_use
+ req.state.can_use_tool = can_use_tool
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "settings deny"
+ assert meta["kind"] == "permission_denied"
+ assert meta["decision"] == "deny"
+
+ @pytest.mark.asyncio
+ async def test_parallel_pre_tool_use_hooks_keep_deny_precedence(self):
+ def handler(**kwargs):
+ raise AssertionError("handler should not run when a hook denies")
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ async def allow_hook(payload, request):
+ await asyncio.sleep(0.01)
+ return {"permission": "allow", "message": "hook allow"}
+
+ async def deny_hook(payload, request):
+ await asyncio.sleep(0.01)
+ return {"decision": "deny", "message": "hook deny"}
+
+ req.state.pre_tool_use = [allow_hook, deny_hook]
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "hook deny"
+ assert meta["kind"] == "permission_denied"
+ assert meta["decision"] == "deny"
+
+ @pytest.mark.asyncio
+ async def test_pre_tool_use_can_update_args_before_permission_and_handler(self):
+ seen = []
+
+ def handler(path):
+ seen.append(("handler", path))
+ return f"ok:{path}"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={
+ "name": "Write",
+ "parameters": {
+ "type": "object",
+ "required": ["path"],
+ "properties": {"path": {"type": "string"}},
+ },
+ },
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {"path": "raw"})
+ req.state = MagicMock()
+
+ def pre_tool_use(payload, request):
+ return {"args": {"path": "mutated"}}
+
+ def can_use_tool(name, args, context, request):
+ seen.append(("permission", args["path"]))
+ return {"decision": "allow"}
+
+ req.state.pre_tool_use = pre_tool_use
+ req.state.can_use_tool = can_use_tool
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert result.content == "ok:mutated"
+ assert seen == [("permission", "mutated"), ("handler", "mutated")]
+
+ @pytest.mark.asyncio
+ async def test_async_pre_tool_use_hooks_run_in_parallel(self):
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=lambda: "ok",
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ async def hook_one(payload, request):
+ await asyncio.sleep(0.05)
+ return None
+
+ async def hook_two(payload, request):
+ await asyncio.sleep(0.05)
+ return None
+
+ req.state.pre_tool_use = [hook_one, hook_two]
+
+ started = time.perf_counter()
+ result = await runner.awrap_tool_call(req, AsyncMock())
+ elapsed = time.perf_counter() - started
+
+ assert result.content == "ok"
+ assert elapsed < 0.09
+
+ @pytest.mark.asyncio
+ async def test_permission_checker_receives_permission_context_not_scheduler_flag(self):
+ seen = []
+
+ entry = ToolEntry(
+ name="Read",
+ mode=ToolMode.INLINE,
+ schema={"name": "Read", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=lambda: "ok",
+ source="test",
+ is_read_only=True,
+ is_concurrency_safe=True,
+ is_destructive=True,
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Read", {})
+ req.state = MagicMock()
+
+ def can_use_tool(name, args, context, request):
+ seen.append((context.is_read_only, context.is_destructive, hasattr(context, "is_concurrency_safe")))
+ return {"decision": "allow"}
+
+ req.state.can_use_tool = can_use_tool
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert result.content == "ok"
+ assert seen == [(True, True, False)]
+
+ @pytest.mark.asyncio
+ async def test_async_permission_checker_is_awaited_before_handler(self):
+ seen = []
+
+ def handler():
+ seen.append("handler")
+ raise AssertionError("handler should not run when async permission denies")
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ async def can_use_tool(name, args, context, request):
+ seen.append("checker")
+ return {"decision": "deny", "message": "async deny"}
+
+ req.state.can_use_tool = can_use_tool
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "async deny"
+ assert meta["kind"] == "permission_denied"
+ assert meta["decision"] == "deny"
+ assert seen == ["checker"]
+
+ def test_sync_wrap_tool_call_awaits_async_permission_checker(self):
+ seen = []
+
+ def handler():
+ seen.append("handler")
+ raise AssertionError("handler should not run when async permission denies on sync path")
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ async def can_use_tool(name, args, context, request):
+ seen.append("checker")
+ return {"decision": "deny", "message": "async deny sync-path"}
+
+ req.state.can_use_tool = can_use_tool
+
+ result = runner.wrap_tool_call(req, lambda _req: None)
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "async deny sync-path"
+ assert meta["kind"] == "permission_denied"
+ assert meta["decision"] == "deny"
+ assert seen == ["checker"]
+
+ @pytest.mark.asyncio
+ async def test_sync_wrap_tool_call_awaits_async_permission_checker_inside_running_loop(self):
+ seen = []
+
+ def handler():
+ seen.append("handler")
+ raise AssertionError("handler should not run when async permission denies on nested-loop sync path")
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ async def can_use_tool(name, args, context, request):
+ seen.append("checker")
+ return {"decision": "deny", "message": "async deny nested-loop"}
+
+ req.state.can_use_tool = can_use_tool
+
+ result = runner.wrap_tool_call(req, lambda _req: None)
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "async deny nested-loop"
+ assert meta["kind"] == "permission_denied"
+ assert meta["decision"] == "deny"
+ assert seen == ["checker"]
+
+ def test_sync_wrap_tool_call_awaits_async_post_tool_use_hook(self):
+ seen = []
+
+ def handler():
+ seen.append("handler")
+ return "plain success"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ async def post_hook(result, request):
+ seen.append("post-start")
+ await asyncio.sleep(0)
+ seen.append("post-end")
+ return result
+
+ req.state.post_tool_use = post_hook
+
+ result = runner.wrap_tool_call(req, lambda _req: None)
+
+ assert result.content == "plain success"
+ assert seen == ["handler", "post-start", "post-end"]
+
+ def test_sync_wrap_tool_call_awaits_async_pre_tool_use_hook(self):
+ seen = []
+
+ def handler():
+ seen.append("handler")
+ return "plain success"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ async def pre_hook(payload, request):
+ seen.append("pre-start")
+ await asyncio.sleep(0)
+ seen.append("pre-end")
+ return payload
+
+ req.state.pre_tool_use = pre_hook
+
+ result = runner.wrap_tool_call(req, lambda _req: None)
+
+ assert result.content == "plain success"
+ assert seen == ["pre-start", "pre-end", "handler"]
+
+ def test_sync_wrap_tool_call_times_out_async_post_tool_use_hook(self):
+ events = []
+
+ def handler():
+ return "plain success"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+ req.state.hook_timeout_ms = 50
+
+ async def stuck_hook(result, request):
+ try:
+ await asyncio.Future()
+ except asyncio.CancelledError:
+ events.append("post-cancelled")
+ raise
+
+ req.state.post_tool_use = stuck_hook
+
+ started = time.perf_counter()
+ result = runner.wrap_tool_call(req, lambda _req: MagicMock())
+ elapsed = time.perf_counter() - started
+
+ assert result.content == "plain success"
+ assert elapsed < 0.2
+ assert events == ["post-cancelled"]
+
+ @pytest.mark.asyncio
+ async def test_sync_wrap_tool_call_awaits_async_post_tool_use_hook_inside_running_loop(self):
+ seen = []
+
+ def handler():
+ seen.append("handler")
+ return "plain success"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ async def post_hook(result, request):
+ seen.append("post-start")
+ await asyncio.sleep(0)
+ seen.append("post-end")
+ return result
+
+ req.state.post_tool_use = post_hook
+
+ result = runner.wrap_tool_call(req, lambda _req: None)
+
+ assert result.content == "plain success"
+ assert seen == ["handler", "post-start", "post-end"]
+
+ @pytest.mark.asyncio
+ async def test_permission_request_hook_can_allow_without_creating_request(self):
+ seen = []
+
+ def handler():
+ seen.append("handler")
+ return "ok"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ def can_use_tool(name, args, context, request):
+ seen.append("checker")
+ return {"decision": "ask", "message": "needs approval"}
+
+ def request_permission(*args, **kwargs):
+ raise AssertionError("request surface should not run when permission_request hook allows")
+
+ async def permission_request_hook(payload, request):
+ seen.append("permission-request-hook")
+ return {"decision": "allow"}
+
+ req.state.can_use_tool = can_use_tool
+ req.state.request_permission = request_permission
+ req.state.consume_permission_resolution = lambda *args, **kwargs: None
+ req.state.permission_request_hooks = permission_request_hook
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert result.content == "ok"
+ assert seen == ["checker", "permission-request-hook", "handler"]
+
+ def test_sync_wrap_tool_call_runs_permission_request_hook_before_prompt(self):
+ seen = []
+
+ def handler():
+ seen.append("handler")
+ return "ok"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ def can_use_tool(name, args, context, request):
+ seen.append("checker")
+ return {"decision": "ask", "message": "needs approval"}
+
+ def request_permission(*args, **kwargs):
+ raise AssertionError("request surface should not run when permission_request hook denies")
+
+ async def permission_request_hook(payload, request):
+ seen.append("permission-request-hook")
+ return {"decision": "deny", "message": "hook blocked"}
+
+ req.state.can_use_tool = can_use_tool
+ req.state.request_permission = request_permission
+ req.state.consume_permission_resolution = lambda *args, **kwargs: None
+ req.state.permission_request_hooks = permission_request_hook
+
+ result = runner.wrap_tool_call(req, lambda _req: None)
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "hook blocked"
+ assert meta["kind"] == "permission_denied"
+ assert meta["decision"] == "deny"
+ assert seen == ["checker", "permission-request-hook"]
+
+ @pytest.mark.asyncio
+ async def test_sync_wrap_tool_call_runs_permission_request_hook_inside_running_loop(self):
+ seen = []
+
+ def handler():
+ seen.append("handler")
+ return "ok"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ def can_use_tool(name, args, context, request):
+ seen.append("checker")
+ return {"decision": "ask", "message": "needs approval"}
+
+ def request_permission(*args, **kwargs):
+ raise AssertionError("request surface should not run when permission_request hook allows")
+
+ async def permission_request_hook(payload, request):
+ seen.append("permission-request-hook")
+ await asyncio.sleep(0)
+ return {"decision": "allow"}
+
+ req.state.can_use_tool = can_use_tool
+ req.state.request_permission = request_permission
+ req.state.consume_permission_resolution = lambda *args, **kwargs: None
+ req.state.permission_request_hooks = permission_request_hook
+
+ result = runner.wrap_tool_call(req, lambda _req: None)
+
+ assert result.content == "ok"
+ assert seen == ["checker", "permission-request-hook", "handler"]
+
+ @pytest.mark.asyncio
+ async def test_ask_permission_returns_permission_request_when_request_surface_exists(self):
+ requests = {}
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=lambda: "ok",
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ def can_use_tool(name, args, context, request):
+ return {"decision": "ask", "message": "needs approval"}
+
+ def request_permission(name, args, context, request, message):
+ requests["perm-1"] = {
+ "thread_id": "thread-a",
+ "tool_name": name,
+ "args": dict(args),
+ "message": message,
+ }
+ return {"request_id": "perm-1"}
+
+ req.state.can_use_tool = can_use_tool
+ req.state.request_permission = request_permission
+ req.state.consume_permission_resolution = lambda *args, **kwargs: None
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "needs approval"
+ assert meta["kind"] == "permission_request"
+ assert meta["decision"] == "ask"
+ assert meta["request_id"] == "perm-1"
+ assert requests["perm-1"]["message"] == "needs approval"
+
+ @pytest.mark.asyncio
+ async def test_ask_permission_fails_loud_when_request_surface_is_missing(self):
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=lambda: "ok",
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ def can_use_tool(name, args, context, request):
+ return {
+ "decision": "ask",
+ "message": "Permission required by rule: Write. No interactive permission resolver is available for this run.",
+ }
+
+ req.state.can_use_tool = can_use_tool
+ req.state.request_permission = None
+ req.state.consume_permission_resolution = lambda *args, **kwargs: None
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "Permission required by rule: Write. No interactive permission resolver is available for this run."
+ assert meta["kind"] == "permission_denied"
+ assert meta["decision"] == "deny"
+
+ def test_sync_ask_permission_fails_loud_when_request_surface_is_missing(self):
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=lambda: "ok",
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ def can_use_tool(name, args, context, request):
+ return {
+ "decision": "ask",
+ "message": "Permission required by rule: Write. No interactive permission resolver is available for this run.",
+ }
+
+ req.state.can_use_tool = can_use_tool
+ req.state.request_permission = None
+ req.state.consume_permission_resolution = lambda *args, **kwargs: None
+
+ result = runner.wrap_tool_call(req, lambda _req: None)
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "Permission required by rule: Write. No interactive permission resolver is available for this run."
+ assert meta["kind"] == "permission_denied"
+ assert meta["decision"] == "deny"
+
+ @pytest.mark.asyncio
+ async def test_consumed_permission_resolution_allows_single_retry_without_reprompt(self):
+ seen = []
+ resolution = {"decision": "allow", "message": "approved"}
+
+ def handler():
+ seen.append("handler")
+ return "ok"
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ def consume_permission_resolution(name, args, context, request):
+ nonlocal resolution
+ current = resolution
+ resolution = None
+ return current
+
+ def can_use_tool(name, args, context, request):
+ seen.append("checker")
+ return {"decision": "ask", "message": "needs approval"}
+
+ req.state.consume_permission_resolution = consume_permission_resolution
+ req.state.can_use_tool = can_use_tool
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert result.content == "ok"
+ assert seen == ["checker", "handler"]
+
+ @pytest.mark.asyncio
+ async def test_stale_resolved_allow_does_not_override_current_async_deny(self):
+ seen = []
+
+ def handler():
+ seen.append("handler")
+ raise AssertionError("handler should not run when current deny overrides stale approval")
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ def consume_permission_resolution(name, args, context, request):
+ seen.append("resolution")
+ return {"decision": "allow", "message": "approved earlier"}
+
+ def can_use_tool(name, args, context, request):
+ seen.append("checker")
+ return {"decision": "deny", "message": "deny now"}
+
+ req.state.consume_permission_resolution = consume_permission_resolution
+ req.state.can_use_tool = can_use_tool
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "deny now"
+ assert meta["kind"] == "permission_denied"
+ assert meta["decision"] == "deny"
+ assert seen == ["checker"]
+
+ def test_stale_resolved_allow_does_not_override_current_sync_deny(self):
+ seen = []
+
+ def handler():
+ seen.append("handler")
+ raise AssertionError("handler should not run when current deny overrides stale approval")
+
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=handler,
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ def consume_permission_resolution(name, args, context, request):
+ seen.append("resolution")
+ return {"decision": "allow", "message": "approved earlier"}
+
+ def can_use_tool(name, args, context, request):
+ seen.append("checker")
+ return {"decision": "deny", "message": "deny now"}
+
+ req.state.consume_permission_resolution = consume_permission_resolution
+ req.state.can_use_tool = can_use_tool
+
+ result = runner.wrap_tool_call(req, lambda _req: None)
+
+ meta = result.additional_kwargs["tool_result_meta"]
+ assert result.content == "deny now"
+ assert meta["kind"] == "permission_denied"
+ assert meta["decision"] == "deny"
+ assert seen == ["checker"]
+
+ @pytest.mark.asyncio
+ async def test_destructive_metadata_is_advisory_not_runtime_deny(self):
+ entry = ToolEntry(
+ name="Write",
+ mode=ToolMode.INLINE,
+ schema={"name": "Write", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=lambda: "ok",
+ source="test",
+ is_destructive=True,
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Write", {})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert result.content == "ok"
+
+ @pytest.mark.asyncio
+ async def test_runner_injects_tool_context_into_handler_when_requested(self):
+ entry = ToolEntry(
+ name="Agent",
+ mode=ToolMode.INLINE,
+ schema={"name": "Agent", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=lambda tool_context: f"context:{tool_context.turn_id}",
+ source="test",
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("Agent", {})
+ app_state = AppState()
+ req.state = ToolUseContext(
+ bootstrap=BootstrapConfig(workspace_root="/tmp/workspace", model_name="gpt-test"),
+ get_app_state=app_state.get_state,
+ set_app_state=app_state.set_state,
+ )
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert result.content == f"context:{req.state.turn_id}"
+
+ @pytest.mark.asyncio
+ async def test_runner_maps_context_schema_fields_into_handler_kwargs(self):
+ seen = {}
+
+ def needs_ctx(*, boot):
+ seen["boot"] = boot
+ return f"boot:{boot}"
+
+ entry = ToolEntry(
+ name="NeedsCtx",
+ mode=ToolMode.INLINE,
+ schema={"name": "NeedsCtx", "parameters": {"type": "object", "required": [], "properties": {}}},
+ handler=needs_ctx,
+ source="test",
+ context_schema={"boot": "bootstrap.model_name"},
+ )
+ runner = _make_runner([entry])
+ req = _make_tool_call_request("NeedsCtx", {})
+ app_state = AppState()
+ req.state = ToolUseContext(
+ bootstrap=BootstrapConfig(workspace_root="/tmp/workspace", model_name="MODEL_X"),
+ get_app_state=app_state.get_state,
+ set_app_state=app_state.set_state,
+ )
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert seen == {"boot": "MODEL_X"}
+ assert result.content == "boot:MODEL_X"
+
+
+class TestToolRunnerInlineInjection:
+ """P1: ToolRunner injects inline schemas into model call."""
+
+ def test_inline_schemas_injected(self):
+ entry = ToolEntry(
+ name="Read",
+ mode=ToolMode.INLINE,
+ schema={"name": "Read", "description": "read file"},
+ handler=lambda: "ok",
+ source="test",
+ )
+ runner = _make_runner([entry])
+
+ # Build a mock ModelRequest
+ request = MagicMock()
+ request.tools = []
+
+ captured = []
+
+ def handler(req):
+ captured.append(req)
+ return MagicMock()
+
+ request.override.return_value = request
+ runner.wrap_model_call(request, handler)
+
+ # Should have called override with tools containing Read
+ assert request.override.called
+ call_kwargs = request.override.call_args
+ _tools_arg = call_kwargs[1].get("tools") or (call_kwargs[0][0] if call_kwargs[0] else None)
+ # override was called — inline tools were injected
+
+ def test_deferred_schemas_not_injected(self):
+ deferred = ToolEntry(
+ name="TaskCreate",
+ mode=ToolMode.DEFERRED,
+ schema={"name": "TaskCreate", "description": "create task"},
+ handler=lambda: "ok",
+ source="test",
+ )
+ runner = _make_runner([deferred])
+ schemas = runner._registry.get_inline_schemas()
+ assert all(s["name"] != "TaskCreate" for s in schemas)
+
+
+# ---------------------------------------------------------------------------
+# P1: tool_modes from config honored
+# ---------------------------------------------------------------------------
+
+
+class TestToolModeFromConfig:
+ """Verify tool_modes config is applied during service init."""
+
+ def test_task_service_registers_deferred(self, tmp_path):
+ reg = ToolRegistry()
+ from core.tools.task.service import TaskService
+
+ _svc = TaskService(registry=reg, db_path=tmp_path / "test.db")
+ # TaskCreate/TaskUpdate/TaskList/TaskGet should be DEFERRED
+ for tool_name in ["TaskCreate", "TaskGet", "TaskList", "TaskUpdate"]:
+ entry = reg.get(tool_name)
+ assert entry is not None, f"{tool_name} not registered"
+ assert entry.mode == ToolMode.DEFERRED, f"{tool_name} should be DEFERRED, got {entry.mode}"
+
+ def test_search_service_registers_inline(self, tmp_path):
+ reg = ToolRegistry()
+ from core.tools.search.service import SearchService
+
+ _svc = SearchService(registry=reg, workspace_root=tmp_path)
+ for tool_name in ["Grep", "Glob"]:
+ entry = reg.get(tool_name)
+ assert entry is not None, f"{tool_name} not registered"
+ assert entry.mode == ToolMode.INLINE, f"{tool_name} should be INLINE, got {entry.mode}"
+
+ def test_task_service_read_only_queries_are_concurrency_safe(self, tmp_path):
+ reg = ToolRegistry()
+ from core.tools.task.service import TaskService
+
+ _svc = TaskService(registry=reg, db_path=tmp_path / "test.db")
+
+ for tool_name in ["TaskGet", "TaskList"]:
+ entry = reg.get(tool_name)
+ assert entry is not None, f"{tool_name} not registered"
+ assert entry.is_read_only is True
+ assert entry.is_concurrency_safe is True
+
+
+class TestToolSearchService:
+ def test_tool_search_schema_says_exact_lookup_is_for_deferred_tools(self):
+ reg = ToolRegistry()
+ ToolSearchService(reg)
+
+ schema = reg.get("tool_search").get_schema()
+
+ assert "deferred" in schema["description"].lower()
+ assert "deferred" in schema["parameters"]["properties"]["query"]["description"].lower()
+
+ def _make_ctx(self) -> ToolUseContext:
+ app = AppState()
+ return ToolUseContext(
+ bootstrap=BootstrapConfig(workspace_root="/tmp", model_name="test-model"),
+ get_app_state=lambda: app,
+ set_app_state=lambda fn: None,
+ )
+
+ def test_tool_search_keyword_results_are_capped_to_five(self):
+ reg = ToolRegistry()
+ for index in range(7):
+ reg.register(
+ ToolEntry(
+ name=f"Deferred{index}",
+ mode=ToolMode.DEFERRED,
+ schema={"name": f"Deferred{index}", "description": "alpha helper"},
+ handler=lambda: "ok",
+ source="test",
+ )
+ )
+ ToolSearchService(reg)
+ runner = _make_runner(reg.list_all())
+ req = ToolCallRequest(
+ tool_call={"name": "tool_search", "args": {"query": "alpha"}, "id": "tc-search"},
+ state=self._make_ctx(),
+ )
+
+ result = runner.wrap_tool_call(req, lambda r: MagicMock())
+
+ payload = json.loads(result.content)
+ assert len(payload) == 5
+
+ def test_tool_search_excludes_inline_tools(self):
+ reg = ToolRegistry()
+ reg.register(
+ ToolEntry(
+ name="Read",
+ mode=ToolMode.INLINE,
+ schema={"name": "Read", "description": "read file content"},
+ handler=lambda: "read",
+ source="test",
+ )
+ )
+ reg.register(
+ ToolEntry(
+ name="TaskCreate",
+ mode=ToolMode.DEFERRED,
+ schema={"name": "TaskCreate", "description": "create task"},
+ handler=lambda: "task",
+ source="test",
+ )
+ )
+ ToolSearchService(reg)
+ ctx = self._make_ctx()
+ runner = _make_runner(reg.list_all())
+ req = ToolCallRequest(
+ tool_call={"name": "tool_search", "args": {"query": "read"}, "id": "tc-search"},
+ state=ctx,
+ )
+
+ result = runner.wrap_tool_call(req, lambda r: MagicMock())
+
+ assert json.loads(result.content) == []
+ assert ctx.discovered_tool_names == set()
+
+ def test_tool_search_exact_select_fails_loudly_for_inline_tools(self):
+ reg = ToolRegistry()
+ reg.register(
+ ToolEntry(
+ name="Read",
+ mode=ToolMode.INLINE,
+ schema={"name": "Read", "description": "read file content"},
+ handler=lambda: "read",
+ source="test",
+ )
+ )
+ reg.register(
+ ToolEntry(
+ name="TaskCreate",
+ mode=ToolMode.DEFERRED,
+ schema={"name": "TaskCreate", "description": "create task"},
+ handler=lambda: "task",
+ source="test",
+ )
+ )
+ ToolSearchService(reg)
+ runner = _make_runner(reg.list_all())
+ req = ToolCallRequest(
+ tool_call={"name": "tool_search", "args": {"query": "select:Read,TaskCreate"}, "id": "tc-search"},
+ state=self._make_ctx(),
+ )
+
+ result = runner.wrap_tool_call(req, lambda r: MagicMock())
+
+ assert "" in result.content
+ assert "Read" in result.content
+ assert "inline" in result.content.lower()
+ assert "TaskCreate" not in result.content
+
+
+class TestWebToolRegistration:
+ def test_web_tools_are_deferred_not_inline(self):
+ reg = ToolRegistry()
+ WebService(registry=reg)
+
+ assert reg.get("WebSearch").mode == ToolMode.DEFERRED
+ assert reg.get("WebFetch").mode == ToolMode.DEFERRED
+ assert [schema["name"] for schema in reg.get_inline_schemas()] == []
+
+ @pytest.mark.asyncio
+ async def test_web_search_schema_uses_allowed_and_blocked_domains(self):
+ reg = ToolRegistry()
+ service = WebService(registry=reg)
+ seen: dict[str, object] = {}
+
+ class _FakeSearcher:
+ async def search(self, *, query, max_results, include_domains=None, exclude_domains=None):
+ seen["query"] = query
+ seen["max_results"] = max_results
+ seen["include_domains"] = include_domains
+ seen["exclude_domains"] = exclude_domains
+ return SimpleNamespace(error=None, format_output=lambda: "fake results")
+
+ service._searchers = [("fake", _FakeSearcher())]
+
+ schema = reg.get("WebSearch").schema
+ props = schema["parameters"]["properties"]
+ assert "allowed_domains" in props
+ assert "blocked_domains" in props
+ assert "include_domains" not in props
+ assert "exclude_domains" not in props
+
+ result = await service._web_search(
+ query="docs",
+ allowed_domains=["example.com"],
+ blocked_domains=["bad.com"],
+ )
+
+ assert result == "fake results"
+ assert seen["include_domains"] == ["example.com"]
+ assert seen["exclude_domains"] == ["bad.com"]
+
+ def test_web_search_schema_carries_query_and_max_result_constraints(self):
+ reg = ToolRegistry()
+ WebService(registry=reg)
+
+ schema = reg.get("WebSearch").get_schema()
+ props = schema["parameters"]["properties"]
+
+ assert props["query"]["minLength"] == 1
+ assert props["max_results"]["minimum"] == 1
+ assert props["max_results"]["maximum"] == 10
+
+ @pytest.mark.asyncio
+ async def test_web_search_rejects_out_of_range_max_results_at_validation_layer(self):
+ reg = ToolRegistry()
+ WebService(registry=reg)
+ runner = _make_runner(reg.list_all())
+ req = _make_tool_call_request("WebSearch", {"query": "docs", "max_results": 11})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert "InputValidationError" in result.content
+ assert "max_results" in result.content
+ assert "at most 10" in result.content
+
+ def test_web_fetch_schema_carries_non_empty_url_and_prompt_constraints(self):
+ reg = ToolRegistry()
+ WebService(registry=reg)
+
+ schema = reg.get("WebFetch").get_schema()
+ props = schema["parameters"]["properties"]
+
+ assert props["url"]["minLength"] == 1
+ assert props["prompt"]["minLength"] == 1
+
+ def test_list_dir_schema_uses_path(self, tmp_path):
+ reg = ToolRegistry()
+ FileSystemService(
+ registry=reg,
+ workspace_root=tmp_path,
+ )
+
+ schema = reg.get("list_dir").schema
+ props = schema["parameters"]["properties"]
+ assert "path" in props
+ assert "directory_path" not in props
+ assert schema["parameters"]["required"] == ["path"]
+
+ def test_bash_schema_carries_command_and_timeout_constraints(self, tmp_path):
+ reg = ToolRegistry()
+ CommandService(
+ registry=reg,
+ workspace_root=tmp_path,
+ )
+
+ schema = reg.get("Bash").get_schema()
+ props = schema["parameters"]["properties"]
+
+ assert props["command"]["minLength"] == 1
+ assert props["timeout"]["minimum"] == 1
+ assert props["timeout"]["maximum"] == 600000
+
+ @pytest.mark.asyncio
+ async def test_bash_rejects_out_of_range_timeout_at_validation_layer(self, tmp_path):
+ reg = ToolRegistry()
+ CommandService(
+ registry=reg,
+ workspace_root=tmp_path,
+ )
+ runner = _make_runner(reg.list_all())
+ req = _make_tool_call_request("Bash", {"command": "echo hi", "timeout": 600001})
+ req.state = MagicMock()
+
+ result = await runner.awrap_tool_call(req, AsyncMock())
+
+ assert "InputValidationError" in result.content
+ assert "timeout" in result.content
+ assert "at most 600000" in result.content
+
+ def test_can_auto_approve_only_for_read_only_non_destructive_tools(self):
+ assert can_auto_approve(ToolPermissionContext(is_read_only=True, is_destructive=False)) is True
+ assert can_auto_approve(ToolPermissionContext(is_read_only=False, is_destructive=False)) is False
+ assert can_auto_approve(ToolPermissionContext(is_read_only=True, is_destructive=True)) is False
diff --git a/tests/test_filesystem_extra_paths.py b/tests/Unit/filesystem/test_filesystem_extra_paths.py
similarity index 100%
rename from tests/test_filesystem_extra_paths.py
rename to tests/Unit/filesystem/test_filesystem_extra_paths.py
diff --git a/tests/Unit/filesystem/test_filesystem_service.py b/tests/Unit/filesystem/test_filesystem_service.py
new file mode 100644
index 000000000..a24a1455c
--- /dev/null
+++ b/tests/Unit/filesystem/test_filesystem_service.py
@@ -0,0 +1,395 @@
+from __future__ import annotations
+
+import threading
+import time
+from pathlib import Path, PurePosixPath
+
+from core.runtime.registry import ToolRegistry
+from core.tools.filesystem.service import FileSystemService, _ReadFileStateCache
+from sandbox.interfaces.filesystem import DirListResult, FileReadResult, FileSystemBackend, FileWriteResult
+
+
+def _make_service(
+ workspace: Path,
+ *,
+ max_read_cache_entries: int = 100,
+ max_edit_file_size: int | None = None,
+) -> FileSystemService:
+ return FileSystemService(
+ registry=ToolRegistry(),
+ workspace_root=workspace,
+ max_read_cache_entries=max_read_cache_entries,
+ max_edit_file_size=max_edit_file_size,
+ )
+
+
+def test_edit_rejects_if_last_read_was_partial_view(tmp_path: Path):
+ service = _make_service(tmp_path)
+ target = tmp_path / "sample.txt"
+ target.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
+
+ read_result = service._read_file(str(target), offset=2, limit=1)
+ assert " FileReadResult:
+ before = self._content
+ self._content = "alpha\nEXTERNAL\n"
+ self._mtime = 2.0
+ return FileReadResult(content=before, size=len(before))
+
+ def write_file(self, path: str, content: str) -> FileWriteResult:
+ self.writes.append(content)
+ self._content = content
+ return FileWriteResult(success=True)
+
+ def file_exists(self, path: str) -> bool:
+ return True
+
+ def file_mtime(self, path: str) -> float | None:
+ return self._mtime
+
+ def file_size(self, path: str) -> int | None:
+ return len(self._content.encode("utf-8"))
+
+ def is_dir(self, path: str) -> bool:
+ return False
+
+ def list_dir(self, path: str) -> DirListResult:
+ return DirListResult(entries=[])
+
+ backend = RacingBackend()
+ service = FileSystemService(
+ registry=ToolRegistry(),
+ workspace_root=tmp_path,
+ backend=backend,
+ )
+ target = (tmp_path / "race.txt").resolve()
+ service._read_files.set(
+ target,
+ state=service._read_files.make_state(timestamp=1.0, is_partial=False),
+ )
+
+ edit_result = service._edit_file(
+ str(target),
+ old_string="beta",
+ new_string="BETA",
+ )
+
+ assert "modified since last read" in edit_result
+ assert backend.writes == []
+ assert backend._content == "alpha\nEXTERNAL\n"
+
+
+def test_concurrent_edits_do_not_both_commit_from_same_stale_read(tmp_path: Path):
+ class ConcurrentBackend(FileSystemBackend):
+ is_remote = False
+
+ def __init__(self):
+ self._mtime = 1.0
+ self._content = "alpha\nbeta\n"
+ self._write_lock = threading.Lock()
+ self.writes: list[str] = []
+
+ def read_file(self, path: str) -> FileReadResult:
+ return FileReadResult(content=self._content, size=len(self._content))
+
+ def write_file(self, path: str, content: str) -> FileWriteResult:
+ time.sleep(0.05)
+ with self._write_lock:
+ self.writes.append(content)
+ self._content = content
+ self._mtime += 1.0
+ return FileWriteResult(success=True)
+
+ def file_exists(self, path: str) -> bool:
+ return True
+
+ def file_mtime(self, path: str) -> float | None:
+ return self._mtime
+
+ def file_size(self, path: str) -> int | None:
+ return len(self._content.encode("utf-8"))
+
+ def is_dir(self, path: str) -> bool:
+ return False
+
+ def list_dir(self, path: str) -> DirListResult:
+ return DirListResult(entries=[])
+
+ backend = ConcurrentBackend()
+ service = FileSystemService(
+ registry=ToolRegistry(),
+ workspace_root=tmp_path,
+ backend=backend,
+ )
+ target = (tmp_path / "race.txt").resolve()
+ service._read_files.set(
+ target,
+ state=service._read_files.make_state(timestamp=1.0, is_partial=False),
+ )
+
+ results: list[str] = []
+
+ def run_edit(new_string: str) -> None:
+ results.append(
+ service._edit_file(
+ str(target),
+ old_string="beta",
+ new_string=new_string,
+ )
+ )
+
+ t1 = threading.Thread(target=run_edit, args=("BETA-ONE",))
+ t2 = threading.Thread(target=run_edit, args=("BETA-TWO",))
+ t1.start()
+ t2.start()
+ t1.join()
+ t2.join()
+
+ success_count = sum("File edited" in result for result in results)
+ failure_count = sum(("modified since last read" in result) or ("String not found in file" in result) for result in results)
+
+ assert success_count == 1
+ assert failure_count == 1
+ assert len(backend.writes) == 1
+
+
+def test_remote_edit_does_not_trust_false_negative_exists_probe(tmp_path: Path):
+ class FlakyRemoteBackend(FileSystemBackend):
+ is_remote = True
+
+ def __init__(self):
+ self._content = "result = 3\n"
+ self.writes: list[str] = []
+
+ def read_file(self, path: str) -> FileReadResult:
+ return FileReadResult(content=self._content, size=len(self._content))
+
+ def write_file(self, path: str, content: str) -> FileWriteResult:
+ self.writes.append(content)
+ self._content = content
+ return FileWriteResult(success=True)
+
+ def file_exists(self, path: str) -> bool:
+ return False
+
+ def file_mtime(self, path: str) -> float | None:
+ return None
+
+ def file_size(self, path: str) -> int | None:
+ return len(self._content.encode("utf-8"))
+
+ def is_dir(self, path: str) -> bool:
+ return False
+
+ def list_dir(self, path: str) -> DirListResult:
+ return DirListResult(entries=[])
+
+ backend = FlakyRemoteBackend()
+ service = FileSystemService(
+ registry=ToolRegistry(),
+ workspace_root=Path("/home/daytona"),
+ backend=backend,
+ )
+ target = PurePosixPath("/home/daytona/interleave.py")
+ service._read_files.set(
+ target,
+ state=service._read_files.make_state(timestamp=None, is_partial=False),
+ )
+
+ edit_result = service._edit_file(
+ str(target),
+ old_string="result = 3",
+ new_string="result = 5",
+ )
+
+ assert "File edited" in edit_result
+ assert backend.writes == ["result = 5\n"]
diff --git a/tests/test_read_file_limits.py b/tests/Unit/filesystem/test_read_file_limits.py
similarity index 100%
rename from tests/test_read_file_limits.py
rename to tests/Unit/filesystem/test_read_file_limits.py
diff --git a/tests/test_monitor_resource_overview_cache.py b/tests/Unit/monitor/test_monitor_resource_overview_cache.py
similarity index 55%
rename from tests/test_monitor_resource_overview_cache.py
rename to tests/Unit/monitor/test_monitor_resource_overview_cache.py
index d0426c967..2f0440fb6 100644
--- a/tests/test_monitor_resource_overview_cache.py
+++ b/tests/Unit/monitor/test_monitor_resource_overview_cache.py
@@ -53,3 +53,50 @@ def _raise():
assert degraded["providers"][0]["id"] == "docker"
assert degraded["summary"]["refresh_status"] == "error"
assert degraded["summary"]["refresh_error"] == "probe failed"
+
+
+def test_resource_overview_cache_refreshes_when_live_session_counts_drift(monkeypatch):
+ cache.clear_resource_overview_cache()
+
+ stale_payload = {
+ "summary": {
+ "snapshot_at": "2026-03-03T00:00:00Z",
+ "total_providers": 1,
+ "active_providers": 0,
+ "unavailable_providers": 0,
+ "running_sessions": 0,
+ },
+ "providers": [
+ {
+ "id": "local",
+ "sessions": [],
+ "telemetry": {"running": {"used": 0}},
+ }
+ ],
+ }
+ fresh_payload = {
+ "summary": {
+ "snapshot_at": "2026-03-03T00:01:00Z",
+ "total_providers": 1,
+ "active_providers": 1,
+ "unavailable_providers": 0,
+ "running_sessions": 1,
+ },
+ "providers": [
+ {
+ "id": "local",
+ "sessions": [{"id": "lease-1:m_thread"}],
+ "telemetry": {"running": {"used": 1}},
+ }
+ ],
+ }
+
+ calls = iter([stale_payload, fresh_payload])
+ monkeypatch.setattr(cache.resource_service, "list_resource_providers", lambda: next(calls))
+ monkeypatch.setattr(cache.resource_service, "visible_resource_session_stats", lambda: {"local": {"sessions": 1, "running": 1}})
+
+ cache.refresh_resource_overview_sync()
+ payload = cache.get_resource_overview_snapshot()
+
+ assert payload["providers"][0]["telemetry"]["running"]["used"] == 1
+ assert len(payload["providers"][0]["sessions"]) == 1
diff --git a/tests/test_monitor_resource_probe.py b/tests/Unit/monitor/test_monitor_resource_probe.py
similarity index 100%
rename from tests/test_monitor_resource_probe.py
rename to tests/Unit/monitor/test_monitor_resource_probe.py
diff --git a/tests/Unit/monitor/test_sqlite_sandbox_monitor_repo.py b/tests/Unit/monitor/test_sqlite_sandbox_monitor_repo.py
new file mode 100644
index 000000000..d8e7a217c
--- /dev/null
+++ b/tests/Unit/monitor/test_sqlite_sandbox_monitor_repo.py
@@ -0,0 +1,97 @@
+import sqlite3
+
+from storage.providers.sqlite.sandbox_monitor_repo import SQLiteSandboxMonitorRepo
+
+
+def _bootstrap_monitor_db(db_path):
+ conn = sqlite3.connect(db_path)
+ try:
+ conn.executescript(
+ """
+ CREATE TABLE sandbox_leases (
+ lease_id TEXT PRIMARY KEY,
+ provider_name TEXT,
+ desired_state TEXT,
+ observed_state TEXT,
+ current_instance_id TEXT,
+ created_at TEXT,
+ updated_at TEXT
+ );
+
+ CREATE TABLE abstract_terminals (
+ terminal_id TEXT PRIMARY KEY,
+ lease_id TEXT,
+ thread_id TEXT,
+ cwd TEXT,
+ created_at TEXT
+ );
+
+ CREATE TABLE chat_sessions (
+ chat_session_id TEXT PRIMARY KEY,
+ thread_id TEXT,
+ lease_id TEXT,
+ status TEXT,
+ started_at TEXT
+ );
+ """
+ )
+ conn.commit()
+ finally:
+ conn.close()
+
+
+def test_list_sessions_with_leases_keeps_raw_newest_terminal_truth(tmp_path):
+ db_path = tmp_path / "sandbox.db"
+ _bootstrap_monitor_db(db_path)
+
+ conn = sqlite3.connect(db_path)
+ try:
+ conn.execute(
+ """
+ INSERT INTO sandbox_leases (
+ lease_id, provider_name, desired_state, observed_state, current_instance_id, created_at, updated_at
+ ) VALUES (?, ?, ?, ?, ?, ?, ?)
+ """,
+ (
+ "lease-1",
+ "daytona_selfhost",
+ "paused",
+ "paused",
+ "instance-1",
+ "2026-04-05T13:00:00",
+ "2026-04-05T23:59:00",
+ ),
+ )
+ conn.executemany(
+ """
+ INSERT INTO abstract_terminals (terminal_id, lease_id, thread_id, cwd, created_at)
+ VALUES (?, ?, ?, ?, ?)
+ """,
+ [
+ ("term-parent", "lease-1", "thread-parent", "/home/daytona/files/app", "2026-04-05T13:35:08"),
+ ("term-subagent", "lease-1", "subagent-deadbeef", "/home/daytona/files/app", "2026-04-05T23:51:40"),
+ ],
+ )
+ conn.executemany(
+ """
+ INSERT INTO chat_sessions (chat_session_id, thread_id, lease_id, status, started_at)
+ VALUES (?, ?, ?, ?, ?)
+ """,
+ [
+ ("sess-parent", "thread-parent", "lease-1", "closed", "2026-04-05T23:24:06"),
+ ("sess-subagent", "subagent-deadbeef", "lease-1", "closed", "2026-04-05T23:51:42"),
+ ],
+ )
+ conn.commit()
+ finally:
+ conn.close()
+
+ repo = SQLiteSandboxMonitorRepo(db_path=db_path)
+ try:
+ rows = repo.list_sessions_with_leases()
+ finally:
+ repo.close()
+
+ assert len(rows) == 2
+ assert {row["thread_id"] for row in rows} == {"thread-parent", "subagent-deadbeef"}
+ assert all(row["lease_id"] == "lease-1" for row in rows)
diff --git a/tests/test_agentbay_capability_override.py b/tests/Unit/platform/test_agentbay_capability_override.py
similarity index 61%
rename from tests/test_agentbay_capability_override.py
rename to tests/Unit/platform/test_agentbay_capability_override.py
index f54d6ccd7..ed0d08b23 100644
--- a/tests/test_agentbay_capability_override.py
+++ b/tests/Unit/platform/test_agentbay_capability_override.py
@@ -6,13 +6,35 @@
def _install_fake_agentbay_module(monkeypatch) -> None:
fake_mod = types.ModuleType("agentbay")
+ fake_api_mod = types.ModuleType("agentbay.api")
+ fake_api_models_mod = types.ModuleType("agentbay.api.models")
class FakeAgentBay:
def __init__(self, api_key: str):
self.api_key = api_key
+ class FakeCreateSessionParams:
+ def __init__(self):
+ self.image_id = None
+ self.context_syncs = None
+
+ class FakeContextSync:
+ @staticmethod
+ def new(context_id: str, path: str):
+ return {"context_id": context_id, "path": path}
+
+ class FakeGetSessionRequest:
+ def __init__(self, authorization: str, session_id: str):
+ self.authorization = authorization
+ self.session_id = session_id
+
fake_mod.AgentBay = FakeAgentBay
+ fake_mod.CreateSessionParams = FakeCreateSessionParams
+ fake_mod.ContextSync = FakeContextSync
+ fake_api_models_mod.GetSessionRequest = FakeGetSessionRequest
monkeypatch.setitem(sys.modules, "agentbay", fake_mod)
+ monkeypatch.setitem(sys.modules, "agentbay.api", fake_api_mod)
+ monkeypatch.setitem(sys.modules, "agentbay.api.models", fake_api_models_mod)
def test_agentbay_capability_default_from_class(monkeypatch):
@@ -55,7 +77,12 @@ def screenshot(self):
return _ScreenshotResult()
class _FakeSession:
- computer = _FakeComputer()
+ def __init__(self) -> None:
+ self.session_id = "sess-1"
+ self.token = "tok"
+ self.link_url = "https://link"
+ self.mcpTools = [object()]
+ self.computer = _FakeComputer()
provider._sessions["sess-1"] = _FakeSession()
screenshot = provider.screenshot("sess-1")
diff --git a/tests/test_cron_api.py b/tests/Unit/platform/test_cron_api.py
similarity index 100%
rename from tests/test_cron_api.py
rename to tests/Unit/platform/test_cron_api.py
diff --git a/tests/test_cron_job_service.py b/tests/Unit/platform/test_cron_job_service.py
similarity index 100%
rename from tests/test_cron_job_service.py
rename to tests/Unit/platform/test_cron_job_service.py
diff --git a/tests/test_cron_service.py b/tests/Unit/platform/test_cron_service.py
similarity index 100%
rename from tests/test_cron_service.py
rename to tests/Unit/platform/test_cron_service.py
diff --git a/tests/Unit/platform/test_cron_tool_service.py b/tests/Unit/platform/test_cron_tool_service.py
new file mode 100644
index 000000000..69f546450
--- /dev/null
+++ b/tests/Unit/platform/test_cron_tool_service.py
@@ -0,0 +1,87 @@
+"""Tests for CronToolService — agent-callable cron CRUD surface."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import cast
+
+from core.runtime.registry import ToolRegistry
+from core.tools.cron.service import CronToolService
+
+
+def _redirect_cron_repo(monkeypatch, tmp_path: Path) -> None:
+ from storage.providers.sqlite.cron_job_repo import SQLiteCronJobRepo
+
+ db_path = tmp_path / "cron-tools.db"
+ monkeypatch.setattr(
+ "backend.web.services.cron_job_service.make_cron_job_repo",
+ lambda: SQLiteCronJobRepo(db_path=db_path),
+ )
+
+
+def test_cron_tool_registry_exposes_canonical_surface(monkeypatch, tmp_path: Path) -> None:
+ _redirect_cron_repo(monkeypatch, tmp_path)
+ registry = ToolRegistry()
+
+ CronToolService(registry)
+
+ for tool_name in ("CronCreate", "CronDelete", "CronList"):
+ assert registry.get(tool_name) is not None
+
+
+def test_cron_create_list_delete_roundtrip(monkeypatch, tmp_path: Path) -> None:
+ _redirect_cron_repo(monkeypatch, tmp_path)
+ registry = ToolRegistry()
+
+ CronToolService(registry)
+
+ create = registry.get("CronCreate")
+ list_jobs = registry.get("CronList")
+ delete = registry.get("CronDelete")
+
+ assert create is not None
+ assert list_jobs is not None
+ assert delete is not None
+
+ created_raw = create.handler(
+ name="nightly backup",
+ cron_expression="0 2 * * *",
+ description="backup prod",
+ task_template='{"title":"backup"}',
+ enabled=True,
+ )
+ created = json.loads(cast(str, created_raw))
+ job = created["item"]
+ assert job["name"] == "nightly backup"
+ assert job["cron_expression"] == "0 2 * * *"
+
+ listed = json.loads(cast(str, list_jobs.handler()))
+ assert listed["total"] == 1
+ assert listed["items"][0]["id"] == job["id"]
+
+ deleted = json.loads(cast(str, delete.handler(job_id=job["id"])))
+ assert deleted == {"ok": True, "id": job["id"]}
+
+ listed_after = json.loads(cast(str, list_jobs.handler()))
+ assert listed_after == {"items": [], "total": 0}
+
+
+def test_cron_create_requires_valid_json_template(monkeypatch, tmp_path: Path) -> None:
+ _redirect_cron_repo(monkeypatch, tmp_path)
+ registry = ToolRegistry()
+
+ CronToolService(registry)
+ create = registry.get("CronCreate")
+ assert create is not None
+
+ try:
+ create.handler(
+ name="broken",
+ cron_expression="0 2 * * *",
+ task_template="{not json}",
+ )
+ except ValueError as exc:
+ assert "task_template must be valid JSON" in str(exc)
+ else:
+ raise AssertionError("CronCreate should fail loudly on invalid JSON")
diff --git a/tests/Unit/platform/test_lsp_service.py b/tests/Unit/platform/test_lsp_service.py
new file mode 100644
index 000000000..8e851850e
--- /dev/null
+++ b/tests/Unit/platform/test_lsp_service.py
@@ -0,0 +1,136 @@
+from __future__ import annotations
+
+import json
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from core.runtime.registry import ToolRegistry
+from core.tools.lsp.service import LSPService
+
+
+class _FakeSession:
+ def __init__(self):
+ self.calls: list[tuple[str, str, int, int]] = []
+
+ async def request_definition(self, rel_path: str, line: int, character: int):
+ self.calls.append(("definition", rel_path, line, character))
+ return [
+ {
+ "absolutePath": "/tmp/example.py",
+ "range": {"start": {"line": line, "character": character}},
+ }
+ ]
+
+
+class _FakePyright:
+ def __init__(self):
+ self.calls: list[tuple[str, str, int, int]] = []
+
+ async def request_implementation(self, rel_path: str, line: int, character: int):
+ self.calls.append(("implementation", rel_path, line, character))
+ return [
+ {
+ "absolutePath": "/tmp/example.py",
+ "range": {"start": {"line": line, "character": character}},
+ }
+ ]
+
+
+def test_lsp_schema_uses_one_based_character_positions(tmp_path):
+ reg = ToolRegistry()
+ LSPService(registry=reg, workspace_root=tmp_path)
+
+ schema = reg.get("LSP").get_schema()
+ props = schema["parameters"]["properties"]
+
+ assert "character" in props
+ assert "column" not in props
+ assert "1-based" in props["line"]["description"]
+ assert "1-based" in props["character"]["description"]
+
+
+@pytest.mark.asyncio
+async def test_lsp_handle_converts_one_based_positions_to_zero_based_for_definition(tmp_path):
+ reg = ToolRegistry()
+ service = LSPService(registry=reg, workspace_root=tmp_path)
+ fake = _FakeSession()
+ service._get_session = AsyncMock(return_value=fake)
+
+ file_path = tmp_path / "example.py"
+ file_path.write_text("x = 1\n", encoding="utf-8")
+
+ result = await service._handle(
+ operation="goToDefinition",
+ file_path=str(file_path),
+ line=5,
+ character=3,
+ )
+
+ assert fake.calls == [("definition", "example.py", 4, 2)]
+ payload = json.loads(result)
+ assert payload[0]["line"] == 4
+ assert payload[0]["column"] == 2
+
+
+@pytest.mark.asyncio
+async def test_lsp_handle_offloads_gitignored_filtering_from_event_loop(tmp_path, monkeypatch):
+ reg = ToolRegistry()
+ service = LSPService(registry=reg, workspace_root=tmp_path)
+ fake = _FakeSession()
+ service._get_session = AsyncMock(return_value=fake)
+
+ file_path = tmp_path / "example.py"
+ file_path.write_text("x = 1\n", encoding="utf-8")
+
+ filter_results = [
+ {
+ "absolutePath": "/tmp/example.py",
+ "range": {"start": {"line": 0, "character": 0}},
+ }
+ ]
+ filter_mock = MagicMock(return_value=filter_results)
+ service._filter_gitignored_batched = filter_mock
+
+ calls: list[tuple[object, tuple[object, ...]]] = []
+
+ async def fake_to_thread(func, *args, **kwargs):
+ calls.append((func, args))
+ return func(*args, **kwargs)
+
+ monkeypatch.setattr("core.tools.lsp.service.asyncio.to_thread", fake_to_thread)
+
+ result = await service._handle(
+ operation="goToDefinition",
+ file_path=str(file_path),
+ line=1,
+ character=1,
+ )
+
+ assert calls == [(filter_mock, (filter_mock.call_args.args[0],))]
+ assert filter_mock.call_count == 1
+ payload = json.loads(result)
+ assert payload[0]["file"] == "/tmp/example.py"
+
+
+@pytest.mark.asyncio
+async def test_lsp_handle_converts_one_based_positions_to_zero_based_for_pyright_ops(tmp_path):
+ reg = ToolRegistry()
+ service = LSPService(registry=reg, workspace_root=tmp_path)
+ fake = _FakePyright()
+ service._get_pyright = AsyncMock(return_value=fake)
+
+ file_path = tmp_path / "example.py"
+ file_path.write_text("x = 1\n", encoding="utf-8")
+
+ result = await service._handle(
+ operation="goToImplementation",
+ file_path=str(file_path),
+ line=7,
+ character=4,
+ )
+
+ assert fake.calls == [("implementation", "example.py", 6, 3)]
+ payload = json.loads(result)
+ assert payload[0]["line"] == 6
+ assert payload[0]["column"] == 3
diff --git a/tests/test_marketplace_client.py b/tests/Unit/platform/test_marketplace_client.py
similarity index 100%
rename from tests/test_marketplace_client.py
rename to tests/Unit/platform/test_marketplace_client.py
diff --git a/tests/test_marketplace_models.py b/tests/Unit/platform/test_marketplace_models.py
similarity index 100%
rename from tests/test_marketplace_models.py
rename to tests/Unit/platform/test_marketplace_models.py
diff --git a/tests/Unit/platform/test_mcp_resource_tool_service.py b/tests/Unit/platform/test_mcp_resource_tool_service.py
new file mode 100644
index 000000000..1377c4cbd
--- /dev/null
+++ b/tests/Unit/platform/test_mcp_resource_tool_service.py
@@ -0,0 +1,191 @@
+from __future__ import annotations
+
+import json
+from collections.abc import Awaitable
+from contextlib import asynccontextmanager
+from types import SimpleNamespace
+from typing import Any, cast
+
+import pytest
+from pydantic import AnyUrl, TypeAdapter
+
+from core.runtime.registry import ToolRegistry
+from core.runtime.tool_result import ToolResultEnvelope
+from core.tools.mcp_resources.service import McpResourceToolService
+
+
+class _FakeSession:
+ def __init__(self, resources: list[SimpleNamespace], contents_by_uri: dict[str, list[SimpleNamespace]]) -> None:
+ self._resources = resources
+ self._contents_by_uri = contents_by_uri
+
+ async def list_resources(self):
+ return SimpleNamespace(resources=self._resources)
+
+ async def read_resource(self, uri: str):
+ return SimpleNamespace(contents=self._contents_by_uri[uri])
+
+
+class _FakeClient:
+ def __init__(self, sessions: dict[str, _FakeSession]) -> None:
+ self.connections = {name: object() for name in sessions}
+ self._sessions = sessions
+
+ @asynccontextmanager
+ async def session(self, server_name: str, *, auto_initialize: bool = True):
+ assert auto_initialize is True
+ yield self._sessions[server_name]
+
+
+def _unwrap_text(result: str | ToolResultEnvelope) -> str:
+ if isinstance(result, ToolResultEnvelope):
+ return cast(str, result.content)
+ return result
+
+
+async def _invoke_handler(handler: Any, /, **kwargs: Any) -> str | ToolResultEnvelope:
+ result = handler(**kwargs)
+ if isinstance(result, Awaitable):
+ return await result
+ return result
+
+
+@pytest.mark.asyncio
+async def test_mcp_resource_tool_service_registers_list_and_read_tools() -> None:
+ registry = ToolRegistry()
+ client = _FakeClient(
+ {
+ "demo": _FakeSession(
+ resources=[
+ SimpleNamespace(
+ uri="memo://alpha",
+ name="alpha",
+ mimeType="text/plain",
+ description="first resource",
+ )
+ ],
+ contents_by_uri={
+ "memo://alpha": [
+ SimpleNamespace(
+ uri="memo://alpha",
+ mimeType="text/plain",
+ text="hello from resource",
+ )
+ ]
+ },
+ )
+ }
+ )
+
+ McpResourceToolService(
+ registry=registry,
+ client_fn=lambda: client,
+ server_configs_fn=lambda: {"demo": object()},
+ )
+
+ list_entry = registry.get("ListMcpResources")
+ read_entry = registry.get("ReadMcpResource")
+ assert list_entry is not None
+ assert read_entry is not None
+
+ listed = json.loads(_unwrap_text(await _invoke_handler(list_entry.handler)))
+ assert listed == {
+ "items": [
+ {
+ "server": "demo",
+ "uri": "memo://alpha",
+ "name": "alpha",
+ "mime_type": "text/plain",
+ "description": "first resource",
+ }
+ ],
+ "total": 1,
+ }
+
+ content = json.loads(_unwrap_text(await _invoke_handler(read_entry.handler, server="demo", uri="memo://alpha")))
+ assert content == {
+ "server": "demo",
+ "uri": "memo://alpha",
+ "contents": [
+ {
+ "uri": "memo://alpha",
+ "mime_type": "text/plain",
+ "text": "hello from resource",
+ }
+ ],
+ }
+
+
+def test_mcp_resource_tool_service_skips_registration_without_servers() -> None:
+ registry = ToolRegistry()
+ McpResourceToolService(
+ registry=registry,
+ client_fn=lambda: None,
+ server_configs_fn=lambda: {},
+ )
+
+ assert registry.get("ListMcpResources") is None
+ assert registry.get("ReadMcpResource") is None
+
+
+@pytest.mark.asyncio
+async def test_mcp_resource_tool_service_fails_loudly_for_unknown_server() -> None:
+ registry = ToolRegistry()
+ client = _FakeClient({"demo": _FakeSession(resources=[], contents_by_uri={})})
+ McpResourceToolService(
+ registry=registry,
+ client_fn=lambda: client,
+ server_configs_fn=lambda: {"demo": object()},
+ )
+
+ read_entry = registry.get("ReadMcpResource")
+ assert read_entry is not None
+
+ with pytest.raises(ValueError, match='MCP server not found: "missing"'):
+ await _invoke_handler(read_entry.handler, server="missing", uri="memo://alpha")
+
+
+@pytest.mark.asyncio
+async def test_mcp_resource_tool_service_serializes_url_like_resource_uris() -> None:
+ registry = ToolRegistry()
+ uri = TypeAdapter(AnyUrl).validate_python("memo://alpha")
+ client = _FakeClient(
+ {
+ "demo": _FakeSession(
+ resources=[
+ SimpleNamespace(
+ uri=uri,
+ name="alpha",
+ mimeType="text/plain",
+ description="first resource",
+ )
+ ],
+ contents_by_uri={
+ "memo://alpha": [
+ SimpleNamespace(
+ uri=uri,
+ mimeType="text/plain",
+ text="hello from resource",
+ )
+ ]
+ },
+ )
+ }
+ )
+
+ McpResourceToolService(
+ registry=registry,
+ client_fn=lambda: client,
+ server_configs_fn=lambda: {"demo": object()},
+ )
+
+ list_entry = registry.get("ListMcpResources")
+ read_entry = registry.get("ReadMcpResource")
+ assert list_entry is not None
+ assert read_entry is not None
+
+ listed = json.loads(_unwrap_text(await _invoke_handler(list_entry.handler)))
+ assert listed["items"][0]["uri"] == "memo://alpha"
+
+ content = json.loads(_unwrap_text(await _invoke_handler(read_entry.handler, server="demo", uri="memo://alpha")))
+ assert content["contents"][0]["uri"] == "memo://alpha"
diff --git a/tests/Unit/platform/test_mcp_transport.py b/tests/Unit/platform/test_mcp_transport.py
new file mode 100644
index 000000000..f560f4d50
--- /dev/null
+++ b/tests/Unit/platform/test_mcp_transport.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+
+from config.schema import MCPConfig, MCPServerConfig
+from core.runtime.agent import LeonAgent
+
+
+@pytest.mark.asyncio
+async def test_init_mcp_tools_respects_explicit_websocket_transport(monkeypatch):
+ captured: dict[str, object] = {}
+
+ class FakeClient:
+ def __init__(self, configs, tool_name_prefix=False):
+ captured["configs"] = configs
+
+ async def get_tools(self):
+ return []
+
+ async def close(self):
+ return None
+
+ agent = LeonAgent.__new__(LeonAgent)
+ agent.config = SimpleNamespace(
+ mcp=MCPConfig(
+ enabled=True,
+ servers={
+ "wsdemo": MCPServerConfig(
+ transport="websocket",
+ url="ws://example.test/mcp",
+ )
+ },
+ )
+ )
+ agent.verbose = False
+ agent._mcp_client = None
+
+ monkeypatch.setattr(
+ "langchain_mcp_adapters.client.MultiServerMCPClient",
+ FakeClient,
+ )
+
+ await LeonAgent._init_mcp_tools(agent)
+
+ assert captured["configs"] == {
+ "wsdemo": {
+ "transport": "websocket",
+ "url": "ws://example.test/mcp",
+ }
+ }
diff --git a/tests/test_model_config_enrichment.py b/tests/Unit/platform/test_model_config_enrichment.py
similarity index 90%
rename from tests/test_model_config_enrichment.py
rename to tests/Unit/platform/test_model_config_enrichment.py
index 6e1e3e53d..6fc470582 100644
--- a/tests/test_model_config_enrichment.py
+++ b/tests/Unit/platform/test_model_config_enrichment.py
@@ -1,9 +1,12 @@
"""Tests for model config enrichment (based_on + context_limit)."""
+import importlib
+
import pytest
from pydantic import ValidationError
from config.models_schema import ActiveModel, CustomModelConfig, ModelsConfig, ModelSpec, PoolConfig
+from core.runtime.middleware.monitor import cost as cost_module
from core.runtime.middleware.monitor.cost import fetch_openrouter_pricing, get_model_context_limit
from core.runtime.middleware.monitor.middleware import MonitorMiddleware
@@ -131,6 +134,25 @@ def test_update_model_based_on_affects_cost_calculator(self):
mw.update_model("Alice", overrides={"based_on": "claude-sonnet-4.5"})
assert mw._token_monitor.cost_calculator.costs != {}
+ def test_empty_cached_pricing_falls_back_to_bundled_models(self, monkeypatch: pytest.MonkeyPatch):
+ importlib.reload(cost_module)
+
+ monkeypatch.setattr(
+ cost_module,
+ "_load_cache",
+ lambda: (
+ {},
+ {"claude-sonnet-4.5": SONNET_LIMIT},
+ {"claude-sonnet-4.5": "anthropic"},
+ ),
+ )
+ monkeypatch.setattr(cost_module, "_fetch_from_openrouter", lambda: None)
+
+ prices = cost_module.fetch_openrouter_pricing()
+
+ assert prices.get("claude-sonnet-4.5") is not None
+ assert cost_module.CostCalculator("claude-sonnet-4.5").costs != {}
+
class TestThreeLevelPriority:
"""Level 1 用户配置 > Level 2 OpenRouter > Level 3 Bundled"""
diff --git a/tests/test_model_params.py b/tests/Unit/platform/test_model_params.py
similarity index 100%
rename from tests/test_model_params.py
rename to tests/Unit/platform/test_model_params.py
diff --git a/tests/test_search_tools.py b/tests/Unit/platform/test_search_tools.py
similarity index 100%
rename from tests/test_search_tools.py
rename to tests/Unit/platform/test_search_tools.py
diff --git a/tests/test_task_service.py b/tests/Unit/platform/test_task_service.py
similarity index 92%
rename from tests/test_task_service.py
rename to tests/Unit/platform/test_task_service.py
index e3105c5da..8fd33d775 100644
--- a/tests/test_task_service.py
+++ b/tests/Unit/platform/test_task_service.py
@@ -2,6 +2,7 @@
import sqlite3
import time
+from types import SimpleNamespace
import pytest
@@ -120,6 +121,19 @@ def test_list_returns_all(self):
tasks = task_service.list_tasks()
assert len(tasks) >= 2
+ def test_list_enriches_member_id_from_thread_repo(self, monkeypatch):
+ task_service.create_task(title="task with thread", thread_id="thread-1")
+
+ thread_repo = SimpleNamespace(
+ get_by_id=lambda thread_id: {"member_id": "member-1"} if thread_id == "thread-1" else None,
+ close=lambda: None,
+ )
+ monkeypatch.setattr(task_service, "build_thread_repo", lambda **_: thread_repo)
+
+ tasks = task_service.list_tasks()
+
+ assert tasks[0]["member_id"] == "member-1"
+
def test_delete_existing(self):
task = task_service.create_task(title="to delete")
assert task_service.delete_task(task["id"]) is True
diff --git a/tests/Unit/sandbox/test_agentbay_provider.py b/tests/Unit/sandbox/test_agentbay_provider.py
new file mode 100644
index 000000000..593757e22
--- /dev/null
+++ b/tests/Unit/sandbox/test_agentbay_provider.py
@@ -0,0 +1,290 @@
+import json
+import sys
+import types
+from dataclasses import replace
+from types import SimpleNamespace
+
+from sandbox.providers.agentbay import AgentBayProvider
+
+
+def _install_fake_agentbay_module(monkeypatch) -> None:
+ fake_mod = types.ModuleType("agentbay")
+ fake_api_mod = types.ModuleType("agentbay.api")
+ fake_api_models_mod = types.ModuleType("agentbay.api.models")
+
+ class FakeCreateSessionParams:
+ def __init__(self):
+ self.image_id = None
+ self.context_syncs = None
+
+ class FakeContextSync:
+ @staticmethod
+ def new(context_id: str, path: str):
+ return {"context_id": context_id, "path": path}
+
+ class FakeGetSessionRequest:
+ def __init__(self, authorization: str, session_id: str):
+ self.authorization = authorization
+ self.session_id = session_id
+
+ fake_mod.CreateSessionParams = FakeCreateSessionParams
+ fake_mod.ContextSync = FakeContextSync
+ fake_api_models_mod.GetSessionRequest = FakeGetSessionRequest
+ monkeypatch.setitem(sys.modules, "agentbay", fake_mod)
+ monkeypatch.setitem(sys.modules, "agentbay.api", fake_api_mod)
+ monkeypatch.setitem(sys.modules, "agentbay.api.models", fake_api_models_mod)
+
+
+def _provider_with_fake_client(fake_client) -> AgentBayProvider:
+ provider = AgentBayProvider.__new__(AgentBayProvider)
+ provider.name = "agentbay"
+ provider.client = fake_client
+ provider.default_context_path = "/home/wuying"
+ provider.image_id = None
+ provider._sessions = {}
+ provider._capability = AgentBayProvider.CAPABILITY
+ return provider
+
+
+def test_create_session_refreshes_agentbay_session_when_direct_call_fields_missing(monkeypatch):
+ _install_fake_agentbay_module(monkeypatch)
+ raw_session = SimpleNamespace(session_id="sess-123", token="", link_url="", mcpTools=[])
+ hydrated_session = SimpleNamespace(session_id="sess-123", token="tok", link_url="https://link", mcpTools=[object()])
+ fake_client = SimpleNamespace(
+ context=SimpleNamespace(get=lambda *args, **kwargs: None),
+ create=lambda params: SimpleNamespace(success=True, session=raw_session, error_message=""),
+ get=lambda session_id: SimpleNamespace(success=True, session=hydrated_session, error_message=""),
+ )
+ provider = _provider_with_fake_client(fake_client)
+
+ info = provider.create_session()
+
+ assert info.session_id == "sess-123"
+ assert provider._sessions["sess-123"] is hydrated_session
+
+
+def test_get_session_refreshes_stale_cached_agentbay_session():
+ stale_session = SimpleNamespace(session_id="sess-123", token="", link_url="", mcpTools=[])
+ hydrated_session = SimpleNamespace(session_id="sess-123", token="tok", link_url="https://link", mcpTools=[object()])
+ fake_client = SimpleNamespace(
+ get=lambda session_id: SimpleNamespace(success=True, session=hydrated_session, error_message=""),
+ )
+ provider = _provider_with_fake_client(fake_client)
+ provider._sessions["sess-123"] = stale_session
+
+ session = provider._get_session("sess-123")
+
+ assert session is hydrated_session
+ assert provider._sessions["sess-123"] is hydrated_session
+
+
+def test_destroy_session_skips_sync_when_pause_capability_is_disabled():
+ calls: list[bool] = []
+
+ class _DeleteResult:
+ success = True
+
+ class _Session:
+ def __init__(self) -> None:
+ self.session_id = "sess-123"
+ self.token = "tok"
+ self.link_url = "https://link"
+ self.mcpTools = [object()]
+
+ def delete(self, *, sync_context: bool):
+ calls.append(sync_context)
+ return _DeleteResult()
+
+ provider = _provider_with_fake_client(SimpleNamespace())
+ provider._capability = replace(AgentBayProvider.CAPABILITY, can_pause=False, can_resume=False)
+ provider._sessions["sess-123"] = _Session()
+
+ assert provider.destroy_session("sess-123") is True
+ assert calls == [False]
+ assert "sess-123" not in provider._sessions
+
+
+def test_execute_prefers_link_url_shell_path_when_session_has_direct_call_metadata():
+ calls: list[tuple[str, object]] = []
+
+ class _Tool:
+ name = "shell"
+ server = "wuying_shell"
+
+ def _command_execute(**kwargs):
+ calls.append(("command", kwargs))
+ return SimpleNamespace(success=False, output="", error_message="should not be used")
+
+ session = SimpleNamespace(
+ session_id="sess-123",
+ token="tok",
+ link_url="https://link",
+ mcpTools=[_Tool()],
+ _get_mcp_server_for_tool=lambda tool_name: "wuying_shell" if tool_name == "shell" else None,
+ command=SimpleNamespace(execute_command=_command_execute),
+ )
+ provider = _provider_with_fake_client(SimpleNamespace())
+ provider._sessions["sess-123"] = session
+ provider._call_link_url_tool = lambda session, tool_name, args, server_name: (
+ calls.append(("link", {"tool": tool_name, "args": args, "server": server_name}))
+ or AgentBayProvider._provider_exec_result_from_tool_result(
+ SimpleNamespace(
+ success=True,
+ data=json.dumps({"stdout": "/home/wuying\n", "stderr": "", "exit_code": 0}),
+ error_message="",
+ )
+ )
+ )
+
+ result = provider.execute("sess-123", "pwd", timeout_ms=5000, cwd="/home/wuying")
+
+ assert result.output == "/home/wuying\n"
+ assert result.exit_code == 0
+ assert result.error is None
+ assert calls == [
+ (
+ "link",
+ {
+ "tool": "shell",
+ "args": {"command": "pwd", "timeout_ms": 5000, "cwd": "/home/wuying"},
+ "server": "wuying_shell",
+ },
+ )
+ ]
+
+
+def test_get_session_hydrates_sdk_shape_session_from_raw_get_session_metadata(monkeypatch):
+ _install_fake_agentbay_module(monkeypatch)
+ sdk_shape_session = SimpleNamespace(
+ session_id="sess-123",
+ token="tok",
+ resource_url="https://resource",
+ mcp_tools=[],
+ )
+ fake_response = SimpleNamespace(
+ to_map=lambda: {
+ "body": {
+ "Success": True,
+ "Data": {
+ "LinkUrl": "https://link",
+ "Token": "tok",
+ "ToolList": [{"Name": "shell", "Server": "wuying_shell"}],
+ },
+ }
+ }
+ )
+ fake_client = SimpleNamespace(
+ api_key="api-key",
+ get=lambda session_id: SimpleNamespace(success=True, session=sdk_shape_session, error_message=""),
+ client=SimpleNamespace(get_session=lambda request: fake_response),
+ )
+ provider = _provider_with_fake_client(fake_client)
+
+ session = provider._get_session("sess-123")
+
+ assert session is sdk_shape_session
+ assert getattr(session, "link_url") == "https://link"
+ assert getattr(session, "token") == "tok"
+ assert len(getattr(session, "mcp_tools")) == 1
+ assert getattr(session, "mcpTools") == getattr(session, "mcp_tools")
+ assert provider._resolve_shell_server(session) == "wuying_shell"
+
+
+def test_execute_prefers_link_url_shell_path_for_sdk_shape_session():
+ calls: list[tuple[str, object]] = []
+
+ def _command_execute(**kwargs):
+ calls.append(("command", kwargs))
+ return SimpleNamespace(success=False, output="", error_message="should not be used")
+
+ session = SimpleNamespace(
+ session_id="sess-123",
+ token="tok",
+ link_url="https://link",
+ mcp_tools=[SimpleNamespace(name="shell", server="wuying_shell")],
+ _find_server_for_tool=lambda tool_name: "wuying_shell" if tool_name == "shell" else "",
+ command=SimpleNamespace(execute_command=_command_execute),
+ )
+ provider = _provider_with_fake_client(SimpleNamespace())
+ provider._sessions["sess-123"] = session
+ provider._call_link_url_tool = lambda session, tool_name, args, server_name: (
+ calls.append(("link", {"tool": tool_name, "args": args, "server": server_name}))
+ or AgentBayProvider._provider_exec_result_from_tool_result(
+ SimpleNamespace(
+ success=True,
+ data=json.dumps({"stdout": "/home/wuying\n", "stderr": "", "exit_code": 0}),
+ error_message="",
+ )
+ )
+ )
+
+ result = provider.execute("sess-123", "pwd", timeout_ms=5000, cwd="/home/wuying")
+
+ assert result.output == "/home/wuying\n"
+ assert result.exit_code == 0
+ assert result.error is None
+ assert calls == [
+ (
+ "link",
+ {
+ "tool": "shell",
+ "args": {"command": "pwd", "timeout_ms": 5000, "cwd": "/home/wuying"},
+ "server": "wuying_shell",
+ },
+ )
+ ]
+
+
+def test_resolve_shell_server_falls_back_to_mcp_tools_when_sdk_resolver_raises():
+ session = SimpleNamespace(
+ mcp_tools=[SimpleNamespace(name="shell", server="wuying_shell")],
+ _find_server_for_tool=lambda tool_name: (_ for _ in ()).throw(StopIteration()),
+ )
+
+ assert AgentBayProvider._resolve_shell_server(session) == "wuying_shell"
+
+
+def test_execute_uses_provider_owned_link_call_instead_of_sdk_private_method():
+ calls: list[tuple[str, object]] = []
+
+ def _sdk_link(*args, **kwargs):
+ raise StopIteration()
+
+ def _provider_link(session: object, tool_name: str, args: dict, server_name: str):
+ calls.append(("provider-link", {"tool": tool_name, "args": args, "server": server_name}))
+ return AgentBayProvider._provider_exec_result_from_tool_result(
+ SimpleNamespace(
+ success=True,
+ data=json.dumps({"stdout": "/home/wuying\n", "stderr": "", "exit_code": 0}),
+ error_message="",
+ )
+ )
+
+ session = SimpleNamespace(
+ session_id="sess-123",
+ token="tok",
+ link_url="https://link",
+ mcp_tools=[SimpleNamespace(name="shell", server="wuying_shell")],
+ _find_server_for_tool=lambda tool_name: "wuying_shell",
+ _call_mcp_tool_link_url=_sdk_link,
+ command=SimpleNamespace(execute_command=lambda **kwargs: None),
+ )
+ provider = _provider_with_fake_client(SimpleNamespace())
+ provider._sessions["sess-123"] = session
+ provider._call_link_url_tool = _provider_link
+
+ result = provider.execute("sess-123", "pwd", timeout_ms=5000, cwd="/home/wuying")
+
+ assert result.output == "/home/wuying\n"
+ assert result.exit_code == 0
+ assert result.error is None
+ assert calls == [
+ (
+ "provider-link",
+ {
+ "tool": "shell",
+ "args": {"command": "pwd", "timeout_ms": 5000, "cwd": "/home/wuying"},
+ "server": "wuying_shell",
+ },
+ )
+ ]
diff --git a/tests/test_chat_session.py b/tests/Unit/sandbox/test_chat_session.py
similarity index 100%
rename from tests/test_chat_session.py
rename to tests/Unit/sandbox/test_chat_session.py
diff --git a/tests/test_daytona_provider.py b/tests/Unit/sandbox/test_daytona_provider.py
similarity index 100%
rename from tests/test_daytona_provider.py
rename to tests/Unit/sandbox/test_daytona_provider.py
diff --git a/tests/Unit/sandbox/test_daytona_provider_proxy.py b/tests/Unit/sandbox/test_daytona_provider_proxy.py
new file mode 100644
index 000000000..32f7f9533
--- /dev/null
+++ b/tests/Unit/sandbox/test_daytona_provider_proxy.py
@@ -0,0 +1,21 @@
+"""Unit tests for Daytona local toolbox URL normalization."""
+
+from sandbox.providers.daytona import DaytonaProvider
+
+
+def test_daytona_provider_rewrites_local_toolbox_proxy_url_to_loopback():
+ provider = object.__new__(DaytonaProvider)
+ provider.api_url = "http://localhost:3986/api"
+
+ rewritten = provider._normalize_toolbox_proxy_url("http://172.18.0.1:4000/toolbox")
+
+ assert rewritten == "http://127.0.0.1:4000/toolbox"
+
+
+def test_daytona_provider_leaves_remote_toolbox_proxy_url_unchanged():
+ provider = object.__new__(DaytonaProvider)
+ provider.api_url = "https://daytona.example.com/api"
+
+ untouched = provider._normalize_toolbox_proxy_url("https://proxy.example.com/toolbox")
+
+ assert untouched == "https://proxy.example.com/toolbox"
diff --git a/tests/test_e2b_provider.py b/tests/Unit/sandbox/test_e2b_provider.py
similarity index 57%
rename from tests/test_e2b_provider.py
rename to tests/Unit/sandbox/test_e2b_provider.py
index 8c88b614d..d64f72663 100644
--- a/tests/test_e2b_provider.py
+++ b/tests/Unit/sandbox/test_e2b_provider.py
@@ -1,16 +1,60 @@
"""Smoke test for E2B provider and sandbox."""
+import builtins
import os
import sys
+from types import SimpleNamespace
+
+import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sandbox.providers.e2b import E2BProvider
+def test_e2b_provider_requires_sdk(monkeypatch):
+ real_import = builtins.__import__
+
+ def fake_import(name, globals=None, locals=None, fromlist=(), level=0):
+ if name == "e2b":
+ raise ModuleNotFoundError("No module named 'e2b'")
+ return real_import(name, globals, locals, fromlist, level)
+
+ monkeypatch.setattr(builtins, "__import__", fake_import)
+
+ with pytest.raises(ModuleNotFoundError, match="No module named 'e2b'"):
+ E2BProvider(api_key="test-key", timeout=60)
+
+
+def test_e2b_create_session_bootstraps_workspace_files_dir(monkeypatch):
+ calls: list[tuple[str, str | None, float | None]] = []
+
+ class _FakeCommands:
+ def run(self, command, cwd=None, timeout=None):
+ calls.append((command, cwd, timeout))
+ return SimpleNamespace(stdout="", stderr="", exit_code=0)
+
+ class _FakeSandbox:
+ def __init__(self):
+ self.sandbox_id = "sbx-123"
+ self.commands = _FakeCommands()
+
+ @classmethod
+ def beta_create(cls, template, timeout, auto_pause, api_key):
+ return cls()
+
+ monkeypatch.setitem(sys.modules, "e2b", SimpleNamespace(Sandbox=_FakeSandbox))
+
+ provider = E2BProvider(api_key="test-key", timeout=60)
+ info = provider.create_session()
+
+ assert info.session_id == "sbx-123"
+ assert calls == [("mkdir -p /home/user/workspace/files", "/home/user", 10.0)]
+
+
def test_e2b_provider():
api_key = os.getenv("E2B_API_KEY")
- if not api_key:
+ if not api_key or not api_key.startswith("e2b_"):
print("E2B_API_KEY not set, skipping")
return
diff --git a/tests/test_lease.py b/tests/Unit/sandbox/test_lease.py
similarity index 100%
rename from tests/test_lease.py
rename to tests/Unit/sandbox/test_lease.py
diff --git a/tests/test_lifecycle.py b/tests/Unit/sandbox/test_lifecycle.py
similarity index 100%
rename from tests/test_lifecycle.py
rename to tests/Unit/sandbox/test_lifecycle.py
diff --git a/tests/Unit/sandbox/test_local_provider_metrics.py b/tests/Unit/sandbox/test_local_provider_metrics.py
new file mode 100644
index 000000000..1cb1daabc
--- /dev/null
+++ b/tests/Unit/sandbox/test_local_provider_metrics.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+import builtins
+import io
+from types import SimpleNamespace
+
+from sandbox.providers import local as local_module
+from sandbox.providers.local import LocalSessionProvider
+
+
+def test_local_provider_reads_linux_procfs_metrics_without_top_or_free(monkeypatch) -> None:
+ provider = LocalSessionProvider()
+
+ cpu_samples = iter(
+ [
+ "cpu 100 0 100 800 0 0 0 0 0 0\n",
+ "cpu 130 0 120 850 0 0 0 0 0 0\n",
+ ]
+ )
+
+ def fake_open(path: str, *args, **kwargs):
+ if path == "/proc/stat":
+ return io.StringIO(next(cpu_samples))
+ if path == "/proc/meminfo":
+ return io.StringIO("MemTotal: 1048576 kB\nMemAvailable: 524288 kB\n")
+ raise FileNotFoundError(path)
+
+ monkeypatch.setattr("sandbox.providers.local.platform.system", lambda: "Linux")
+ monkeypatch.setattr(builtins, "open", fake_open)
+ monkeypatch.setattr(
+ local_module.os,
+ "statvfs",
+ lambda _path: SimpleNamespace(f_frsize=4096, f_blocks=262144, f_bavail=131072),
+ raising=False,
+ )
+
+ metrics = provider.get_metrics("host")
+
+ assert metrics is not None
+ assert metrics.cpu_percent == 50.0
+ assert metrics.memory_total_mb == 1024.0
+ assert metrics.memory_used_mb == 512.0
+ assert metrics.disk_total_gb == 1.0
+ assert metrics.disk_used_gb == 0.5
diff --git a/tests/Unit/sandbox/test_remote_sandbox_init_commands.py b/tests/Unit/sandbox/test_remote_sandbox_init_commands.py
new file mode 100644
index 000000000..72ad58a1e
--- /dev/null
+++ b/tests/Unit/sandbox/test_remote_sandbox_init_commands.py
@@ -0,0 +1,32 @@
+from types import SimpleNamespace
+
+import pytest
+
+from sandbox.base import RemoteSandbox
+from sandbox.config import SandboxConfig
+
+
+class _RecordingCommand:
+ def __init__(self) -> None:
+ self.calls: list[str] = []
+
+ async def execute(self, command: str):
+ self.calls.append(command)
+ return SimpleNamespace(exit_code=0, stderr="", stdout="")
+
+
+@pytest.mark.asyncio
+async def test_run_init_commands_avoids_same_loop_threadsafe_wait(monkeypatch: pytest.MonkeyPatch):
+ command = _RecordingCommand()
+ capability = SimpleNamespace(command=command)
+ sandbox = RemoteSandbox.__new__(RemoteSandbox)
+ sandbox._config = SandboxConfig(init_commands=["echo init"])
+
+ def _unexpected_threadsafe(*args, **kwargs):
+ raise AssertionError("same-loop run_coroutine_threadsafe path should not be used")
+
+ monkeypatch.setattr("sandbox.base.asyncio.run_coroutine_threadsafe", _unexpected_threadsafe)
+
+ sandbox._run_init_commands(capability)
+
+ assert command.calls == ["echo init"]
diff --git a/tests/Unit/sandbox/test_sandbox_manager_volume_repo.py b/tests/Unit/sandbox/test_sandbox_manager_volume_repo.py
new file mode 100644
index 000000000..82b9c76eb
--- /dev/null
+++ b/tests/Unit/sandbox/test_sandbox_manager_volume_repo.py
@@ -0,0 +1,536 @@
+import json
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+import sandbox.manager as sandbox_manager_module
+from sandbox.manager import SandboxManager
+from sandbox.providers.local import LocalSessionProvider
+from sandbox.volume_source import DaytonaVolume, HostVolume
+
+
+class _FakeVolumeRepo:
+ def __init__(self, source: dict[str, str]) -> None:
+ self._source = source
+ self.closed = False
+ self.requested_ids: list[str] = []
+ self.created: list[tuple[str, str | None]] = []
+
+ def get(self, volume_id: str):
+ self.requested_ids.append(volume_id)
+ if self.created and volume_id == self.created[-1][0]:
+ return {"source": json.dumps(self._source)}
+ return {"source": json.dumps(self._source)}
+
+ def create(self, volume_id: str, source_json: str, name: str | None, created_at: str) -> None:
+ self.created.append((volume_id, name))
+ self._source = json.loads(source_json)
+
+ def close(self) -> None:
+ self.closed = True
+
+
+class _FakeVolume:
+ def __init__(self) -> None:
+ self.mount_calls: list[tuple[str, str]] = []
+ self.upload_calls: list[tuple[str, str]] = []
+ self.download_calls: list[tuple[str, str]] = []
+ self.cleared: list[str] = []
+
+ def resolve_mount_path(self) -> str:
+ return "/workspace"
+
+ def mount(self, thread_id: str, source, remote_path: str) -> None:
+ self.mount_calls.append((thread_id, remote_path))
+
+ def mount_managed_volume(self, thread_id: str, volume_name: str, remote_path: str) -> None:
+ self.mount_calls.append((thread_id, remote_path))
+
+ def sync_upload(self, thread_id: str, session_id: str, source, remote_path: str, files=None) -> None:
+ self.upload_calls.append((thread_id, session_id))
+
+ def sync_download(self, thread_id: str, session_id: str, source, remote_path: str) -> None:
+ self.download_calls.append((thread_id, session_id))
+
+ def clear_sync_state(self, thread_id: str) -> None:
+ self.cleared.append(thread_id)
+
+
+class _FakeThreadRepo:
+ def __init__(self, row):
+ self._row = row
+ self.closed = False
+
+ def get_by_id(self, _thread_id: str):
+ return self._row
+
+ def close(self) -> None:
+ self.closed = True
+
+
+class _FakeUpdateRepo:
+ def __init__(self) -> None:
+ self.updated: list[tuple[str, str]] = []
+ self.closed = False
+
+ def update_source(self, volume_id: str, source_json: str) -> None:
+ self.updated.append((volume_id, source_json))
+
+ def close(self) -> None:
+ self.closed = True
+
+
+class _FakeLeaseStore:
+ def __init__(self) -> None:
+ self.volume_updates: list[tuple[str, str]] = []
+
+ def set_volume_id(self, lease_id: str, volume_id: str) -> None:
+ self.volume_updates.append((lease_id, volume_id))
+
+
+class _FakeSessionManager:
+ def __init__(self, active_rows) -> None:
+ self._active_rows = active_rows
+ self.deleted: list[tuple[str, str]] = []
+
+ def list_active(self):
+ return list(self._active_rows)
+
+ def delete(self, session_id: str, reason: str) -> None:
+ self.deleted.append((session_id, reason))
+
+
+class _FakeDaytonaProvider:
+ def __init__(self) -> None:
+ self.calls: list[tuple[str, str]] = []
+ self.ready_waits: list[str] = []
+
+ def create_managed_volume(self, member_id: str, mount_path: str) -> str:
+ self.calls.append((member_id, mount_path))
+ return f"leon-volume-{member_id}"
+
+ def wait_managed_volume_ready(self, volume_name: str) -> None:
+ self.ready_waits.append(volume_name)
+
+
+def test_setup_mounts_reads_volume_from_active_storage_repo(tmp_path):
+ manager = object.__new__(SandboxManager)
+ manager.provider_capability = SimpleNamespace(runtime_kind="local")
+ manager.volume = _FakeVolume()
+ manager._get_active_terminal = lambda _thread_id: SimpleNamespace(lease_id="lease-1")
+ manager._get_lease = lambda _lease_id: SimpleNamespace(volume_id="volume-1")
+ repo = _FakeVolumeRepo(HostVolume(Path(tmp_path) / "vol").serialize())
+ manager._sandbox_volume_repo = lambda: repo
+
+ result = manager._setup_mounts("thread-1")
+
+ assert repo.requested_ids == ["volume-1"]
+ assert repo.closed is True
+ assert isinstance(result["source"], HostVolume)
+ assert manager.volume.mount_calls == [("thread-1", "/workspace")]
+
+
+def test_resolve_volume_source_reads_volume_from_active_storage_repo(tmp_path):
+ manager = object.__new__(SandboxManager)
+ manager.provider_capability = SimpleNamespace(runtime_kind="agentbay")
+ manager._get_active_terminal = lambda _thread_id: SimpleNamespace(lease_id="lease-1")
+ manager._get_lease = lambda _lease_id: SimpleNamespace(volume_id="volume-1")
+ repo = _FakeVolumeRepo(HostVolume(Path(tmp_path) / "vol").serialize())
+ manager._sandbox_volume_repo = lambda: repo
+
+ source = manager.resolve_volume_source("thread-1")
+
+ assert repo.requested_ids == ["volume-1"]
+ assert repo.closed is True
+ assert isinstance(source, HostVolume)
+
+
+def test_setup_mounts_provisions_missing_remote_volume_metadata(monkeypatch, tmp_path):
+ manager = object.__new__(SandboxManager)
+ manager.provider_capability = SimpleNamespace(runtime_kind="agentbay")
+ manager.volume = _FakeVolume()
+ manager._get_active_terminal = lambda _thread_id: SimpleNamespace(lease_id="lease-1")
+ lease = SimpleNamespace(lease_id="lease-1", volume_id=None)
+ manager._get_lease = lambda _lease_id: lease
+ manager.lease_store = _FakeLeaseStore()
+ repo = _FakeVolumeRepo(HostVolume(Path(tmp_path) / "vol").serialize())
+ manager._sandbox_volume_repo = lambda: repo
+ monkeypatch.setenv("LEON_SANDBOX_VOLUME_ROOT", str(tmp_path / "volumes"))
+
+ result = manager._setup_mounts("thread-1")
+
+ assert lease.volume_id is not None
+ assert repo.created == [(lease.volume_id, "vol-thread-1")]
+ assert manager.lease_store.volume_updates == [("lease-1", lease.volume_id)]
+ assert repo.requested_ids == [lease.volume_id]
+ assert isinstance(result["source"], HostVolume)
+
+
+def test_setup_mounts_recreates_missing_remote_volume_row_for_existing_volume_id(monkeypatch, tmp_path):
+ class _MissingRowRepo(_FakeVolumeRepo):
+ def __init__(self) -> None:
+ super().__init__(HostVolume(tmp_path / "vol").serialize())
+ self._rows: dict[str, dict[str, str]] = {}
+
+ def get(self, volume_id: str):
+ self.requested_ids.append(volume_id)
+ return self._rows.get(volume_id)
+
+ def create(self, volume_id: str, source_json: str, name: str | None, created_at: str) -> None:
+ super().create(volume_id, source_json, name, created_at)
+ self._rows[volume_id] = {"source": source_json}
+
+ def update_source(self, volume_id: str, source_json: str) -> None:
+ self._rows[volume_id] = {"source": source_json}
+ self._source = json.loads(source_json)
+
+ manager = object.__new__(SandboxManager)
+ manager.provider_capability = SimpleNamespace(runtime_kind="daytona_pty")
+ manager.provider = _FakeDaytonaProvider()
+ manager.volume = _FakeVolume()
+ manager._get_active_terminal = lambda _thread_id: SimpleNamespace(lease_id="lease-1")
+ lease = SimpleNamespace(lease_id="lease-1", volume_id="volume-missing")
+ manager._get_lease = lambda _lease_id: lease
+ manager.lease_store = _FakeLeaseStore()
+ repo = _MissingRowRepo()
+ manager._sandbox_volume_repo = lambda: repo
+ thread_repo = _FakeThreadRepo({"member_id": "member-daytona"})
+ monkeypatch.setattr(
+ sandbox_manager_module,
+ "build_thread_repo",
+ lambda **_kwargs: thread_repo,
+ raising=False,
+ )
+ monkeypatch.setenv("LEON_SANDBOX_VOLUME_ROOT", str(tmp_path / "volumes"))
+
+ result = manager._setup_mounts("thread-1")
+
+ assert repo.created == [("volume-missing", "vol-thread-1")]
+ assert manager.lease_store.volume_updates == []
+ assert repo.requested_ids == ["volume-missing", "volume-missing"]
+ assert isinstance(result["source"], DaytonaVolume)
+ assert manager.provider.calls == [("member-daytona", "/workspace")]
+ assert thread_repo.closed is True
+
+
+def test_enforce_idle_timeouts_destroys_when_provider_cannot_pause(monkeypatch):
+ manager = object.__new__(SandboxManager)
+ manager.provider = SimpleNamespace(
+ name="agentbay",
+ get_capability=lambda: SimpleNamespace(can_pause=False, can_destroy=True),
+ )
+ manager.terminal_store = SimpleNamespace(
+ db_path=Path("/tmp/fake-sandbox.db"),
+ get_by_id=lambda _terminal_id: {"terminal_id": "term-1", "lease_id": "lease-1"},
+ )
+ active_rows = [
+ {
+ "session_id": "sess-1",
+ "thread_id": "thread-1",
+ "terminal_id": "term-1",
+ "lease_id": "lease-1",
+ "started_at": "2026-04-04T00:00:00",
+ "last_active_at": "2026-04-04T00:00:00",
+ "idle_ttl_sec": 1,
+ "max_duration_sec": 3600,
+ "status": "active",
+ }
+ ]
+ manager.session_manager = _FakeSessionManager(active_rows)
+ fake_lease = SimpleNamespace(
+ lease_id="lease-1",
+ provider_name="agentbay",
+ refresh_instance_status=lambda _provider: "running",
+ pause_instance=lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("pause should not be used")),
+ destroy_instance=lambda *_args, **_kwargs: destroy_calls.append(True),
+ )
+ destroy_calls: list[bool] = []
+ manager._get_lease = lambda _lease_id: fake_lease
+ manager._terminal_is_busy = lambda _terminal_id: False
+ manager._lease_is_busy = lambda _lease_id: False
+ monkeypatch.setattr(
+ sandbox_manager_module,
+ "terminal_from_row",
+ lambda _row, _db_path: SimpleNamespace(terminal_id="term-1", lease_id="lease-1"),
+ )
+
+ manager.enforce_idle_timeouts()
+
+ assert destroy_calls == [True]
+ assert manager.session_manager.deleted == [("sess-1", "idle_timeout")]
+
+
+def test_destroy_thread_resources_skips_local_sync_when_lease_has_no_volume_id():
+ manager = object.__new__(SandboxManager)
+ manager.provider_capability = SimpleNamespace(runtime_kind="local")
+ manager.provider = SimpleNamespace(name="local")
+ manager.volume = _FakeVolume()
+ manager._get_thread_lease = lambda _thread_id: lease
+ manager._get_lease = lambda _lease_id: lease
+ manager._resolve_volume_entry = lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("volume lookup should not happen"))
+ manager.terminal_store = SimpleNamespace(
+ list_by_thread=lambda _thread_id: [{"terminal_id": "term-1", "lease_id": "lease-1", "thread_id": "thread-1"}],
+ delete=lambda _terminal_id: deleted_terminals.append(_terminal_id),
+ list_all=lambda: [],
+ db_path=Path("/tmp/fake-sandbox.db"),
+ )
+ manager.session_manager = SimpleNamespace(
+ get=lambda _thread_id, _terminal_id: SimpleNamespace(session_id="sess-1"),
+ delete=lambda session_id, reason: deleted_sessions.append((session_id, reason)),
+ )
+ deleted_terminals: list[str] = []
+ deleted_sessions: list[tuple[str, str]] = []
+ destroy_calls: list[str] = []
+
+ class _Lease:
+ lease_id = "lease-1"
+ observed_state = "running"
+ volume_id = None
+
+ def get_instance(self):
+ return SimpleNamespace(instance_id="instance-1")
+
+ def destroy_instance(self, _provider):
+ destroy_calls.append("lease-1")
+
+ lease = _Lease()
+ manager.lease_store = SimpleNamespace(delete=lambda lease_id: deleted_leases.append(lease_id))
+ deleted_leases: list[str] = []
+
+ assert manager.destroy_thread_resources("thread-1") is True
+ assert manager.volume.download_calls == []
+ assert manager.volume.cleared == ["thread-1"]
+ assert deleted_sessions == [("sess-1", "thread_deleted")]
+ assert deleted_terminals == ["term-1"]
+ assert destroy_calls == ["lease-1"]
+ assert deleted_leases == ["lease-1"]
+
+
+def test_sync_uploads_skips_local_volume_sync_when_lease_has_no_volume_id():
+ manager = object.__new__(SandboxManager)
+ manager.provider_capability = SimpleNamespace(runtime_kind="local")
+ manager.volume = _FakeVolume()
+ manager._get_active_terminal = lambda _thread_id: SimpleNamespace(terminal_id="term-1", lease_id="lease-1")
+ manager._get_lease = lambda _lease_id: SimpleNamespace(volume_id=None)
+ manager._get_thread_lease = lambda _thread_id: SimpleNamespace(volume_id=None)
+ manager._resolve_volume_entry = lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("volume lookup should not happen"))
+ manager.session_manager = SimpleNamespace(
+ get=lambda _thread_id, _terminal_id: SimpleNamespace(
+ lease=SimpleNamespace(get_instance=lambda: SimpleNamespace(instance_id="instance-1"))
+ )
+ )
+
+ assert manager.sync_uploads("thread-1") is True
+ assert manager.volume.upload_calls == []
+
+
+def test_get_sandbox_local_provider_does_not_require_volume_bootstrap(tmp_path):
+ manager = SandboxManager(
+ provider=LocalSessionProvider(default_cwd=str(tmp_path)),
+ db_path=tmp_path / "sandbox.db",
+ )
+
+ capability = manager.get_sandbox("thread-local")
+
+ assert capability.command.runtime_owns_cwd is True
+ session = manager.session_manager.get("thread-local")
+ assert session is not None
+ assert session.lease.provider_name == "local"
+
+
+def test_get_sandbox_auto_resumes_paused_lease_when_reconstructing_session():
+ manager = object.__new__(SandboxManager)
+ manager.provider = SimpleNamespace(name="local")
+ manager.provider_capability = SimpleNamespace(runtime_kind="local", eager_instance_binding=False)
+ manager.volume = _FakeVolume()
+ terminal = SimpleNamespace(
+ terminal_id="term-1",
+ lease_id="lease-1",
+ get_state=lambda: SimpleNamespace(cwd="/tmp", env_delta={}, state_version=0),
+ update_state=lambda _state: None,
+ )
+ lease = SimpleNamespace(
+ provider_name="local",
+ observed_state="paused",
+ bind_mounts=None,
+ recipe=None,
+ get_instance=lambda: SimpleNamespace(instance_id="instance-1"),
+ )
+ manager._get_active_terminal = lambda _thread_id: terminal
+ manager._get_lease = lambda _lease_id: lease
+ manager._assert_lease_provider = lambda _lease, _thread_id: None
+ manager._ensure_bound_instance = lambda _lease: None
+ resume_calls: list[tuple[str, str]] = []
+ manager.resume_session = lambda thread_id, source="user_resume": resume_calls.append((thread_id, source)) or True
+ manager.session_manager = SimpleNamespace(
+ get=lambda _thread_id, _terminal_id: None,
+ create=lambda **_kwargs: SimpleNamespace(session_id="sess-1", terminal=terminal, lease=lease),
+ )
+
+ manager.get_sandbox("thread-1")
+
+ assert resume_calls == [("thread-1", "auto_resume")]
+
+
+def test_get_sandbox_auto_resumes_live_session_when_lease_state_is_paused():
+ manager = object.__new__(SandboxManager)
+ terminal = SimpleNamespace(
+ terminal_id="term-1",
+ lease_id="lease-1",
+ get_state=lambda: SimpleNamespace(cwd="/tmp", env_delta={}, state_version=0),
+ )
+ paused_lease = SimpleNamespace(
+ lease_id="lease-1",
+ provider_name="local",
+ observed_state="paused",
+ bind_mounts=None,
+ )
+ resumed_lease = SimpleNamespace(
+ lease_id="lease-1",
+ provider_name="local",
+ observed_state="running",
+ bind_mounts=None,
+ )
+ live_session = SimpleNamespace(
+ terminal=terminal,
+ lease=paused_lease,
+ status="active",
+ )
+
+ manager.provider = SimpleNamespace(name="local")
+ manager.provider_capability = SimpleNamespace(runtime_kind="local", eager_instance_binding=False)
+ manager.volume = _FakeVolume()
+ manager._assert_lease_provider = lambda _lease, _thread_id: None
+ manager._ensure_bound_instance = lambda _lease: None
+ resume_calls: list[tuple[str, str]] = []
+
+ def _get_session(_thread_id, _terminal_id):
+ if resume_calls:
+ return SimpleNamespace(terminal=terminal, lease=resumed_lease, status="active")
+ return live_session
+
+ manager._get_active_terminal = lambda _thread_id: terminal
+ manager.resume_session = lambda thread_id, source="user_resume": resume_calls.append((thread_id, source)) or True
+ manager.session_manager = SimpleNamespace(get=_get_session)
+
+ capability = manager.get_sandbox("thread-1")
+
+ assert resume_calls == [("thread-1", "auto_resume")]
+ assert capability._session.lease is resumed_lease
+
+
+def test_resume_session_rebinds_live_session_lease_after_resume():
+ manager = object.__new__(SandboxManager)
+ terminal = SimpleNamespace(terminal_id="term-1", lease_id="lease-1")
+ resumed_lease = SimpleNamespace(
+ lease_id="lease-1",
+ observed_state="running",
+ get_instance=lambda: SimpleNamespace(instance_id="instance-1"),
+ resume_instance=lambda _provider, source="user_resume": True,
+ )
+ stale_lease = SimpleNamespace(lease_id="lease-1", observed_state="paused")
+ runtime = SimpleNamespace(lease=stale_lease)
+ live_session = SimpleNamespace(
+ session_id="sess-1",
+ terminal=terminal,
+ lease=stale_lease,
+ runtime=runtime,
+ status="paused",
+ )
+ manager.provider = SimpleNamespace(name="local")
+ manager._get_thread_terminals = lambda _thread_id: [terminal]
+ manager._get_thread_lease = lambda _thread_id: resumed_lease
+ manager._sync_to_sandbox = lambda *_args, **_kwargs: None
+ manager._ensure_chat_session = lambda _thread_id: None
+ manager.session_manager = SimpleNamespace(
+ get=lambda _thread_id, _terminal_id: live_session,
+ resume=lambda _session_id: setattr(live_session, "status", "active"),
+ )
+
+ ok = manager.resume_session("thread-1", source="auto_resume")
+
+ assert ok is True
+ assert live_session.lease is resumed_lease
+ assert runtime.lease is resumed_lease
+
+
+def test_upgrade_to_daytona_volume_uses_runtime_thread_repo_for_member_lookup(monkeypatch, tmp_path):
+ manager = object.__new__(SandboxManager)
+ manager.provider = _FakeDaytonaProvider()
+ update_repo = _FakeUpdateRepo()
+ manager._sandbox_volume_repo = lambda: update_repo
+
+ thread_repo = _FakeThreadRepo({"member_id": "member-supabase"})
+ monkeypatch.setattr(
+ sandbox_manager_module,
+ "build_thread_repo",
+ lambda **_kwargs: thread_repo,
+ raising=False,
+ )
+ monkeypatch.setenv("LEON_STORAGE_STRATEGY", "supabase")
+
+ new_source = manager._upgrade_to_daytona_volume(
+ "thread-supabase",
+ HostVolume(tmp_path / "staging"),
+ "volume-1",
+ "/workspace",
+ )
+
+ assert manager.provider.calls == [("member-supabase", "/workspace")]
+ assert thread_repo.closed is True
+ assert isinstance(new_source, DaytonaVolume)
+ assert update_repo.closed is True
+ assert update_repo.updated
+
+
+def test_upgrade_to_daytona_volume_waits_when_reusing_existing_daytona_volume(monkeypatch, tmp_path):
+ manager = object.__new__(SandboxManager)
+ provider = _FakeDaytonaProvider()
+ update_repo = _FakeUpdateRepo()
+ manager.provider = provider
+ manager._sandbox_volume_repo = lambda: update_repo
+
+ thread_repo = _FakeThreadRepo({"member_id": "member-supabase"})
+ monkeypatch.setattr(
+ sandbox_manager_module,
+ "build_thread_repo",
+ lambda **_kwargs: thread_repo,
+ raising=False,
+ )
+
+ def _already_exists(member_id: str, mount_path: str) -> str:
+ provider.calls.append((member_id, mount_path))
+ raise RuntimeError("volume already exists")
+
+ provider.create_managed_volume = _already_exists
+
+ new_source = manager._upgrade_to_daytona_volume(
+ "thread-supabase",
+ HostVolume(tmp_path / "staging"),
+ "volume-1",
+ "/workspace",
+ )
+
+ assert isinstance(new_source, DaytonaVolume)
+ assert provider.ready_waits == ["leon-volume-member-supabase"]
+
+
+@pytest.mark.parametrize(
+ ("strategy", "expected_class_name"),
+ [
+ ("sqlite", "SQLiteSandboxMonitorRepo"),
+ ("supabase", "SQLiteSandboxMonitorRepo"),
+ ],
+)
+def test_make_sandbox_monitor_repo_uses_runtime_sandbox_db(monkeypatch, strategy, expected_class_name):
+ from backend.web.core import storage_factory
+
+ monkeypatch.setenv("LEON_STORAGE_STRATEGY", strategy)
+ storage_factory.make_sandbox_monitor_repo.cache_clear() if hasattr(storage_factory.make_sandbox_monitor_repo, "cache_clear") else None
+
+ repo = storage_factory.make_sandbox_monitor_repo()
+ try:
+ assert repo.__class__.__name__ == expected_class_name
+ finally:
+ repo.close()
diff --git a/tests/test_sandbox_state.py b/tests/Unit/sandbox/test_sandbox_state.py
similarity index 100%
rename from tests/test_sandbox_state.py
rename to tests/Unit/sandbox/test_sandbox_state.py
diff --git a/tests/test_terminal.py b/tests/Unit/sandbox/test_terminal.py
similarity index 100%
rename from tests/test_terminal.py
rename to tests/Unit/sandbox/test_terminal.py
diff --git a/tests/test_terminal_persistence.py b/tests/Unit/sandbox/test_terminal_persistence.py
similarity index 100%
rename from tests/test_terminal_persistence.py
rename to tests/Unit/sandbox/test_terminal_persistence.py
diff --git a/tests/test_checkpoint_repo.py b/tests/Unit/storage/test_checkpoint_repo.py
similarity index 100%
rename from tests/test_checkpoint_repo.py
rename to tests/Unit/storage/test_checkpoint_repo.py
diff --git a/tests/test_eval_repo.py b/tests/Unit/storage/test_eval_repo.py
similarity index 100%
rename from tests/test_eval_repo.py
rename to tests/Unit/storage/test_eval_repo.py
diff --git a/tests/test_file_operation_repo.py b/tests/Unit/storage/test_file_operation_repo.py
similarity index 100%
rename from tests/test_file_operation_repo.py
rename to tests/Unit/storage/test_file_operation_repo.py
diff --git a/tests/test_run_event_repo.py b/tests/Unit/storage/test_run_event_repo.py
similarity index 100%
rename from tests/test_run_event_repo.py
rename to tests/Unit/storage/test_run_event_repo.py
diff --git a/tests/test_sqlite_kernel.py b/tests/Unit/storage/test_sqlite_kernel.py
similarity index 100%
rename from tests/test_sqlite_kernel.py
rename to tests/Unit/storage/test_sqlite_kernel.py
diff --git a/tests/Unit/storage/test_storage_container_contract.py b/tests/Unit/storage/test_storage_container_contract.py
new file mode 100644
index 000000000..503f9dd3a
--- /dev/null
+++ b/tests/Unit/storage/test_storage_container_contract.py
@@ -0,0 +1,82 @@
+from pathlib import Path
+
+import pytest
+
+from storage import StorageContainer
+from storage.providers.sqlite.checkpoint_repo import SQLiteCheckpointRepo
+from storage.providers.sqlite.eval_repo import SQLiteEvalRepo
+from storage.providers.supabase.checkpoint_repo import SupabaseCheckpointRepo
+from storage.providers.supabase.eval_repo import SupabaseEvalRepo
+from storage.providers.supabase.file_operation_repo import SupabaseFileOperationRepo
+from storage.providers.supabase.run_event_repo import SupabaseRunEventRepo
+from storage.providers.supabase.summary_repo import SupabaseSummaryRepo
+
+
+class _FakeSupabaseClient:
+ def table(self, table_name: str):
+ raise AssertionError(f"table() should not be called in this container test: {table_name}")
+
+
+def test_storage_container_sqlite_strategy_uses_sqlite_checkpoint_repo(tmp_path: Path) -> None:
+ container = StorageContainer(main_db_path=tmp_path / "leon.db", strategy="sqlite")
+ assert isinstance(container.checkpoint_repo(), SQLiteCheckpointRepo)
+
+
+def test_storage_container_supabase_strategy_builds_concrete_repos() -> None:
+ container = StorageContainer(strategy="supabase", supabase_client=_FakeSupabaseClient())
+
+ assert isinstance(container.checkpoint_repo(), SupabaseCheckpointRepo)
+ assert isinstance(container.run_event_repo(), SupabaseRunEventRepo)
+ assert isinstance(container.file_operation_repo(), SupabaseFileOperationRepo)
+ assert isinstance(container.summary_repo(), SupabaseSummaryRepo)
+ assert isinstance(container.eval_repo(), SupabaseEvalRepo)
+
+
+@pytest.mark.parametrize(
+ ("strategy", "repo_providers", "repo_method", "expected_type"),
+ [
+ ("sqlite", {"checkpoint_repo": "supabase"}, "checkpoint_repo", SupabaseCheckpointRepo),
+ ("supabase", {"eval_repo": "sqlite"}, "eval_repo", SQLiteEvalRepo),
+ ],
+)
+def test_storage_container_repo_level_overrides(
+ strategy: str,
+ repo_providers: dict[str, str],
+ repo_method: str,
+ expected_type: type,
+) -> None:
+ container = StorageContainer(
+ strategy=strategy,
+ repo_providers=repo_providers,
+ supabase_client=_FakeSupabaseClient(),
+ )
+ assert isinstance(getattr(container, repo_method)(), expected_type)
+
+
+@pytest.mark.parametrize(
+ ("repo_method", "message"),
+ [
+ ("checkpoint_repo", "Supabase strategy checkpoint_repo requires supabase_client"),
+ ("run_event_repo", "Supabase strategy run_event_repo requires supabase_client"),
+ ("file_operation_repo", "Supabase strategy file_operation_repo requires supabase_client"),
+ ("summary_repo", "Supabase strategy summary_repo requires supabase_client"),
+ ("eval_repo", "Supabase strategy eval_repo requires supabase_client"),
+ ],
+)
+def test_storage_container_supabase_repos_require_client(repo_method: str, message: str) -> None:
+ container = StorageContainer(strategy="supabase")
+ with pytest.raises(RuntimeError, match=message):
+ getattr(container, repo_method)()
+
+
+@pytest.mark.parametrize(
+ ("kwargs", "message"),
+ [
+ ({"strategy": "redis"}, "Unsupported storage strategy: redis. Supported strategies: sqlite, supabase"),
+ ({"repo_providers": {"foo_repo": "sqlite"}}, "Unknown repo provider bindings: foo_repo"),
+ ({"repo_providers": {"checkpoint_repo": "mysql"}}, "Unsupported provider for checkpoint_repo"),
+ ],
+)
+def test_storage_container_rejects_invalid_configuration(kwargs: dict[str, object], message: str) -> None:
+ with pytest.raises(ValueError, match=message):
+ StorageContainer(**kwargs) # type: ignore[arg-type]
diff --git a/tests/test_summary_repo.py b/tests/Unit/storage/test_summary_repo.py
similarity index 100%
rename from tests/test_summary_repo.py
rename to tests/Unit/storage/test_summary_repo.py
diff --git a/tests/middleware/memory/test_summary_store.py b/tests/Unit/storage/test_summary_store.py
similarity index 100%
rename from tests/middleware/memory/test_summary_store.py
rename to tests/Unit/storage/test_summary_store.py
diff --git a/tests/Unit/storage/test_supabase_chat_repo.py b/tests/Unit/storage/test_supabase_chat_repo.py
new file mode 100644
index 000000000..95422182d
--- /dev/null
+++ b/tests/Unit/storage/test_supabase_chat_repo.py
@@ -0,0 +1,98 @@
+from storage.providers.supabase.chat_repo import SupabaseChatMessageRepo
+from tests.fakes.supabase import FakeSupabaseClient
+
+
+def test_supabase_chat_message_repo_has_unread_mention_tracks_mentions_after_last_read():
+ tables = {
+ "chat_entities": [
+ {
+ "chat_id": "chat-1",
+ "user_id": "entity-target",
+ "joined_at": 1.0,
+ "last_read_at": 5.0,
+ }
+ ],
+ "chat_messages": [
+ {
+ "id": "msg-old",
+ "chat_id": "chat-1",
+ "sender_id": "entity-other",
+ "content": "old mention",
+ "mentions": '["entity-target"]',
+ "created_at": 4.0,
+ },
+ {
+ "id": "msg-self",
+ "chat_id": "chat-1",
+ "sender_id": "entity-target",
+ "content": "self mention",
+ "mentions": '["entity-target"]',
+ "created_at": 6.0,
+ },
+ {
+ "id": "msg-unread",
+ "chat_id": "chat-1",
+ "sender_id": "entity-other",
+ "content": "new mention",
+ "mentions": '["entity-target"]',
+ "created_at": 7.0,
+ },
+ {
+ "id": "msg-unread-no-mention",
+ "chat_id": "chat-1",
+ "sender_id": "entity-other",
+ "content": "plain unread",
+ "mentions": "[]",
+ "created_at": 8.0,
+ },
+ ],
+ }
+ repo = SupabaseChatMessageRepo(FakeSupabaseClient(tables))
+
+ assert repo.has_unread_mention("chat-1", "entity-target") is True
+
+
+def test_supabase_chat_message_repo_has_unread_mention_false_without_matching_unread_mentions():
+ tables = {
+ "chat_entities": [
+ {
+ "chat_id": "chat-1",
+ "user_id": "entity-target",
+ "joined_at": 1.0,
+ "last_read_at": 5.0,
+ }
+ ],
+ "chat_messages": [
+ {
+ "id": "msg-unread",
+ "chat_id": "chat-1",
+ "sender_id": "entity-other",
+ "content": "plain unread",
+ "mentions": "[]",
+ "created_at": 7.0,
+ }
+ ],
+ }
+ repo = SupabaseChatMessageRepo(FakeSupabaseClient(tables))
+
+ assert repo.has_unread_mention("chat-1", "entity-target") is False
+
+
+def test_supabase_chat_message_repo_has_unread_mention_false_without_membership_row():
+ tables = {
+ "chat_entities": [],
+ "chat_messages": [
+ {
+ "id": "msg-unread",
+ "chat_id": "chat-1",
+ "sender_id": "entity-other",
+ "content": "new mention",
+ "mentions": '["entity-target"]',
+ "created_at": 7.0,
+ }
+ ],
+ }
+ repo = SupabaseChatMessageRepo(FakeSupabaseClient(tables))
+
+ assert repo.count_unread("chat-1", "entity-target") == 0
+ assert repo.has_unread_mention("chat-1", "entity-target") is False
diff --git a/tests/Unit/storage/test_supabase_entity_repo.py b/tests/Unit/storage/test_supabase_entity_repo.py
new file mode 100644
index 000000000..3a9180e0d
--- /dev/null
+++ b/tests/Unit/storage/test_supabase_entity_repo.py
@@ -0,0 +1,31 @@
+from storage.providers.supabase.entity_repo import SupabaseEntityRepo
+from tests.fakes.supabase import FakeSupabaseClient
+
+
+def test_supabase_entity_repo_get_by_thread_id_returns_matching_entity():
+ tables = {
+ "entities": [
+ {
+ "id": "entity-1",
+ "type": "agent",
+ "member_id": "member-1",
+ "name": "worker-1",
+ "avatar": None,
+ "thread_id": "thread-1",
+ "created_at": 1.0,
+ }
+ ]
+ }
+ repo = SupabaseEntityRepo(FakeSupabaseClient(tables))
+
+ row = repo.get_by_thread_id("thread-1")
+
+ assert row is not None
+ assert row.id == "entity-1"
+ assert row.thread_id == "thread-1"
+
+
+def test_supabase_entity_repo_get_by_thread_id_returns_none_when_missing():
+ repo = SupabaseEntityRepo(FakeSupabaseClient({"entities": []}))
+
+ assert repo.get_by_thread_id("thread-missing") is None
diff --git a/tests/Unit/storage/test_supabase_thread_repo.py b/tests/Unit/storage/test_supabase_thread_repo.py
new file mode 100644
index 000000000..7f684797b
--- /dev/null
+++ b/tests/Unit/storage/test_supabase_thread_repo.py
@@ -0,0 +1,74 @@
+from storage.providers.supabase.thread_repo import SupabaseThreadRepo
+
+
+class _FakeTable:
+ def __init__(self) -> None:
+ self.insert_payload = None
+ self.update_payload = None
+ self.eq_calls: list[tuple[str, object]] = []
+ self.rows = [
+ {
+ "id": "thread-1",
+ "member_id": "member-1",
+ "sandbox_type": "local",
+ "model": None,
+ "cwd": None,
+ "observation_provider": None,
+ "is_main": 1,
+ "branch_index": 0,
+ "created_at": 1.0,
+ }
+ ]
+
+ def insert(self, payload):
+ self.insert_payload = payload
+ return self
+
+ def update(self, payload):
+ self.update_payload = payload
+ return self
+
+ def select(self, _cols):
+ return self
+
+ def eq(self, key, value):
+ self.eq_calls.append((key, value))
+ return self
+
+ def execute(self):
+ return type("Resp", (), {"data": self.rows})()
+
+
+class _FakeClient:
+ def __init__(self) -> None:
+ self.table_obj = _FakeTable()
+
+ def table(self, _name):
+ return self.table_obj
+
+
+def test_supabase_thread_repo_create_writes_integer_main_flag():
+ client = _FakeClient()
+ repo = SupabaseThreadRepo(client)
+
+ repo.create(
+ thread_id="thread-1",
+ member_id="member-1",
+ sandbox_type="local",
+ created_at=1.0,
+ is_main=True,
+ branch_index=0,
+ )
+
+ assert client.table_obj.insert_payload["is_main"] == 1
+
+
+def test_supabase_thread_repo_update_writes_integer_main_flag():
+ client = _FakeClient()
+ client.table_obj.rows[0]["branch_index"] = 1
+ client.table_obj.rows[0]["is_main"] = 0
+ repo = SupabaseThreadRepo(client)
+
+ repo.update("thread-1", is_main=False)
+
+ assert client.table_obj.update_payload["is_main"] == 0
diff --git a/tests/test_sync_state_thread_safety.py b/tests/Unit/storage/test_sync_state_thread_safety.py
similarity index 100%
rename from tests/test_sync_state_thread_safety.py
rename to tests/Unit/storage/test_sync_state_thread_safety.py
diff --git a/tests/test_sync_strategy.py b/tests/Unit/storage/test_sync_strategy.py
similarity index 100%
rename from tests/test_sync_strategy.py
rename to tests/Unit/storage/test_sync_strategy.py
diff --git a/tests/test_thread_repo.py b/tests/Unit/storage/test_thread_repo.py
similarity index 100%
rename from tests/test_thread_repo.py
rename to tests/Unit/storage/test_thread_repo.py
diff --git a/tests/middleware/memory/test_summary_store_performance.py b/tests/middleware/memory/test_summary_store_performance.py
deleted file mode 100644
index ce3b0c3bb..000000000
--- a/tests/middleware/memory/test_summary_store_performance.py
+++ /dev/null
@@ -1,266 +0,0 @@
-"""Performance tests for SummaryStore.
-
-This module tests the performance characteristics of SummaryStore operations
-to ensure they meet production requirements.
-
-Test Cases:
-1. Query performance with many summaries (1000 summaries, query < 50ms)
-2. Concurrent write performance (10 threads, avg write < 100ms)
-3. Database size growth (100 summaries, DB < 1MB)
-"""
-
-import sys
-import threading
-import time
-from pathlib import Path
-
-import pytest
-
-_SKIP_WINDOWS = pytest.mark.skipif(
- sys.platform == "win32", reason="SQLite connection-per-call is slow on Windows; performance tests not meaningful there"
-)
-
-from core.runtime.middleware.memory.summary_store import SummaryStore
-
-
-@_SKIP_WINDOWS
-def test_query_performance_with_many_summaries(temp_db):
- """Test query performance with 1000 summaries.
-
- Requirements:
- - Create 1000 summaries across multiple threads
- - Query for latest summary should complete in < 50ms
- - Index should enable fast lookups even with large dataset
- """
- store = SummaryStore(temp_db)
-
- # Create 1000 summaries across 100 threads (10 summaries per thread)
- num_threads = 100
- summaries_per_thread = 10
-
- print(f"\n[Performance Test] Creating {num_threads * summaries_per_thread} summaries...")
- start_time = time.perf_counter()
-
- for thread_idx in range(num_threads):
- thread_id = f"thread-{thread_idx:04d}"
- for summary_idx in range(summaries_per_thread):
- store.save_summary(
- thread_id=thread_id,
- summary_text=f"Summary {summary_idx} for {thread_id}. " * 10, # ~500 chars
- compact_up_to_index=summary_idx * 10,
- compacted_at=summary_idx * 20,
- )
-
- creation_time = time.perf_counter() - start_time
- print(f"[Performance Test] Created 1000 summaries in {creation_time:.2f}s")
-
- # Now test query performance on a thread with many summaries
- # Query the middle thread to avoid edge cases
- target_thread = "thread-0050"
-
- # Warm up query (first query might be slower due to cold cache)
- store.get_latest_summary(target_thread)
-
- # Measure query performance over 10 iterations
- query_times = []
- for _ in range(10):
- start = time.perf_counter()
- summary = store.get_latest_summary(target_thread)
- elapsed = (time.perf_counter() - start) * 1000 # Convert to ms
- query_times.append(elapsed)
-
- assert summary is not None
- assert summary.thread_id == target_thread
-
- avg_query_time = sum(query_times) / len(query_times)
- max_query_time = max(query_times)
-
- print(f"[Performance Test] Query times: avg={avg_query_time:.2f}ms, max={max_query_time:.2f}ms")
-
- # Assert performance requirements
- assert avg_query_time < 50, f"Average query time {avg_query_time:.2f}ms exceeds 50ms threshold"
- assert max_query_time < 100, f"Max query time {max_query_time:.2f}ms exceeds 100ms threshold"
-
-
-@_SKIP_WINDOWS
-def test_concurrent_write_performance(temp_db):
- """Test concurrent write performance with 10 threads.
-
- Requirements:
- - 10 threads writing concurrently
- - Each thread writes 10 summaries
- - Average write time per summary < 100ms
- - No database locks or corruption
- """
- store = SummaryStore(temp_db)
-
- num_threads = 10
- summaries_per_thread = 10
-
- results = []
- errors = []
-
- def write_summaries(thread_idx: int):
- """Worker function to write summaries."""
- thread_id = f"concurrent-thread-{thread_idx:02d}"
- thread_times = []
-
- try:
- for summary_idx in range(summaries_per_thread):
- start = time.perf_counter()
-
- store.save_summary(
- thread_id=thread_id,
- summary_text=f"Concurrent summary {summary_idx} from thread {thread_idx}. " * 10,
- compact_up_to_index=summary_idx * 10,
- compacted_at=summary_idx * 20,
- )
-
- elapsed = (time.perf_counter() - start) * 1000 # Convert to ms
- thread_times.append(elapsed)
-
- results.append(
- {
- "thread_idx": thread_idx,
- "times": thread_times,
- "avg_time": sum(thread_times) / len(thread_times),
- }
- )
- except Exception as e:
- errors.append(
- {
- "thread_idx": thread_idx,
- "error": str(e),
- }
- )
-
- # Start all threads
- print(f"\n[Performance Test] Starting {num_threads} concurrent write threads...")
- start_time = time.perf_counter()
-
- threads = []
- for i in range(num_threads):
- t = threading.Thread(target=write_summaries, args=(i,))
- threads.append(t)
- t.start()
-
- # Wait for all threads to complete
- for t in threads:
- t.join()
-
- total_time = time.perf_counter() - start_time
-
- # Check for errors
- assert len(errors) == 0, f"Concurrent writes failed: {errors}"
- assert len(results) == num_threads, f"Expected {num_threads} results, got {len(results)}"
-
- # Calculate statistics
- all_times = []
- for result in results:
- all_times.extend(result["times"])
-
- avg_write_time = sum(all_times) / len(all_times)
- max_write_time = max(all_times)
- min_write_time = min(all_times)
-
- print(f"[Performance Test] Concurrent writes completed in {total_time:.2f}s")
- print(f"[Performance Test] Write times: avg={avg_write_time:.2f}ms, min={min_write_time:.2f}ms, max={max_write_time:.2f}ms")
-
- # Assert performance requirements
- assert avg_write_time < 100, f"Average write time {avg_write_time:.2f}ms exceeds 100ms threshold"
-
- # Verify data integrity - each thread should have its latest summary
- for i in range(num_threads):
- thread_id = f"concurrent-thread-{i:02d}"
- summary = store.get_latest_summary(thread_id)
- assert summary is not None, f"Missing summary for {thread_id}"
- assert summary.thread_id == thread_id
- assert summary.compact_up_to_index == (summaries_per_thread - 1) * 10
-
-
-@_SKIP_WINDOWS
-def test_database_size_growth(temp_db):
- """Test database size growth with 100 summaries.
-
- Requirements:
- - Create 100 summaries with realistic content
- - Database size (including WAL files) should be < 1MB
- - Verify efficient storage without excessive overhead
- """
- store = SummaryStore(temp_db)
-
- num_summaries = 100
-
- # Create realistic summary content (~2KB per summary)
- summary_template = (
- """
- The conversation covered the following topics:
- - User requested implementation of feature X
- - Discussion about architecture and design patterns
- - Code review and feedback on proposed changes
- - Testing strategy and coverage requirements
- - Documentation updates and API changes
- """
- * 10
- ) # ~2KB of text
-
- print(f"\n[Performance Test] Creating {num_summaries} summaries with realistic content...")
-
- for i in range(num_summaries):
- store.save_summary(
- thread_id=f"size-test-thread-{i:03d}",
- summary_text=f"Summary {i}: {summary_template}",
- compact_up_to_index=i * 10,
- compacted_at=i * 20,
- is_split_turn=(i % 5 == 0), # 20% split turns
- split_turn_prefix=f"Prefix for summary {i}" if i % 5 == 0 else None,
- )
-
- # Force WAL checkpoint to flush data to main database
- import sqlite3
-
- conn = sqlite3.connect(str(temp_db))
- try:
- conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
- conn.commit()
- finally:
- conn.close()
-
- # Calculate total database size (main DB + WAL files)
- db_size = temp_db.stat().st_size
-
- wal_size = 0
- for suffix in ["-wal", "-shm"]:
- wal_file = Path(str(temp_db) + suffix)
- if wal_file.exists():
- wal_size += wal_file.stat().st_size
-
- total_size = db_size + wal_size
- total_size_kb = total_size / 1024
- total_size_mb = total_size / (1024 * 1024)
-
- print("[Performance Test] Database sizes:")
- print(f" - Main DB: {db_size / 1024:.2f} KB")
- print(f" - WAL files: {wal_size / 1024:.2f} KB")
- print(f" - Total: {total_size_kb:.2f} KB ({total_size_mb:.3f} MB)")
-
- # Assert size requirements
- assert total_size < 1024 * 1024, f"Database size {total_size_mb:.3f}MB exceeds 1MB threshold"
-
- # Verify data integrity - spot check a few summaries
- for i in [0, 49, 99]:
- thread_id = f"size-test-thread-{i:03d}"
- summary = store.get_latest_summary(thread_id)
- assert summary is not None, f"Missing summary for {thread_id}"
- assert summary.thread_id == thread_id
- assert summary.compact_up_to_index == i * 10
- assert summary_template in summary.summary_text
-
- # Verify total count
- all_threads = [f"size-test-thread-{i:03d}" for i in range(num_summaries)]
- found_count = sum(1 for tid in all_threads if store.get_latest_summary(tid) is not None)
- assert found_count == num_summaries, f"Expected {num_summaries} summaries, found {found_count}"
-
-
-if __name__ == "__main__":
- pytest.main([__file__, "-v", "-s"])
diff --git a/tests/test_agent_pool.py b/tests/test_agent_pool.py
deleted file mode 100644
index 3ddd2945f..000000000
--- a/tests/test_agent_pool.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import asyncio
-import time
-from types import SimpleNamespace
-
-import pytest
-
-from backend.web.services import agent_pool
-
-
-class _FakeThreadRepo:
- def get_by_id(self, thread_id: str):
- return {"id": thread_id, "cwd": "/tmp", "model": "leon:large"}
-
-
-@pytest.mark.asyncio
-async def test_get_or_create_agent_creates_once_per_thread(monkeypatch: pytest.MonkeyPatch):
- created: list[object] = []
-
- def _fake_create_agent_sync(
- sandbox_name: str,
- workspace_root=None,
- model_name: str | None = None,
- agent: str | None = None,
- queue_manager=None,
- chat_repos=None,
- extra_allowed_paths=None,
- ) -> object:
- time.sleep(0.05)
- obj = SimpleNamespace()
- created.append(obj)
- return obj
-
- monkeypatch.setattr(agent_pool, "create_agent_sync", _fake_create_agent_sync)
- monkeypatch.setattr(agent_pool, "get_or_create_agent_id", lambda **_: "agent-1")
-
- app = SimpleNamespace(
- state=SimpleNamespace(
- agent_pool={},
- thread_repo=_FakeThreadRepo(),
- thread_cwd={},
- thread_sandbox={},
- )
- )
-
- first, second = await asyncio.gather(
- agent_pool.get_or_create_agent(app, "local", thread_id="thread-1"),
- agent_pool.get_or_create_agent(app, "local", thread_id="thread-1"),
- )
-
- assert len(created) == 1
- assert first is second
- assert app.state.agent_pool["thread-1:local"] is first
diff --git a/tests/test_capability_async.py b/tests/test_capability_async.py
deleted file mode 100644
index 8d1ba06d7..000000000
--- a/tests/test_capability_async.py
+++ /dev/null
@@ -1,85 +0,0 @@
-import asyncio
-import uuid
-
-from sandbox.capability import SandboxCapability
-from sandbox.interfaces.executor import AsyncCommand, ExecuteResult
-
-
-class _DummyState:
- cwd = "/tmp"
-
-
-class _DummyTerminal:
- terminal_id = "dummy-term"
-
- def get_state(self):
- return _DummyState()
-
-
-class _DummyRuntime:
- def __init__(self):
- self.commands: list[str] = []
- self._async_commands: dict[str, AsyncCommand] = {}
-
- async def execute(self, command: str, timeout=None):
- self.commands.append(command)
- await asyncio.sleep(0.01)
- return ExecuteResult(exit_code=0, stdout=f"ok:{command}", stderr="")
-
- async def start_command(self, command: str, cwd: str) -> AsyncCommand:
- command_id = f"cmd_{uuid.uuid4().hex[:12]}"
- result = await self.execute(command)
- async_cmd = AsyncCommand(
- command_id=command_id,
- command_line=command,
- cwd=cwd,
- exit_code=result.exit_code,
- done=True,
- stdout_buffer=[result.stdout],
- )
- self._async_commands[command_id] = async_cmd
- return async_cmd
-
- async def get_command(self, command_id: str) -> AsyncCommand | None:
- return self._async_commands.get(command_id)
-
- async def wait_for_command(self, command_id: str, timeout: float | None = None) -> ExecuteResult | None:
- cmd = self._async_commands.get(command_id)
- if cmd is None:
- return None
- return ExecuteResult(
- exit_code=cmd.exit_code or 0,
- stdout="".join(cmd.stdout_buffer),
- stderr="".join(cmd.stderr_buffer),
- )
-
-
-class _DummySession:
- def __init__(self):
- self.terminal = _DummyTerminal()
- self.runtime = _DummyRuntime()
- self.touches = 0
-
- def touch(self):
- self.touches += 1
-
-
-async def _run_async_command_flow():
- session = _DummySession()
- capability = SandboxCapability(session)
-
- async_cmd = await capability.command.execute_async("echo hi", cwd="/tmp/demo", env={"A": "1"})
- assert async_cmd.command_id.startswith("cmd_")
-
- status = await capability.command.get_status(async_cmd.command_id)
- assert status is not None
-
- result = await capability.command.wait_for(async_cmd.command_id, timeout=1.0)
- assert result is not None
- assert result.exit_code == 0
- assert "echo hi" in result.stdout
- assert session.touches > 0
-
-
-def test_command_wrapper_supports_execute_async():
- asyncio.run(_run_async_command_flow())
diff --git a/tests/test_filesystem_touch_updates_session.py b/tests/test_filesystem_touch_updates_session.py
deleted file mode 100644
index 9a6bede32..000000000
--- a/tests/test_filesystem_touch_updates_session.py
+++ /dev/null
@@ -1,103 +0,0 @@
-"""FS wrapper should count as activity (touch ChatSession) for idle reaper."""
-
-# TODO: fs.list_dir now goes through volume-mount path; FakeProvider needs a volume_id to pass
-import pytest
-
-pytest.skip("pre-existing: FakeProvider missing volume setup — needs test update", allow_module_level=True)
-
-import sqlite3
-import tempfile
-import uuid
-from datetime import datetime
-from pathlib import Path
-
-from sandbox.manager import SandboxManager
-from sandbox.provider import Metrics, ProviderCapability, ProviderExecResult, SandboxProvider, SessionInfo
-
-
-class _FakeProvider(SandboxProvider):
- name = "fake"
-
- def __init__(self) -> None:
- self._statuses: dict[str, str] = {}
-
- def get_capability(self) -> ProviderCapability:
- return ProviderCapability(
- can_pause=True,
- can_resume=True,
- can_destroy=True,
- supports_webhook=False,
- )
-
- def create_session(self, context_id: str | None = None) -> SessionInfo:
- sid = f"s-{uuid.uuid4().hex[:8]}"
- self._statuses[sid] = "running"
- return SessionInfo(session_id=sid, provider=self.name, status="running")
-
- def destroy_session(self, session_id: str, sync: bool = True) -> bool:
- self._statuses.pop(session_id, None)
- return True
-
- def pause_session(self, session_id: str) -> bool:
- self._statuses[session_id] = "paused"
- return True
-
- def resume_session(self, session_id: str) -> bool:
- self._statuses[session_id] = "running"
- return True
-
- def get_session_status(self, session_id: str) -> str:
- return self._statuses.get(session_id, "deleted")
-
- def execute(self, session_id: str, command: str, timeout_ms: int = 30000, cwd: str | None = None) -> ProviderExecResult:
- return ProviderExecResult(output="", exit_code=0)
-
- def read_file(self, session_id: str, path: str) -> str:
- return ""
-
- def write_file(self, session_id: str, path: str, content: str) -> str:
- return "ok"
-
- def list_dir(self, session_id: str, path: str) -> list[dict]:
- return [{"name": "a.txt", "type": "file", "size": 1}]
-
- def get_metrics(self, session_id: str) -> Metrics | None:
- return None
-
- def create_runtime(self, terminal, lease):
- from sandbox.runtime import RemoteWrappedRuntime
-
- return RemoteWrappedRuntime(terminal, lease, self)
-
-
-def _temp_db() -> Path:
- with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
- return Path(f.name)
-
-
-def test_fs_list_dir_touches_session_last_active_at() -> None:
- db = _temp_db()
- try:
- provider = _FakeProvider()
- mgr = SandboxManager(provider=provider, db_path=db)
-
- cap = mgr.get_sandbox("thread-1")
- session_id = cap._session.session_id # type: ignore[attr-defined]
-
- with sqlite3.connect(str(db)) as conn:
- before = conn.execute(
- "SELECT last_active_at FROM chat_sessions WHERE chat_session_id = ?",
- (session_id,),
- ).fetchone()[0]
-
- cap.fs.list_dir("/")
-
- with sqlite3.connect(str(db)) as conn:
- after = conn.execute(
- "SELECT last_active_at FROM chat_sessions WHERE chat_session_id = ?",
- (session_id,),
- ).fetchone()[0]
-
- assert datetime.fromisoformat(str(after)) >= datetime.fromisoformat(str(before))
- finally:
- db.unlink(missing_ok=True)
diff --git a/tests/test_idle_reaper_shared_lease.py b/tests/test_idle_reaper_shared_lease.py
deleted file mode 100644
index 172e07537..000000000
--- a/tests/test_idle_reaper_shared_lease.py
+++ /dev/null
@@ -1,146 +0,0 @@
-from __future__ import annotations
-
-# TODO: get_sandbox now calls _setup_mounts which requires lease.volume_id; FakeProvider needs update
-import pytest
-
-pytest.skip("pre-existing: FakeProvider missing volume setup — needs test update", allow_module_level=True)
-
-import sqlite3
-from dataclasses import dataclass
-from datetime import datetime, timedelta
-from pathlib import Path
-
-from sandbox.manager import SandboxManager
-from sandbox.provider import ProviderCapability, ProviderExecResult, SandboxProvider, SessionInfo
-
-
-@dataclass
-class _DummyInstance:
- instance_id: str
-
-
-class DummyProvider(SandboxProvider):
- """Minimal provider stub for lease + idle-reaper tests."""
-
- name = "daytona"
-
- def __init__(self) -> None:
- self._paused: set[str] = set()
- self._created: list[str] = []
- self._pause_calls: list[str] = []
-
- def get_capability(self) -> ProviderCapability:
- return ProviderCapability(
- can_pause=True,
- can_resume=True,
- can_destroy=True,
- supports_status_probe=True,
- eager_instance_binding=False,
- runtime_kind="remote",
- )
-
- def create_session(self, context_id: str | None = None) -> SessionInfo:
- sid = f"sb-{len(self._created) + 1}"
- self._created.append(sid)
- return SessionInfo(session_id=sid, provider=self.name, status="running")
-
- def destroy_session(self, session_id: str, sync: bool = True) -> bool:
- return True
-
- def pause_session(self, session_id: str) -> bool:
- self._pause_calls.append(session_id)
- self._paused.add(session_id)
- return True
-
- def resume_session(self, session_id: str) -> bool:
- self._paused.discard(session_id)
- return True
-
- def get_session_status(self, session_id: str) -> str:
- return "paused" if session_id in self._paused else "running"
-
- def execute(
- self,
- session_id: str,
- command: str,
- timeout_ms: int = 30000,
- cwd: str | None = None,
- ) -> ProviderExecResult:
- return ProviderExecResult(output="", exit_code=0)
-
- def read_file(self, session_id: str, path: str) -> str:
- return ""
-
- def write_file(self, session_id: str, path: str, content: str) -> str:
- return "ok"
-
- def list_dir(self, session_id: str, path: str) -> list[dict]:
- return []
-
- def get_metrics(self, session_id: str):
- return None
-
- def create_runtime(self, terminal, lease):
- from sandbox.runtime import RemoteWrappedRuntime
-
- return RemoteWrappedRuntime(terminal, lease, self)
-
-
-def _connect(db: Path) -> sqlite3.Connection:
- conn = sqlite3.connect(str(db), timeout=30)
- conn.execute("PRAGMA busy_timeout=30000")
- return conn
-
-
-def test_idle_reaper_does_not_pause_shared_lease_when_other_session_active(tmp_path: Path) -> None:
- db = tmp_path / "sandbox.db"
- provider = DummyProvider()
- manager = SandboxManager(provider=provider, db_path=db)
-
- thread_id = "thread-1"
-
- # Create the main terminal/session.
- cap = manager.get_sandbox(thread_id)
- lease_id = cap._session.lease.lease_id # type: ignore[attr-defined]
-
- # Force-bind a physical instance so idle reaper has something to pause.
- cap._session.lease.ensure_active_instance(provider) # type: ignore[attr-defined]
-
- # Create a background terminal/session on the same lease (non-block command behavior).
- bg_session = manager.create_background_command_session(thread_id=thread_id, initial_cwd="/home/daytona")
-
- main_session_id = cap._session.session_id # type: ignore[attr-defined]
- bg_session_id = bg_session.session_id
-
- # Make the background session expired, keep the main session active.
- now = datetime.now()
- expired_at = (now - timedelta(seconds=10_000)).isoformat()
-
- with _connect(db) as conn:
- conn.execute(
- "UPDATE chat_sessions SET idle_ttl_sec = 1, last_active_at = ?, started_at = ? WHERE chat_session_id = ?",
- (expired_at, expired_at, bg_session_id),
- )
- conn.execute(
- "UPDATE chat_sessions SET idle_ttl_sec = 300, last_active_at = ?, started_at = ? WHERE chat_session_id = ?",
- (now.isoformat(), now.isoformat(), main_session_id),
- )
- conn.commit()
-
- closed = manager.enforce_idle_timeouts()
- assert closed == 1
-
- # The shared lease must NOT be paused because the main session is still active.
- lease = manager.lease_store.get(lease_id)
- assert lease is not None
- assert lease.desired_state == "running"
- assert provider._pause_calls == []
-
- with _connect(db) as conn:
- row = conn.execute(
- "SELECT status, close_reason FROM chat_sessions WHERE chat_session_id = ?",
- (bg_session_id,),
- ).fetchone()
- assert row is not None
- assert row[0] == "closed"
- assert row[1] == "idle_timeout"
diff --git a/tests/test_integration_new_arch.py b/tests/test_integration_new_arch.py
deleted file mode 100644
index 459919424..000000000
--- a/tests/test_integration_new_arch.py
+++ /dev/null
@@ -1,619 +0,0 @@
-"""Integration tests for the full new architecture flow.
-
-Tests the complete flow: Thread → ChatSession → Runtime → Terminal → Lease → Instance
-"""
-
-# TODO: get_sandbox now calls _setup_mounts requiring lease.volume_id; FakeProvider/mock_provider
-# needs a volume configured. Most tests in this file fail for the same reason.
-import pytest
-
-pytest.skip("pre-existing: FakeProvider missing volume setup — needs test update", allow_module_level=True)
-
-import asyncio
-import sqlite3
-import tempfile
-from pathlib import Path
-from unittest.mock import MagicMock
-
-from sandbox.chat_session import ChatSessionManager
-from sandbox.manager import SandboxManager
-from sandbox.provider import ProviderCapability, SessionInfo
-from sandbox.terminal import terminal_from_row
-from storage.providers.sqlite.lease_repo import SQLiteLeaseRepo
-from storage.providers.sqlite.terminal_repo import SQLiteTerminalRepo
-
-
-@pytest.fixture
-def temp_db():
- """Create temporary database for testing."""
- with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
- db_path = Path(f.name)
- yield db_path
- db_path.unlink(missing_ok=True)
-
-
-@pytest.fixture
-def mock_provider():
- """Create mock SandboxProvider for local testing."""
- provider = MagicMock()
- provider.name = "local"
- provider.default_cwd = "/tmp"
- provider.get_capability.return_value = ProviderCapability(
- can_pause=True,
- can_resume=True,
- can_destroy=True,
- supports_webhook=False,
- supports_status_probe=False,
- eager_instance_binding=True,
- inspect_visible=True,
- runtime_kind="local",
- )
- provider.create_session.return_value = SessionInfo(
- session_id="local-inst-1",
- provider="local",
- status="running",
- )
- provider.get_session_status.return_value = "running"
- provider.pause_session.return_value = True
- provider.resume_session.return_value = True
- provider.destroy_session.return_value = True
-
- # Mock execute to return proper results
- def mock_execute(instance_id, command, timeout_ms=None, cwd=None):
- result = MagicMock()
- result.exit_code = 0
-
- if command == "pwd":
- result.stdout = cwd or "/root"
- result.stderr = ""
- elif command.startswith("cd "):
- result.stdout = ""
- result.stderr = ""
- else:
- result.stdout = "command output"
- result.stderr = ""
-
- return result
-
- provider.execute = mock_execute
- from sandbox.providers.local import LocalPersistentShellRuntime
-
- provider.create_runtime.side_effect = lambda terminal, lease: LocalPersistentShellRuntime(terminal, lease)
- return provider
-
-
-@pytest.fixture
-def mock_remote_provider():
- """Create mock remote provider that supports lease lifecycle + fs ops."""
- provider = MagicMock()
- provider.name = "e2b"
- provider.get_capability.return_value = ProviderCapability(
- can_pause=True,
- can_resume=True,
- can_destroy=True,
- supports_webhook=False,
- runtime_kind="remote",
- )
- provider.create_session.return_value = SessionInfo(
- session_id="inst-remote-1",
- provider="e2b",
- status="running",
- )
- provider.get_session_status.return_value = "running"
- provider.pause_session.return_value = True
- provider.resume_session.return_value = True
- provider.write_file.return_value = "ok"
- provider.read_file.return_value = "content"
- provider.list_dir.return_value = []
- from sandbox.runtime import RemoteWrappedRuntime
-
- provider.create_runtime.side_effect = lambda terminal, lease: RemoteWrappedRuntime(terminal, lease, provider)
- return provider
-
-
-@pytest.fixture
-def sandbox_manager(temp_db, mock_provider):
- """Create SandboxManager with temp database."""
- return SandboxManager(provider=mock_provider, db_path=temp_db)
-
-
-@pytest.fixture
-def remote_sandbox_manager(temp_db, mock_remote_provider):
- """Create SandboxManager with remote provider."""
- return SandboxManager(provider=mock_remote_provider, db_path=temp_db)
-
-
-class TestFullArchitectureFlow:
- """Test complete flow through all layers."""
-
- @pytest.mark.skip(reason="pre-existing: get_sandbox now requires lease.volume_id — FakeProvider needs update")
- def test_get_sandbox_creates_all_layers(self, sandbox_manager, temp_db):
- """Test that get_sandbox creates Terminal → Lease → Runtime → ChatSession."""
- thread_id = "test-thread-1"
-
- # Get sandbox (should create everything)
- capability = sandbox_manager.get_sandbox(thread_id)
-
- assert capability is not None
- assert capability._session is not None
- assert capability._session.thread_id == thread_id
- assert capability._session.terminal is not None
- assert capability._session.lease is not None
- assert capability._session.runtime is not None
-
- # Verify persistence
- terminal_store = SQLiteTerminalRepo(db_path=temp_db)
- terminal_row = terminal_store.get_active(thread_id)
- assert terminal_row is not None
-
- lease_repo = SQLiteLeaseRepo(db_path=temp_db)
- lease_row = lease_repo.get(terminal_row["lease_id"])
- lease_repo.close()
- assert lease_row is not None
-
- def test_get_sandbox_reuses_existing_session(self, sandbox_manager):
- """Test that get_sandbox reuses existing session."""
- thread_id = "test-thread-2"
-
- # First call creates
- capability1 = sandbox_manager.get_sandbox(thread_id)
- session_id1 = capability1._session.session_id
-
- # Second call reuses
- capability2 = sandbox_manager.get_sandbox(thread_id)
- session_id2 = capability2._session.session_id
-
- assert session_id1 == session_id2
-
- @pytest.mark.asyncio
- async def test_command_execution_through_capability(self, sandbox_manager):
- """Test command execution through capability wrapper."""
- thread_id = "test-thread-3"
-
- capability = sandbox_manager.get_sandbox(thread_id)
-
- # Execute command
- result = await capability.command.execute("echo hello")
-
- assert result.exit_code == 0
- assert result.stdout is not None
-
- @pytest.mark.asyncio
- async def test_async_command_status_survives_session_recreate(self, sandbox_manager):
- """Completed async commands should remain queryable after ChatSession recreation."""
- thread_id = "test-thread-3b"
- capability1 = sandbox_manager.get_sandbox(thread_id)
- session_id_1 = capability1._session.session_id
-
- async_cmd = await capability1.command.execute_async("echo async-ok")
- done_1 = await capability1.command.wait_for(async_cmd.command_id, timeout=5.0)
- assert done_1 is not None
- assert done_1.exit_code == 0
- assert "async-ok" in done_1.stdout
-
- sandbox_manager.session_manager.delete(session_id_1, reason="test_rotate_session")
- capability2 = sandbox_manager.get_sandbox(thread_id)
- assert capability2._session.session_id != session_id_1
-
- status = await capability2.command.get_status(async_cmd.command_id)
- assert status is not None
- assert status.done
-
- done_2 = await capability2.command.wait_for(async_cmd.command_id, timeout=1.0)
- assert done_2 is not None
- assert done_2.exit_code == 0
- assert "async-ok" in done_2.stdout
-
- @pytest.mark.asyncio
- async def test_non_blocking_command_uses_new_abstract_terminal(self, sandbox_manager, temp_db):
- thread_id = "test-thread-async-terminal"
- capability = sandbox_manager.get_sandbox(thread_id)
- default_terminal_id = capability._session.terminal.terminal_id
- shared_lease_id = capability._session.lease.lease_id
-
- from sandbox.terminal import TerminalState
-
- capability._session.terminal.update_state(TerminalState(cwd="/tmp", env_delta={"FOO": "bar"}))
-
- async_cmd = await capability.command.execute_async("echo bg-terminal")
- result = await capability.command.wait_for(async_cmd.command_id, timeout=5.0)
- assert result is not None
- assert result.exit_code == 0
- assert "bg-terminal" in result.stdout
-
- terminal_rows = sandbox_manager.terminal_store.list_by_thread(thread_id)
- assert len(terminal_rows) == 2
- terminals = [terminal_from_row(r, sandbox_manager.terminal_store.db_path) for r in terminal_rows]
- default_row = sandbox_manager.terminal_store.get_default(thread_id)
- assert default_row is not None
- default_terminal = terminal_from_row(default_row, sandbox_manager.terminal_store.db_path)
- assert default_terminal.terminal_id == default_terminal_id
-
- background_terminal = next(t for t in terminals if t.terminal_id != default_terminal_id)
- assert background_terminal.lease_id == shared_lease_id
- bg_state = background_terminal.get_state()
- assert bg_state.cwd in {"/tmp", "/private/tmp"}
- assert bg_state.env_delta.get("FOO") == "bar"
-
- with sqlite3.connect(str(temp_db), timeout=30) as conn:
- row = conn.execute(
- "SELECT terminal_id FROM terminal_commands WHERE command_id = ?",
- (async_cmd.command_id,),
- ).fetchone()
- assert row is not None
- assert row[0] == background_terminal.terminal_id
-
- @pytest.mark.asyncio
- async def test_running_async_command_visible_from_new_manager(self, temp_db, mock_provider):
- thread_id = "test-thread-running-visible"
- manager1 = SandboxManager(provider=mock_provider, db_path=temp_db)
- capability1 = manager1.get_sandbox(thread_id)
-
- async_cmd = await capability1.command.execute_async("for i in 1 2 3; do echo tick-$i; sleep 1; done")
- await asyncio.sleep(1.2)
-
- # Simulate command_status query from a fresh API manager/session process.
- manager2 = SandboxManager(provider=mock_provider, db_path=temp_db)
- capability2 = manager2.get_sandbox(thread_id)
-
- running = await capability2.command.get_status(async_cmd.command_id)
- assert running is not None
- assert not running.done
- assert "Runtime restarted before command completion" not in "".join(running.stderr_buffer)
- assert "tick-1" in "".join(running.stdout_buffer)
-
- finished = await capability2.command.wait_for(async_cmd.command_id, timeout=5.0)
- assert finished is not None
- assert finished.exit_code == 0
- assert "tick-3" in finished.stdout
-
- def test_terminal_state_persists_across_sessions(self, sandbox_manager, temp_db):
- """Test that terminal state persists when session expires."""
- thread_id = "test-thread-4"
-
- # Create session and update terminal state
- capability1 = sandbox_manager.get_sandbox(thread_id)
- terminal_id = capability1._session.terminal.terminal_id
-
- # Update terminal state
- from sandbox.terminal import TerminalState
-
- new_state = TerminalState(cwd="/tmp", env_delta={"FOO": "bar"})
- capability1._session.terminal.update_state(new_state)
-
- # Delete session (simulating expiry)
- sandbox_manager.session_manager.delete(capability1._session.session_id)
-
- # Get sandbox again (creates new session)
- capability2 = sandbox_manager.get_sandbox(thread_id)
-
- # Terminal should be reused with persisted state
- assert capability2._session.terminal.terminal_id == terminal_id
- state = capability2._session.terminal.get_state()
- assert state.cwd == "/tmp"
- assert state.env_delta == {"FOO": "bar"}
-
- def test_get_sandbox_fails_on_provider_mismatch(self, temp_db, mock_provider, mock_remote_provider):
- local_mgr = SandboxManager(provider=mock_provider, db_path=temp_db)
- remote_mgr = SandboxManager(provider=mock_remote_provider, db_path=temp_db)
-
- thread_id = "test-thread-provider-mismatch"
- _ = local_mgr.get_sandbox(thread_id)
-
- with pytest.raises(RuntimeError, match="bound to provider"):
- remote_mgr.get_sandbox(thread_id)
-
- def test_pause_all_sessions_skips_provider_mismatch(self, temp_db, mock_provider, mock_remote_provider):
- local_mgr = SandboxManager(provider=mock_provider, db_path=temp_db)
- remote_mgr = SandboxManager(provider=mock_remote_provider, db_path=temp_db)
-
- _ = local_mgr.get_sandbox("test-thread-provider-mismatch-pause")
-
- assert remote_mgr.pause_all_sessions() == 0
-
- def test_lease_shared_across_terminals(self, sandbox_manager, temp_db):
- """Test that multiple terminals can share the same lease."""
- thread_id1 = "test-thread-5"
- thread_id2 = "test-thread-6"
-
- # Create first terminal
- capability1 = sandbox_manager.get_sandbox(thread_id1)
- lease_id1 = capability1._session.lease.lease_id
-
- # Manually create second terminal with same lease
- terminal_store = SQLiteTerminalRepo(db_path=temp_db)
- _terminal2 = terminal_store.create(
- terminal_id="term-shared",
- thread_id=thread_id2,
- lease_id=lease_id1,
- )
-
- # Get sandbox for second thread
- capability2 = sandbox_manager.get_sandbox(thread_id2)
- lease_id2 = capability2._session.lease.lease_id
-
- # Should share the same lease
- assert lease_id1 == lease_id2
-
- def test_session_touch_updates_activity(self, sandbox_manager):
- """Test that capability.touch() updates session activity."""
- thread_id = "test-thread-7"
-
- capability = sandbox_manager.get_sandbox(thread_id)
- old_activity = capability._session.last_active_at
-
- import time
-
- time.sleep(0.01)
-
- capability.touch()
-
- # Activity should be updated
- assert capability._session.last_active_at > old_activity
-
- def test_session_info_api(self, sandbox_manager):
- """Test that manager can expose current provider session info."""
- thread_id = "test-thread-8"
-
- session_info = sandbox_manager.get_or_create_session(thread_id)
- assert session_info is not None
- assert session_info.provider == "local"
-
- sessions = sandbox_manager.list_sessions()
- assert len(sessions) > 0
-
- def test_remote_fs_operation_fails_on_paused_lease(self, remote_sandbox_manager, mock_remote_provider):
- """Paused lease must fail fast until explicit resume."""
- thread_id = "test-thread-remote-fs-1"
- capability = remote_sandbox_manager.get_sandbox(thread_id)
-
- lease = capability._session.lease
- lease.ensure_active_instance(mock_remote_provider)
- lease.pause_instance(mock_remote_provider)
- assert lease.get_instance() is not None
- assert lease.get_instance().status == "paused"
- mock_remote_provider.get_session_status.return_value = "paused"
-
- with pytest.raises(RuntimeError, match="is paused"):
- capability.fs.write_file("/home/user/test.txt", "ok")
- assert lease.get_instance().status == "paused"
-
-
-class TestSessionLifecycle:
- """Test session lifecycle management."""
-
- def test_session_expiry_cleanup(self, sandbox_manager, temp_db):
- """Test that expired sessions are cleaned up."""
-
- thread_id = "test-thread-9"
-
- # Create session with very short timeout
- capability = sandbox_manager.get_sandbox(thread_id)
- _session_id = capability._session.session_id
-
- # Manually update policy to expire immediately
- session_manager = ChatSessionManager(
- provider=sandbox_manager.provider,
- db_path=temp_db,
- )
-
- import time
-
- time.sleep(0.1)
-
- # Cleanup expired
- count = session_manager.cleanup_expired()
-
- # Session should still exist (default policy is 10 minutes)
- assert count == 0
-
- def test_pause_and_resume_session(self, sandbox_manager):
- """Test pausing and resuming sessions."""
- thread_id = "test-thread-10"
-
- # Create session
- capability = sandbox_manager.get_sandbox(thread_id)
- session_id = capability._session.session_id
- terminal_id = capability._session.terminal.terminal_id
-
- assert sandbox_manager.pause_session(thread_id)
- paused = sandbox_manager.session_manager.get(thread_id, terminal_id)
- assert paused is not None
- assert paused.session_id == session_id
- assert paused.status == "paused"
-
- assert sandbox_manager.resume_session(thread_id)
- resumed = sandbox_manager.session_manager.get(thread_id, terminal_id)
- assert resumed is not None
- assert resumed.session_id == session_id
- assert resumed.status == "active"
-
- def test_pause_and_resume_cover_all_thread_terminals(self, sandbox_manager):
- thread_id = "test-thread-10b"
- capability = sandbox_manager.get_sandbox(thread_id)
- asyncio.run(capability.command.execute_async("echo bg"))
-
- terminal_rows = sandbox_manager.terminal_store.list_by_thread(thread_id)
- assert len(terminal_rows) == 2
-
- assert sandbox_manager.pause_session(thread_id)
- for row in terminal_rows:
- session = sandbox_manager.session_manager.get(thread_id, row["terminal_id"])
- assert session is not None
- assert session.status == "paused"
-
- assert sandbox_manager.resume_session(thread_id)
- for row in terminal_rows:
- session = sandbox_manager.session_manager.get(thread_id, row["terminal_id"])
- assert session is not None
- assert session.status == "active"
-
- def test_destroy_session(self, sandbox_manager):
- """Test destroying a session."""
- thread_id = "test-thread-11"
-
- # Create session
- capability = sandbox_manager.get_sandbox(thread_id)
- _session_id = capability._session.session_id
- terminal_id = capability._session.terminal.terminal_id
-
- # Destroy
- sandbox_manager.destroy_session(thread_id)
-
- # Session should be gone
- session = sandbox_manager.session_manager.get(thread_id, terminal_id)
- assert session is None
-
- def test_destroy_session_removes_all_thread_resources(self, sandbox_manager):
- thread_id = "test-thread-11b"
- capability = sandbox_manager.get_sandbox(thread_id)
- asyncio.run(capability.command.execute_async("echo bg"))
-
- terminal_rows_before = sandbox_manager.terminal_store.list_by_thread(thread_id)
- assert len(terminal_rows_before) == 2
-
- assert sandbox_manager.destroy_session(thread_id)
- assert sandbox_manager.terminal_store.list_by_thread(thread_id) == []
- assert all(sandbox_manager.session_manager.get(thread_id, row["terminal_id"]) is None for row in terminal_rows_before)
-
-
-class TestMultiThreadScenarios:
- """Test scenarios with multiple threads."""
-
- def test_multiple_threads_independent_sessions(self, sandbox_manager):
- """Test that multiple threads get independent sessions."""
- thread_ids = [f"test-thread-{i}" for i in range(3)]
-
- capabilities = [sandbox_manager.get_sandbox(tid) for tid in thread_ids]
-
- # All should have different sessions
- session_ids = [cap._session.session_id for cap in capabilities]
- assert len(set(session_ids)) == 3
-
- # All should have different terminals
- terminal_ids = [cap._session.terminal.terminal_id for cap in capabilities]
- assert len(set(terminal_ids)) == 3
-
- def test_thread_switch_preserves_state(self, sandbox_manager):
- """Test that switching between threads preserves state."""
- thread_id1 = "test-thread-12"
- thread_id2 = "test-thread-13"
-
- # Work on thread 1
- cap1 = sandbox_manager.get_sandbox(thread_id1)
- from sandbox.terminal import TerminalState
-
- cap1._session.terminal.update_state(TerminalState(cwd="/tmp"))
-
- # Switch to thread 2
- cap2 = sandbox_manager.get_sandbox(thread_id2)
- cap2._session.terminal.update_state(TerminalState(cwd="/home"))
-
- # Switch back to thread 1
- cap1_again = sandbox_manager.get_sandbox(thread_id1)
- state1 = cap1_again._session.terminal.get_state()
- assert state1.cwd == "/tmp"
-
- # Check thread 2 state
- cap2_again = sandbox_manager.get_sandbox(thread_id2)
- state2 = cap2_again._session.terminal.get_state()
- assert state2.cwd == "/home"
-
-
-class TestErrorHandling:
- """Test error handling scenarios."""
-
- def test_missing_terminal_recreates_with_same_id(self, sandbox_manager, temp_db):
- """Test that terminal is recreated when missing from DB.
-
- Note: The terminal_id is stored in the session, so when we delete
- the terminal but not the session, the session still references the
- old terminal_id. This is expected behavior - the terminal_id is
- stable across recreations.
- """
- thread_id = "test-thread-14"
-
- # Create session
- capability = sandbox_manager.get_sandbox(thread_id)
- terminal_id = capability._session.terminal.terminal_id
-
- # Delete terminal from DB (but not session)
- terminal_store = SQLiteTerminalRepo(db_path=temp_db)
- terminal_store.delete(terminal_id)
-
- # Delete session to force full recreation
- sandbox_manager.session_manager.delete(capability._session.session_id)
-
- # Get sandbox again - creates new terminal
- _capability2 = sandbox_manager.get_sandbox(thread_id)
-
- # Terminal should exist in DB now
- _terminal2 = terminal_store.get_active(thread_id)
- assert _terminal2 is not None
-
- def test_missing_lease_recreates_with_same_id(self, sandbox_manager, temp_db):
- """Test that lease is recreated when missing from DB.
-
- Note: The lease_id is stored in the terminal, so when we delete
- the lease but not the terminal, the terminal still references the
- old lease_id. This is expected behavior - the lease_id is stable.
- """
- thread_id = "test-thread-15"
-
- # Create session
- capability = sandbox_manager.get_sandbox(thread_id)
- lease_id = capability._session.lease.lease_id
-
- # Delete lease from DB
- lease_repo = SQLiteLeaseRepo(db_path=temp_db)
- lease_repo.delete(lease_id)
- lease_repo.close()
-
- # Delete session AND terminal to force full recreation
- sandbox_manager.session_manager.delete(capability._session.session_id)
- terminal_store = SQLiteTerminalRepo(db_path=temp_db)
- terminal_store.delete(capability._session.terminal.terminal_id)
-
- # Get sandbox again - creates new terminal + lease
- capability2 = sandbox_manager.get_sandbox(thread_id)
-
- # Lease should exist in DB now
- lease_repo2 = SQLiteLeaseRepo(db_path=temp_db)
- lease2 = lease_repo2.get(capability2._session.lease.lease_id)
- lease_repo2.close()
- assert lease2 is not None
-
-
-# ── create_sandbox() factory tests ──────────────────────────────────────────
-
-from sandbox import LocalSandbox, create_sandbox # noqa: E402
-from sandbox.config import SandboxConfig # noqa: E402
-
-
-def test_create_sandbox_local():
- sbx = create_sandbox(SandboxConfig(provider="local"), workspace_root="/tmp")
- assert isinstance(sbx, LocalSandbox)
- assert sbx.working_dir == "/tmp"
-
-
-def test_create_sandbox_agentbay_requires_api_key(monkeypatch):
- monkeypatch.delenv("AGENTBAY_API_KEY", raising=False)
- with pytest.raises(ValueError, match="AGENTBAY_API_KEY"):
- create_sandbox(SandboxConfig(provider="agentbay"))
-
-
-def test_create_sandbox_e2b_requires_api_key(monkeypatch):
- monkeypatch.delenv("E2B_API_KEY", raising=False)
- with pytest.raises(ValueError, match="E2B_API_KEY"):
- create_sandbox(SandboxConfig(provider="e2b"))
-
-
-def test_create_sandbox_daytona_requires_api_key(monkeypatch):
- monkeypatch.delenv("DAYTONA_API_KEY", raising=False)
- with pytest.raises(ValueError, match="DAYTONA_API_KEY"):
- create_sandbox(SandboxConfig(provider="daytona"))
-
-
-def test_create_sandbox_unknown_provider():
- with pytest.raises(ValueError, match="Unknown sandbox provider"):
- create_sandbox(SandboxConfig(provider="bogus"))
diff --git a/tests/test_local_chat_session.py b/tests/test_local_chat_session.py
deleted file mode 100644
index 49b45fb9a..000000000
--- a/tests/test_local_chat_session.py
+++ /dev/null
@@ -1,72 +0,0 @@
-"""Tests for local sandbox using ChatSession architecture."""
-
-from __future__ import annotations
-
-# TODO: pre-existing: get_sandbox requires lease.volume_id
-import pytest
-
-pytest.skip("pre-existing: FakeProvider missing volume setup — needs test update", allow_module_level=True)
-
-from pathlib import Path
-
-import pytest
-
-from sandbox.base import LocalSandbox
-from sandbox.manager import lookup_sandbox_for_thread
-from sandbox.providers.local import LocalSessionProvider
-from sandbox.thread_context import set_current_thread_id
-
-
-@pytest.mark.asyncio
-async def test_local_chat_session_persistence_and_resume(tmp_path: Path):
- workspace = tmp_path / "workspace"
- workspace.mkdir(parents=True, exist_ok=True)
- db_path = tmp_path / "sandbox.db"
-
- thread_id = "local-thread-1"
- sandbox = LocalSandbox(workspace_root=str(workspace), db_path=db_path)
- set_current_thread_id(thread_id)
- sandbox.ensure_session(thread_id)
-
- shell = sandbox.shell()
-
- first = await shell.execute("cd /tmp && export LEON_LOCAL_VAR=chat-session-ok && pwd")
- assert first.exit_code == 0
- assert "/tmp" in first.stdout
-
- second = await shell.execute("pwd")
- assert second.exit_code == 0
- assert "/tmp" in second.stdout
-
- third = await shell.execute("echo $LEON_LOCAL_VAR")
- assert third.exit_code == 0
- assert "chat-session-ok" in third.stdout
-
- assert sandbox.pause_thread(thread_id)
- assert lookup_sandbox_for_thread(thread_id, db_path=db_path) == "local"
- assert sandbox.resume_thread(thread_id)
-
- set_current_thread_id(thread_id)
- resumed_pwd = await shell.execute("pwd")
- assert resumed_pwd.exit_code == 0
- assert "/tmp" in resumed_pwd.stdout
-
- resumed_env = await shell.execute("echo $LEON_LOCAL_VAR")
- assert resumed_env.exit_code == 0
- assert "chat-session-ok" in resumed_env.stdout
-
- sandbox.close()
-
-
-def test_local_provider_pause_resume_state_recovery():
- provider = LocalSessionProvider()
- session = provider.create_session(context_id="leon-lease-test-session")
- sid = session.session_id
- provider._session_states.clear()
- assert provider.pause_session(sid)
- assert provider.get_session_status(sid) == "paused"
-
- provider._session_states.clear()
- assert provider.resume_session(sid)
- assert provider.get_session_status(sid) == "running"
- assert not provider.pause_session("unknown-session-id")
diff --git a/tests/test_main_thread_flow.py b/tests/test_main_thread_flow.py
deleted file mode 100644
index e9c2afbd3..000000000
--- a/tests/test_main_thread_flow.py
+++ /dev/null
@@ -1,243 +0,0 @@
-import pytest
-
-pytest.skip("pre-existing: thread_config and agent-member wiring broken — needs migration", allow_module_level=True)
-
-import asyncio
-import os
-from types import SimpleNamespace
-
-from backend.web.models.requests import CreateThreadRequest, ResolveMainThreadRequest
-from backend.web.routers import threads as threads_router
-from backend.web.services.auth_service import AuthService
-from storage.contracts import EntityRow
-from storage.providers.sqlite.entity_repo import SQLiteEntityRepo
-from storage.providers.sqlite.member_repo import SQLiteAccountRepo, SQLiteMemberRepo
-from storage.providers.sqlite.thread_repo import SQLiteThreadRepo
-
-
-def test_register_creates_agent_members_without_threads(tmp_path, monkeypatch):
- db_path = tmp_path / "leon.db"
- members_dir = tmp_path / "members"
-
- import backend.web.services.member_service as member_service
-
- monkeypatch.setattr(member_service, "MEMBERS_DIR", members_dir)
- monkeypatch.setattr(member_service, "LEON_HOME", tmp_path)
-
- member_repo = SQLiteMemberRepo(db_path)
- account_repo = SQLiteAccountRepo(db_path)
- entity_repo = SQLiteEntityRepo(db_path)
- thread_repo = SQLiteThreadRepo(db_path)
- service = AuthService(
- members=member_repo,
- accounts=account_repo,
- entities=entity_repo,
- )
-
- payload = service.register("fresh_user", "pass1234")
- claims = service.verify_token(payload["token"])
- account = account_repo.get_by_username("fresh_user")
-
- owned_agents = member_repo.list_by_owner_user_id(payload["user"]["id"])
- assert "member_id" not in claims
- assert claims["user_id"] == payload["user"]["id"]
- assert payload["user"]["name"] == "fresh_user"
- assert account is not None
- assert account.user_id == payload["user"]["id"]
- assert len(owned_agents) == 2
- assert [agent.name for agent in owned_agents] == ["Toad", "Morel"]
- for agent in owned_agents:
- assert thread_repo.list_by_member(agent.id) == []
- assert entity_repo.get_by_member_id(agent.id) == []
-
-
-def test_first_explicit_thread_becomes_main_then_followups_are_children(tmp_path):
- db_path = tmp_path / "leon.db"
-
- member_repo = SQLiteMemberRepo(db_path)
- entity_repo = SQLiteEntityRepo(db_path)
- thread_repo = SQLiteThreadRepo(db_path)
-
- from storage.contracts import MemberRow, MemberType
-
- member_repo.create(
- MemberRow(
- id="owner-1",
- name="owner",
- type=MemberType.HUMAN,
- created_at=1.0,
- )
- )
- member_repo.create(
- MemberRow(
- id="member-1",
- name="Template Agent",
- type=MemberType.MYCEL_AGENT,
- owner_user_id="owner-1",
- created_at=2.0,
- )
- )
-
- app = SimpleNamespace(
- state=SimpleNamespace(
- member_repo=member_repo,
- entity_repo=entity_repo,
- thread_repo=thread_repo,
- thread_sandbox={},
- thread_cwd={},
- )
- )
-
- first = threads_router._create_owned_thread(
- app,
- "owner-1",
- CreateThreadRequest(member_id="member-1", sandbox="local"),
- is_main=False,
- )
- second = threads_router._create_owned_thread(
- app,
- "owner-1",
- CreateThreadRequest(member_id="member-1", sandbox="local"),
- is_main=False,
- )
-
- assert first["is_main"] is True
- assert first["branch_index"] == 0
- assert first["entity_name"] == "Template Agent"
- assert second["is_main"] is False
- assert second["branch_index"] == 1
- assert second["entity_name"] == "Template Agent · 分身1"
- assert thread_repo.get_main_thread("member-1")["id"] == first["thread_id"]
-
-
-def test_member_rename_recomputes_agent_entity_names(tmp_path, monkeypatch):
- db_path = tmp_path / "leon.db"
- members_dir = tmp_path / "members"
- members_dir.mkdir(parents=True)
- os.environ["LEON_DB_PATH"] = str(db_path)
-
- import backend.web.services.member_service as member_service
-
- monkeypatch.setattr(member_service, "MEMBERS_DIR", members_dir)
- monkeypatch.setattr(member_service, "LEON_HOME", tmp_path)
-
- member_repo = SQLiteMemberRepo(db_path)
- entity_repo = SQLiteEntityRepo(db_path)
- thread_repo = SQLiteThreadRepo(db_path)
-
- from storage.contracts import MemberRow, MemberType
-
- member_repo.create(
- MemberRow(
- id="owner-1",
- name="owner",
- type=MemberType.HUMAN,
- created_at=1.0,
- )
- )
- member_repo.create(
- MemberRow(
- id="member-1",
- name="Toad",
- type=MemberType.MYCEL_AGENT,
- owner_user_id="owner-1",
- created_at=2.0,
- )
- )
-
- member_dir = members_dir / "member-1"
- member_dir.mkdir()
- (member_dir / "agent.md").write_text("---\nname: Toad\n---\n\n", encoding="utf-8")
- (member_dir / "meta.json").write_text("{}", encoding="utf-8")
-
- thread_repo.create(
- thread_id="member-1-1",
- member_id="member-1",
- sandbox_type="local",
- created_at=3.0,
- is_main=True,
- branch_index=0,
- )
- thread_repo.create(
- thread_id="member-1-2",
- member_id="member-1",
- sandbox_type="local",
- created_at=4.0,
- is_main=False,
- branch_index=1,
- )
- entity_repo.create(
- EntityRow(
- id="member-1-1",
- type="agent",
- member_id="member-1",
- name="Toad",
- thread_id="member-1-1",
- created_at=3.0,
- )
- )
- entity_repo.create(
- EntityRow(
- id="member-1-2",
- type="agent",
- member_id="member-1",
- name="Toad · 分身1",
- thread_id="member-1-2",
- created_at=4.0,
- )
- )
-
- updated = member_service.update_member("member-1", name="Scout")
-
- refreshed_entities = sorted(entity_repo.get_by_member_id("member-1"), key=lambda entity: entity.thread_id or "")
- assert updated is not None
- assert updated["name"] == "Scout"
- assert [entity.name for entity in refreshed_entities] == ["Scout", "Scout · 分身1"]
-
-
-def test_resolve_main_thread_returns_null_when_member_has_no_main(tmp_path):
- db_path = tmp_path / "leon.db"
-
- member_repo = SQLiteMemberRepo(db_path)
- entity_repo = SQLiteEntityRepo(db_path)
- thread_repo = SQLiteThreadRepo(db_path)
-
- from storage.contracts import MemberRow, MemberType
-
- member_repo.create(
- MemberRow(
- id="owner-1",
- name="owner",
- type=MemberType.HUMAN,
- created_at=1.0,
- )
- )
- member_repo.create(
- MemberRow(
- id="member-1",
- name="Template Agent",
- type=MemberType.MYCEL_AGENT,
- owner_user_id="owner-1",
- created_at=2.0,
- )
- )
-
- app = SimpleNamespace(
- state=SimpleNamespace(
- member_repo=member_repo,
- entity_repo=entity_repo,
- thread_repo=thread_repo,
- thread_sandbox={},
- thread_cwd={},
- )
- )
-
- result = asyncio.run(
- threads_router.resolve_main_thread(
- ResolveMainThreadRequest(member_id="member-1"),
- "owner-1",
- app,
- )
- )
-
- assert result == {"thread": None}
diff --git a/tests/test_manager_ground_truth.py b/tests/test_manager_ground_truth.py
deleted file mode 100644
index 59027d277..000000000
--- a/tests/test_manager_ground_truth.py
+++ /dev/null
@@ -1,303 +0,0 @@
-"""Tests for SandboxManager inspect ground-truth behavior."""
-
-import asyncio
-import sqlite3
-import tempfile
-import uuid
-from datetime import datetime, timedelta
-from pathlib import Path
-
-import pytest
-
-from sandbox.manager import SandboxManager
-from sandbox.provider import Metrics, ProviderCapability, ProviderExecResult, SandboxProvider, SessionInfo
-from storage import StorageContainer
-from storage.providers.sqlite.checkpoint_repo import SQLiteCheckpointRepo
-from storage.providers.sqlite.eval_repo import SQLiteEvalRepo
-from storage.providers.supabase.checkpoint_repo import SupabaseCheckpointRepo
-from storage.providers.supabase.eval_repo import SupabaseEvalRepo
-from storage.providers.supabase.file_operation_repo import SupabaseFileOperationRepo
-from storage.providers.supabase.run_event_repo import SupabaseRunEventRepo
-from storage.providers.supabase.summary_repo import SupabaseSummaryRepo
-
-
-class FakeProvider(SandboxProvider):
- name = "fake"
-
- def __init__(self):
- self._statuses: dict[str, str] = {}
- self.fail_pause = False
-
- def get_capability(self) -> ProviderCapability:
- return ProviderCapability(
- can_pause=True,
- can_resume=True,
- can_destroy=True,
- supports_webhook=False,
- )
-
- def create_session(self, context_id: str | None = None, thread_id: str | None = None) -> SessionInfo:
- sid = f"s-{uuid.uuid4().hex[:8]}"
- self._statuses[sid] = "running"
- return SessionInfo(session_id=sid, provider=self.name, status="running")
-
- def destroy_session(self, session_id: str, sync: bool = True) -> bool:
- self._statuses.pop(session_id, None)
- return True
-
- def pause_session(self, session_id: str) -> bool:
- if self.fail_pause:
- return False
- if session_id in self._statuses:
- self._statuses[session_id] = "paused"
- return True
- return False
-
- def resume_session(self, session_id: str) -> bool:
- if session_id in self._statuses:
- self._statuses[session_id] = "running"
- return True
- return False
-
- def get_session_status(self, session_id: str) -> str:
- return self._statuses.get(session_id, "deleted")
-
- def execute(
- self,
- session_id: str,
- command: str,
- timeout_ms: int = 30000,
- cwd: str | None = None,
- ) -> ProviderExecResult:
- return ProviderExecResult(output="", exit_code=0, error=None)
-
- def read_file(self, session_id: str, path: str) -> str:
- return ""
-
- def write_file(self, session_id: str, path: str, content: str) -> str:
- return "ok"
-
- def list_dir(self, session_id: str, path: str) -> list[dict]:
- return []
-
- def get_metrics(self, session_id: str) -> Metrics | None:
- return None
-
- def list_provider_sessions(self) -> list[SessionInfo]:
- return [SessionInfo(session_id=sid, provider=self.name, status=status) for sid, status in self._statuses.items()]
-
- def create_runtime(self, terminal, lease):
- from sandbox.runtime import RemoteWrappedRuntime
-
- return RemoteWrappedRuntime(terminal, lease, self)
-
-
-class _FakeSupabaseClient:
- def table(self, table_name: str):
- raise AssertionError(f"table() should not be called in this container wiring test: {table_name}")
-
-
-def _temp_db() -> Path:
- with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
- return Path(f.name)
-
-
-@pytest.mark.skip(reason="pre-existing: get_sandbox requires lease.volume_id — FakeProvider needs update")
-def test_list_sessions_shows_running_lease_without_chat_session() -> None:
- db = _temp_db()
- try:
- provider = FakeProvider()
- mgr = SandboxManager(provider=provider, db_path=db)
- lease = mgr.lease_store.create("lease-1", provider.name)
- instance = lease.ensure_active_instance(provider)
- mgr.terminal_store.create("term-1", "thread-1", "lease-1", "/home/user")
-
- rows = mgr.list_sessions()
- assert rows
- row = rows[0]
- assert row["thread_id"] == "thread-1"
- assert row["instance_id"] == instance.instance_id
- assert row["status"] == "running"
- assert row["source"] == "lease"
- finally:
- db.unlink(missing_ok=True)
-
-
-def test_list_sessions_includes_provider_orphan(temp_db) -> None:
- provider = FakeProvider()
- mgr = SandboxManager(provider=provider, db_path=temp_db)
- orphan = provider.create_session()
- rows = mgr.list_sessions()
- assert any(r["instance_id"] == orphan.session_id and r["source"] == "provider_orphan" for r in rows)
-
-
-@pytest.mark.skip(reason="pre-existing: get_sandbox requires lease.volume_id — FakeProvider needs update")
-def test_enforce_idle_timeouts_pauses_lease_and_closes_session() -> None:
- db = _temp_db()
- try:
- provider = FakeProvider()
- mgr = SandboxManager(provider=provider, db_path=db)
-
- capability = mgr.get_sandbox("thread-1")
- asyncio.run(capability.command.execute("echo hi"))
- session_id = capability._session.session_id
- instance_id = capability._session.lease.get_instance().instance_id
-
- with sqlite3.connect(str(db)) as conn:
- conn.execute(
- """
- UPDATE chat_sessions
- SET idle_ttl_sec = 1, last_active_at = ?
- WHERE chat_session_id = ?
- """,
- ((datetime.now() - timedelta(seconds=5)).isoformat(), session_id),
- )
- conn.commit()
-
- count = mgr.enforce_idle_timeouts()
- assert count == 1
- assert provider.get_session_status(instance_id) == "paused"
- assert mgr.session_manager.get("thread-1") is None
- finally:
- db.unlink(missing_ok=True)
-
-
-@pytest.mark.skip(reason="pre-existing: get_sandbox requires lease.volume_id — FakeProvider needs update")
-def test_enforce_idle_timeouts_continues_on_pause_failure() -> None:
- db = _temp_db()
- try:
- provider = FakeProvider()
- mgr = SandboxManager(provider=provider, db_path=db)
-
- capability = mgr.get_sandbox("thread-1")
- asyncio.run(capability.command.execute("echo hi"))
- session_id = capability._session.session_id
-
- with sqlite3.connect(str(db)) as conn:
- conn.execute(
- """
- UPDATE chat_sessions
- SET idle_ttl_sec = 1, last_active_at = ?
- WHERE chat_session_id = ?
- """,
- ((datetime.now() - timedelta(seconds=5)).isoformat(), session_id),
- )
- conn.commit()
-
- provider.fail_pause = True
- count = mgr.enforce_idle_timeouts()
- assert count == 0
- assert mgr.session_manager.get("thread-1") is not None
- finally:
- db.unlink(missing_ok=True)
-
-
-def test_storage_container_sqlite_strategy_is_non_regression(temp_db) -> None:
- container = StorageContainer(main_db_path=temp_db, strategy="sqlite")
- repo = container.checkpoint_repo()
- assert isinstance(repo, SQLiteCheckpointRepo)
-
-
-def test_storage_container_supabase_repos_are_concrete() -> None:
- fake_client = _FakeSupabaseClient()
- container = StorageContainer(strategy="supabase", supabase_client=fake_client)
- checkpoint_repo = container.checkpoint_repo()
- assert isinstance(checkpoint_repo, SupabaseCheckpointRepo)
- run_event_repo = container.run_event_repo()
- assert isinstance(run_event_repo, SupabaseRunEventRepo)
- file_operation_repo = container.file_operation_repo()
- assert isinstance(file_operation_repo, SupabaseFileOperationRepo)
- summary_repo = container.summary_repo()
- assert isinstance(summary_repo, SupabaseSummaryRepo)
- eval_repo = container.eval_repo()
- assert isinstance(eval_repo, SupabaseEvalRepo)
-
-
-def test_storage_container_repo_level_provider_override_from_sqlite_default() -> None:
- fake_client = _FakeSupabaseClient()
- container = StorageContainer(
- strategy="sqlite",
- repo_providers={"checkpoint_repo": "supabase"},
- supabase_client=fake_client,
- )
- assert isinstance(container.checkpoint_repo(), SupabaseCheckpointRepo)
-
-
-def test_storage_container_repo_level_provider_override_from_supabase_default() -> None:
- fake_client = _FakeSupabaseClient()
- container = StorageContainer(
- strategy="supabase",
- repo_providers={"eval_repo": "sqlite"},
- supabase_client=fake_client,
- )
- assert isinstance(container.eval_repo(), SQLiteEvalRepo)
- assert isinstance(container.checkpoint_repo(), SupabaseCheckpointRepo)
-
-
-def test_storage_container_supabase_checkpoint_requires_client() -> None:
- container = StorageContainer(strategy="supabase")
- with pytest.raises(
- RuntimeError,
- match="Supabase strategy checkpoint_repo requires supabase_client",
- ):
- container.checkpoint_repo()
-
-
-def test_storage_container_supabase_run_event_requires_client() -> None:
- container = StorageContainer(strategy="supabase")
- with pytest.raises(
- RuntimeError,
- match="Supabase strategy run_event_repo requires supabase_client",
- ):
- container.run_event_repo()
-
-
-def test_storage_container_supabase_file_operation_requires_client() -> None:
- container = StorageContainer(strategy="supabase")
- with pytest.raises(
- RuntimeError,
- match="Supabase strategy file_operation_repo requires supabase_client",
- ):
- container.file_operation_repo()
-
-
-def test_storage_container_supabase_summary_requires_client() -> None:
- container = StorageContainer(strategy="supabase")
- with pytest.raises(
- RuntimeError,
- match="Supabase strategy summary_repo requires supabase_client",
- ):
- container.summary_repo()
-
-
-def test_storage_container_supabase_eval_requires_client() -> None:
- container = StorageContainer(strategy="supabase")
- with pytest.raises(
- RuntimeError,
- match="Supabase strategy eval_repo requires supabase_client",
- ):
- container.eval_repo()
-
-
-def test_storage_container_rejects_unknown_strategy() -> None:
- with pytest.raises(
- ValueError,
- match="Unsupported storage strategy: redis. Supported strategies: sqlite, supabase",
- ):
- StorageContainer(strategy="redis") # type: ignore[arg-type]
-
-
-def test_storage_container_rejects_unknown_repo_provider_binding() -> None:
- with pytest.raises(
- ValueError,
- match="Unknown repo provider bindings: foo_repo",
- ):
- StorageContainer(repo_providers={"foo_repo": "sqlite"})
-
-
-def test_storage_container_rejects_invalid_repo_provider_value() -> None:
- with pytest.raises(
- ValueError,
- match="Unsupported provider for checkpoint_repo",
- ):
- StorageContainer(repo_providers={"checkpoint_repo": "mysql"})
diff --git a/tests/test_monitor_core_overview.py b/tests/test_monitor_core_overview.py
deleted file mode 100644
index d80ace417..000000000
--- a/tests/test_monitor_core_overview.py
+++ /dev/null
@@ -1,415 +0,0 @@
-import pytest
-
-pytest.skip("pre-existing: monitor/resource_service API mismatch — needs test update", allow_module_level=True)
-
-import json
-from pathlib import Path
-from unittest.mock import MagicMock
-
-from backend.web.services import resource_service
-from sandbox.provider import ProviderCapability, build_resource_capabilities
-
-
-def _write_provider_config(tmp_path: Path, instance_name: str, payload: dict) -> None:
- (tmp_path / f"{instance_name}.json").write_text(json.dumps(payload))
-
-
-def _make_fake_thread_config_repo(agent_by_thread: dict[str, str]):
- """Fake ThreadConfigRepo backed by a simple dict — works for both SQLite and Supabase code paths."""
- repo = MagicMock()
- repo.lookup_config.side_effect = lambda tid: (
- {
- "sandbox_type": "local",
- "cwd": None,
- "model": None,
- "queue_mode": None,
- "observation_provider": None,
- "agent": agent_by_thread[tid],
- }
- if tid in agent_by_thread
- else None
- )
- repo.close.return_value = None
- return repo
-
-
-def _make_fake_repo(sessions: list[dict]):
- """Create a mock repo that returns pre-canned sessions."""
- repo = MagicMock()
- repo.list_sessions_with_leases.return_value = sessions
- repo.close.return_value = None
- return repo
-
-
-def _patch_resources_context(
- monkeypatch,
- *,
- tmp_path: Path,
- providers: list[dict],
- sessions: list[dict],
- snapshots: dict | None = None,
-) -> None:
- monkeypatch.setattr(resource_service, "SANDBOXES_DIR", tmp_path)
- monkeypatch.setattr(resource_service, "available_sandbox_types", lambda: providers)
- monkeypatch.setattr(
- resource_service,
- "SQLiteSandboxMonitorRepo",
- lambda: _make_fake_repo(sessions),
- )
- capability_by_provider = {
- "local": build_resource_capabilities(
- filesystem=True,
- terminal=True,
- metrics=False,
- screenshot=False,
- web=False,
- process=False,
- hooks=False,
- snapshot=False,
- ),
- "docker": build_resource_capabilities(
- filesystem=True,
- terminal=True,
- metrics=True,
- screenshot=False,
- web=False,
- process=False,
- hooks=False,
- snapshot=False,
- ),
- "e2b": build_resource_capabilities(
- filesystem=True,
- terminal=True,
- metrics=False,
- screenshot=False,
- web=False,
- process=False,
- hooks=False,
- snapshot=True,
- ),
- "daytona": build_resource_capabilities(
- filesystem=True,
- terminal=True,
- metrics=False,
- screenshot=False,
- web=False,
- process=False,
- hooks=True,
- snapshot=False,
- ),
- "agentbay": build_resource_capabilities(
- filesystem=True,
- terminal=True,
- metrics=True,
- screenshot=True,
- web=True,
- process=True,
- hooks=False,
- snapshot=False,
- ),
- }
-
- def _fake_provider_builder(config_name: str, *, sandboxes_dir: Path | None = None):
- provider_name = resource_service.resolve_provider_name(
- config_name,
- sandboxes_dir=sandboxes_dir or tmp_path,
- )
- resource_capabilities = capability_by_provider.get(provider_name)
- if resource_capabilities is None:
- return None
-
- class _FakeProvider:
- def get_capability(self) -> ProviderCapability:
- return ProviderCapability(
- can_pause=True,
- can_resume=True,
- can_destroy=True,
- resource_capabilities=resource_capabilities,
- )
-
- return _FakeProvider()
-
- monkeypatch.setattr(resource_service, "build_provider_from_config_name", _fake_provider_builder)
- if snapshots is not None:
- monkeypatch.setattr(resource_service, "list_snapshots_by_lease_ids", lambda _: snapshots)
-
-
-def test_list_resource_providers_maps_status_and_metric_metadata(tmp_path, monkeypatch):
- _write_provider_config(tmp_path, "docker_dev", {"provider": "docker"})
-
- monkeypatch.setattr(
- resource_service,
- "_make_thread_config_repo",
- lambda: _make_fake_thread_config_repo({"thread-local-1": "member-1"}),
- )
- monkeypatch.setattr(resource_service, "_member_name_map", lambda: {"member-1": "Alice"})
- _patch_resources_context(
- monkeypatch,
- tmp_path=tmp_path,
- providers=[
- {"name": "local", "available": True},
- {"name": "docker_dev", "available": False, "reason": "docker daemon down"},
- ],
- sessions=[
- {
- "provider": "local",
- "session_id": "sess-local-1",
- "thread_id": "thread-local-1",
- "observed_state": "detached",
- "desired_state": "running",
- "created_at": "2026-03-03T00:00:00",
- },
- {
- "provider": "docker_dev",
- "session_id": "sess-docker-1",
- "thread_id": "thread-docker-1",
- "observed_state": "paused",
- "desired_state": "paused",
- "created_at": "2026-03-03T00:00:00",
- },
- ],
- )
-
- payload = resource_service.list_resource_providers()
- assert "summary" in payload
- assert "providers" in payload
- assert payload["summary"]["total_providers"] == 2
- assert payload["summary"]["active_providers"] == 1
- assert payload["summary"]["unavailable_providers"] == 1
- assert payload["summary"]["running_sessions"] == 1
-
- local = next(item for item in payload["providers"] if item["id"] == "local")
- assert local["status"] == "active"
- assert local["telemetry"]["running"]["used"] == 1
- assert local["telemetry"]["running"]["source"] == "sandbox_db"
- assert local["telemetry"]["running"]["freshness"] == "cached"
- assert local["sessions"][0]["threadId"] == "thread-local-1"
- assert local["sessions"][0]["agentId"] == "member-1"
- assert local["sessions"][0]["agentName"] == "Alice"
-
- docker = next(item for item in payload["providers"] if item["id"] == "docker_dev")
- assert docker["status"] == "unavailable"
- assert docker["error"]["code"] == "PROVIDER_UNAVAILABLE"
- assert docker["sessions"][0]["status"] == "paused"
- assert docker["sessions"][0]["agentName"] == "未绑定Agent"
-
-
-def test_list_resource_providers_marks_ready_when_no_running_sessions(tmp_path, monkeypatch):
- _write_provider_config(tmp_path, "e2b_test", {"provider": "e2b"})
- _patch_resources_context(
- monkeypatch,
- tmp_path=tmp_path,
- providers=[{"name": "e2b_test", "available": True}],
- sessions=[],
- )
-
- payload = resource_service.list_resource_providers()
- assert len(payload["providers"]) == 1
- assert payload["summary"]["active_providers"] == 0
- assert payload["summary"]["running_sessions"] == 0
-
- e2b = payload["providers"][0]
- assert e2b["id"] == "e2b_test"
- assert e2b["status"] == "ready"
- assert e2b["telemetry"]["running"]["used"] == 0
- assert e2b["telemetry"]["cpu"]["freshness"] == "stale"
- assert e2b["cardCpu"]["used"] is None
- assert e2b["cardCpu"]["limit"] is None
- assert e2b["cardCpu"]["error"] is not None
-
-
-def test_list_resource_providers_prefers_config_console_url_override(tmp_path, monkeypatch):
- _write_provider_config(
- tmp_path,
- "daytona_selfhost",
- {
- "provider": "daytona",
- "console_url": "https://ops.example.com/daytona",
- "daytona": {"target": "local", "api_url": "https://daytona.example.com/api"},
- },
- )
- _patch_resources_context(
- monkeypatch,
- tmp_path=tmp_path,
- providers=[{"name": "daytona_selfhost", "available": True}],
- sessions=[],
- )
-
- payload = resource_service.list_resource_providers()
- provider = payload["providers"][0]
- assert provider["id"] == "daytona_selfhost"
- assert provider["consoleUrl"] == "https://ops.example.com/daytona"
- assert provider["type"] == "container"
-
-
-def test_list_resource_providers_uses_snapshot_metrics(tmp_path, monkeypatch):
- _write_provider_config(tmp_path, "agentbay_prod", {"provider": "agentbay"})
- _patch_resources_context(
- monkeypatch,
- tmp_path=tmp_path,
- providers=[{"name": "agentbay_prod", "available": True}],
- sessions=[
- {
- "provider": "agentbay_prod",
- "session_id": "sess-1",
- "thread_id": "thread-1",
- "lease_id": "lease-1",
- "status": "running",
- "created_at": "2026-03-03T00:00:00",
- }
- ],
- snapshots={
- "lease-1": {
- "lease_id": "lease-1",
- "cpu_used": 21.0,
- "cpu_limit": 100.0,
- "memory_used_mb": 1024.0,
- "memory_total_mb": 4096.0,
- "disk_used_gb": 4.0,
- "disk_total_gb": 20.0,
- "collected_at": "2099-01-01T00:00:00Z",
- }
- },
- )
-
- payload = resource_service.list_resource_providers()
- provider = payload["providers"][0]
- assert provider["telemetry"]["cpu"]["used"] == 21.0
- assert provider["telemetry"]["cpu"]["limit"] == 100.0
- assert provider["telemetry"]["memory"]["used"] == 1.0
- assert provider["telemetry"]["memory"]["limit"] == 4.0
- assert provider["telemetry"]["disk"]["used"] == 4.0
- assert provider["telemetry"]["disk"]["limit"] == 20.0
- assert provider["telemetry"]["cpu"]["source"] == "api"
-
-
-def test_list_resource_providers_surfaces_snapshot_probe_error(tmp_path, monkeypatch):
- _write_provider_config(tmp_path, "daytona_cloud", {"provider": "daytona"})
- _patch_resources_context(
- monkeypatch,
- tmp_path=tmp_path,
- providers=[{"name": "daytona_cloud", "available": True}],
- sessions=[
- {
- "provider": "daytona_cloud",
- "session_id": "sess-1",
- "thread_id": "thread-1",
- "lease_id": "lease-1",
- "status": "paused",
- "created_at": "2026-03-03T00:00:00",
- }
- ],
- snapshots={
- "lease-1": {
- "lease_id": "lease-1",
- "cpu_used": None,
- "cpu_limit": None,
- "memory_used_mb": None,
- "memory_total_mb": None,
- "disk_used_gb": None,
- "disk_total_gb": None,
- "probe_error": "metrics unavailable",
- "collected_at": "2099-01-01T00:00:00Z",
- }
- },
- )
-
- payload = resource_service.list_resource_providers()
- provider = payload["providers"][0]
- assert provider["telemetry"]["cpu"]["used"] is None
- assert provider["telemetry"]["cpu"]["source"] == "sandbox_db"
- assert provider["telemetry"]["cpu"]["error"] == "metrics unavailable"
- assert provider["telemetry"]["memory"]["error"] == "metrics unavailable"
- assert provider["telemetry"]["disk"]["error"] == "metrics unavailable"
-
-
-def test_thread_owner_uses_agent_ref_as_name_when_member_lookup_missing(monkeypatch):
- monkeypatch.setattr(
- resource_service,
- "_make_thread_config_repo",
- lambda: _make_fake_thread_config_repo({"thread-1": "Lex"}),
- )
- monkeypatch.setattr(resource_service, "_member_name_map", lambda: {})
-
- owners = resource_service._thread_owners(["thread-1", "thread-2"])
- assert owners["thread-1"]["agent_id"] == "Lex"
- assert owners["thread-1"]["agent_name"] == "Lex"
- assert owners["thread-2"]["agent_id"] is None
- assert owners["thread-2"]["agent_name"] == "未绑定Agent"
-
-
-def test_thread_owner_works_with_supabase_backed_thread_config(monkeypatch):
- """Thread config lookup routes through ThreadConfigRepo abstraction,
- so it works identically whether the backing store is SQLite or Supabase."""
-
- class _FakeSupabaseThreadConfigRepo:
- """Mimics SupabaseThreadConfigRepo interface without a real Supabase connection."""
-
- def __init__(self):
- self._data = {"thread-supabase-1": "agent-uuid-abc"}
-
- def lookup_config(self, thread_id: str):
- agent = self._data.get(thread_id)
- return (
- {
- "sandbox_type": "local",
- "cwd": None,
- "model": None,
- "queue_mode": None,
- "observation_provider": None,
- "agent": agent,
- }
- if agent
- else None
- )
-
- def close(self):
- pass
-
- monkeypatch.setattr(resource_service, "_make_thread_config_repo", _FakeSupabaseThreadConfigRepo)
- monkeypatch.setattr(resource_service, "_member_name_map", lambda: {"agent-uuid-abc": "Bob"})
-
- owners = resource_service._thread_owners(["thread-supabase-1", "thread-missing"])
- assert owners["thread-supabase-1"]["agent_id"] == "agent-uuid-abc"
- assert owners["thread-supabase-1"]["agent_name"] == "Bob"
- assert owners["thread-missing"]["agent_id"] is None
- assert owners["thread-missing"]["agent_name"] == "未绑定Agent"
-
-
-def test_list_resource_providers_uses_instance_capability_single_source(tmp_path, monkeypatch):
- _write_provider_config(tmp_path, "agentbay_prod", {"provider": "agentbay"})
- _patch_resources_context(
- monkeypatch,
- tmp_path=tmp_path,
- providers=[{"name": "agentbay_prod", "available": True}],
- sessions=[],
- )
-
- class _InstanceOverrideProvider:
- def get_capability(self) -> ProviderCapability:
- return ProviderCapability(
- can_pause=False,
- can_resume=False,
- can_destroy=True,
- resource_capabilities=build_resource_capabilities(
- filesystem=True,
- terminal=True,
- metrics=False,
- screenshot=False,
- web=False,
- process=False,
- hooks=False,
- snapshot=False,
- ),
- )
-
- monkeypatch.setattr(
- resource_service,
- "build_provider_from_config_name",
- lambda _name, **_kwargs: _InstanceOverrideProvider(),
- )
-
- payload = resource_service.list_resource_providers()
- provider = payload["providers"][0]
- assert provider["capabilities"]["metrics"] is False
- assert provider["capabilities"]["web"] is False
diff --git a/tests/test_mount_pluggable.py b/tests/test_mount_pluggable.py
deleted file mode 100644
index b9bcdd049..000000000
--- a/tests/test_mount_pluggable.py
+++ /dev/null
@@ -1,212 +0,0 @@
-"""Mount contract tests for pluggable multi-folder mounts."""
-
-from __future__ import annotations
-
-# TODO: pre-existing failures — provider capability API changed
-import pytest
-
-pytest.skip("pre-existing: provider capability API mismatch — needs test update", allow_module_level=True)
-
-import subprocess
-import sys
-import types
-from pathlib import Path
-
-import pytest
-
-
-def test_mount_spec_defaults_to_mount_mode() -> None:
- from sandbox.config import MountSpec
-
- mount = MountSpec.model_validate({"source": "/host/x", "target": "/sandbox/x"})
- assert mount.mode == "mount"
-
-
-def test_create_thread_request_parses_bind_mounts_with_legacy_keys() -> None:
- from backend.web.models.requests import CreateThreadRequest
-
- payload = CreateThreadRequest.model_validate(
- {
- "sandbox": "local",
- "bind_mounts": [
- {"source": "/host/tasks", "target": "/sandbox/tasks", "mode": "mount", "read_only": False},
- {"host_path": "/host/docs", "mount_path": "/sandbox/docs", "mode": "copy", "read_only": True},
- ],
- }
- )
-
- assert len(payload.bind_mounts) == 2
- assert payload.bind_mounts[0].source == "/host/tasks"
- assert payload.bind_mounts[0].target == "/sandbox/tasks"
- assert payload.bind_mounts[1].source == "/host/docs"
- assert payload.bind_mounts[1].target == "/sandbox/docs"
- assert payload.bind_mounts[1].mode == "copy"
- assert payload.bind_mounts[1].read_only is True
-
-
-def test_mount_capability_gate_detects_mismatch() -> None:
- from backend.web.routers.threads import _find_mount_capability_mismatch
- from sandbox.config import MountSpec
- from sandbox.provider import MountCapability
-
- requested = [MountSpec.model_validate({"source": "/host/a", "target": "/sandbox/a", "mode": "copy"})]
- mismatch = _find_mount_capability_mismatch(
- requested_mounts=requested,
- mount_capability=MountCapability(supports_mount=True, supports_copy=False, supports_read_only=False),
- )
-
- assert mismatch is not None
- assert mismatch["requested"] == {"mode": "copy", "read_only": False}
- assert mismatch["capability"]["supports_copy"] is False
-
-
-def test_mount_capability_gate_accepts_supported_combo() -> None:
- from backend.web.routers.threads import _find_mount_capability_mismatch
- from sandbox.config import MountSpec
- from sandbox.provider import MountCapability
-
- requested = [
- MountSpec.model_validate({"source": "/host/a", "target": "/sandbox/a", "mode": "mount", "read_only": True}),
- MountSpec.model_validate({"source": "/host/b", "target": "/sandbox/b", "mode": "copy", "read_only": False}),
- ]
- mismatch = _find_mount_capability_mismatch(
- requested_mounts=requested,
- mount_capability=MountCapability(supports_mount=True, supports_copy=True, supports_read_only=True),
- )
- assert mismatch is None
-
-
-def test_mount_capability_gate_respects_mode_handlers() -> None:
- from backend.web.routers.threads import _find_mount_capability_mismatch
- from sandbox.config import MountSpec
- from sandbox.provider import MountCapability
-
- requested = [MountSpec.model_validate({"source": "/host/a", "target": "/sandbox/a", "mode": "copy"})]
- mismatch = _find_mount_capability_mismatch(
- requested_mounts=requested,
- mount_capability=MountCapability(
- supports_mount=True,
- supports_copy=True,
- supports_read_only=True,
- mode_handlers={"mount": True, "copy": False},
- ),
- )
-
- assert mismatch is not None
- assert mismatch["requested"] == {"mode": "copy", "read_only": False}
- assert mismatch["capability"]["mode_handlers"]["copy"] is False
-
-
-def test_docker_provider_supports_multiple_bind_mount_modes(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
- from sandbox.providers.docker import DockerProvider
-
- copy_source = tmp_path / "bootstrap"
- copy_source.mkdir(parents=True, exist_ok=True)
- (copy_source / "seed.txt").write_text("hello")
-
- provider = DockerProvider(
- image="python:3.12-slim",
- mount_path="/workspace",
- default_cwd="/home/leon",
- bind_mounts=[
- {"source": "/host/tasks", "target": "/home/leon/shared/tasks", "mode": "mount", "read_only": False},
- {"source": "/host/docs", "target": "/home/leon/shared/docs", "mode": "mount", "read_only": True},
- {"source": str(copy_source), "target": "/home/leon/bootstrap", "mode": "copy", "read_only": False},
- {
- "host_path": "/host/issues",
- "mount_path": "/home/leon/shared/issues",
- "mode": "mount",
- "read_only": False,
- },
- ],
- )
-
- calls: list[list[str]] = []
-
- def fake_run(cmd: list[str], **_: object) -> subprocess.CompletedProcess[str]:
- calls.append(cmd)
- return subprocess.CompletedProcess(cmd, 0, stdout="container-123\n", stderr="")
-
- monkeypatch.setattr(provider, "_run", fake_run)
-
- session = provider.create_session(context_id="ctx-volume")
- assert session.status == "running"
-
- run_cmd = calls[0]
- volume_specs = [run_cmd[i + 1] for i, token in enumerate(run_cmd) if token == "-v"]
- assert "/host/tasks:/home/leon/shared/tasks" in volume_specs
- assert "/host/docs:/home/leon/shared/docs:ro" in volume_specs
- assert "/host/issues:/home/leon/shared/issues" in volume_specs
- assert "ctx-volume:/workspace" in volume_specs
- assert all(str(copy_source) not in spec for spec in volume_specs)
-
- serialized_calls = [" ".join(cmd) for cmd in calls]
- assert any("docker cp" in cmd and "bootstrap/." in cmd and "container-123:/home/leon/bootstrap" in cmd for cmd in serialized_calls)
-
-
-def test_daytona_provider_maps_multiple_mounts_to_http_payload(monkeypatch: pytest.MonkeyPatch) -> None:
- captured: dict[str, object] = {}
-
- class FakeDaytona:
- def __init__(self) -> None:
- pass
-
- fake_sdk = types.SimpleNamespace(Daytona=FakeDaytona)
- monkeypatch.setitem(sys.modules, "daytona_sdk", fake_sdk)
-
- import sandbox.providers.daytona as daytona_module
- from sandbox.providers.daytona import DaytonaProvider
-
- class FakeResponse:
- def __init__(self, status_code: int, payload: dict[str, object]) -> None:
- self.status_code = status_code
- self._payload = payload
- self.text = str(payload)
-
- def json(self) -> dict[str, object]:
- return self._payload
-
- class FakeClient:
- def __init__(self, timeout: float) -> None:
- self.timeout = timeout
-
- def __enter__(self) -> FakeClient:
- return self
-
- def __exit__(self, exc_type, exc, tb) -> None:
- return None
-
- def post(self, url: str, headers: dict[str, str], json: dict[str, object]) -> FakeResponse:
- captured["url"] = url
- captured["headers"] = headers
- captured["json"] = json
- return FakeResponse(200, {"id": "sb-123"})
-
- monkeypatch.setattr(daytona_module.httpx, "Client", FakeClient)
-
- provider = DaytonaProvider(
- api_key="token-1",
- api_url="http://127.0.0.1:3000/api",
- bind_mounts=[
- {"source": "/host/tasks", "target": "/home/daytona/shared/tasks", "mode": "mount", "read_only": False},
- {"source": "/host/docs", "target": "/home/daytona/shared/docs", "mode": "mount", "read_only": True},
- {"source": "/host/bootstrap", "target": "/home/daytona/bootstrap", "mode": "copy", "read_only": False},
- {
- "host_path": "/host/issues",
- "mount_path": "/home/daytona/shared/issues",
- "mode": "mount",
- "read_only": False,
- },
- ],
- )
-
- sandbox_id = provider._create_via_http(provider.bind_mounts)
- assert sandbox_id == "sb-123"
-
- payload = captured["json"]
- assert isinstance(payload, dict)
- assert payload.get("bindMounts") == [
- {"hostPath": "/host/tasks", "mountPath": "/home/daytona/shared/tasks", "readOnly": False},
- {"hostPath": "/host/docs", "mountPath": "/home/daytona/shared/docs", "readOnly": True},
- {"hostPath": "/host/issues", "mountPath": "/home/daytona/shared/issues", "readOnly": False},
- ]
diff --git a/tests/test_remote_sandbox.py b/tests/test_remote_sandbox.py
deleted file mode 100644
index c0a48e22a..000000000
--- a/tests/test_remote_sandbox.py
+++ /dev/null
@@ -1,142 +0,0 @@
-"""Unit tests for RemoteSandbox._run_init_commands and RemoteSandbox.close()."""
-
-# TODO: pre-existing: get_sandbox now requires lease.volume_id
-import pytest
-
-pytest.skip("pre-existing: RemoteSandbox tests need volume setup — needs test update", allow_module_level=True)
-
-import asyncio
-import tempfile
-from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock
-
-import pytest
-
-from sandbox.base import RemoteSandbox
-from sandbox.config import SandboxConfig
-from sandbox.interfaces.executor import ExecuteResult
-from sandbox.provider import ProviderCapability, SessionInfo
-from sandbox.thread_context import set_current_thread_id
-
-
-@pytest.fixture
-def temp_db():
- with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
- db_path = Path(f.name)
- yield db_path
- db_path.unlink(missing_ok=True)
-
-
-def _make_provider(on_init_exit_code: int = 0) -> MagicMock:
- provider = MagicMock()
- provider.name = "mock"
- provider.default_cwd = "/tmp"
- provider.get_capability.return_value = ProviderCapability(
- can_pause=True,
- can_resume=True,
- can_destroy=True,
- supports_status_probe=False,
- eager_instance_binding=True,
- )
- provider.create_session.return_value = SessionInfo(session_id="inst-1", provider="mock", status="running")
- provider.get_session_status.return_value = "running"
- provider.pause_session.return_value = True
- provider.resume_session.return_value = True
- provider.destroy_session.return_value = True
-
- runtime = MagicMock()
- runtime.runtime_id = "runtime-test-000001"
- runtime.chat_session_id = None
- runtime.execute = AsyncMock(
- return_value=ExecuteResult(
- exit_code=on_init_exit_code,
- stdout="ok" if on_init_exit_code == 0 else "",
- stderr="" if on_init_exit_code == 0 else "fail",
- )
- )
- runtime.close = AsyncMock()
- provider.create_runtime.return_value = runtime
- return provider
-
-
-def _make_sandbox(provider, db_path: Path, init_commands: list[str] | None = None, on_exit: str = "pause") -> RemoteSandbox:
- config = SandboxConfig(provider="mock", on_exit=on_exit, init_commands=init_commands or [])
- return RemoteSandbox(
- provider=provider,
- config=config,
- default_cwd="/tmp",
- db_path=db_path,
- name="mock",
- working_dir="/tmp",
- env_label="Mock",
- )
-
-
-# ── _run_init_commands ───────────────────────────────────────────────────────
-
-
-def test_run_init_commands_happy_path(temp_db):
- sandbox = _make_sandbox(_make_provider(), temp_db, init_commands=["echo hello"])
- set_current_thread_id("thread-init-1")
- assert sandbox._get_capability() is not None
- assert "thread-init-1" in sandbox._init_commands_run
-
-
-def test_run_init_commands_failure_raises(temp_db):
- sandbox = _make_sandbox(_make_provider(on_init_exit_code=1), temp_db, init_commands=["bad-cmd"])
- set_current_thread_id("thread-init-fail")
- with pytest.raises(RuntimeError, match="Init command #1 failed"):
- sandbox._get_capability()
-
-
-def test_run_init_commands_idempotent(temp_db):
- sandbox = _make_sandbox(_make_provider(), temp_db, init_commands=["echo once"])
- set_current_thread_id("thread-init-2")
- sandbox._get_capability()
- sandbox._get_capability()
- assert len(sandbox._init_commands_run) == 1
-
-
-@pytest.mark.asyncio
-async def test_run_init_commands_inside_running_loop(temp_db):
- """Covers the run_coroutine_threadsafe branch: _get_capability called from a running event loop."""
- sandbox = _make_sandbox(_make_provider(), temp_db, init_commands=["echo hello"])
- set_current_thread_id("thread-init-async")
- await asyncio.to_thread(sandbox._get_capability)
- assert "thread-init-async" in sandbox._init_commands_run
-
-
-# ── RemoteSandbox.close() ────────────────────────────────────────────────────
-
-
-def test_close_pause_calls_pause_all_sessions(temp_db):
- sandbox = _make_sandbox(_make_provider(), temp_db, on_exit="pause")
- sandbox._manager.pause_all_sessions = MagicMock(return_value=2)
- sandbox.close()
- sandbox._manager.pause_all_sessions.assert_called_once()
-
-
-def test_close_destroy_calls_destroy_for_each_session(temp_db):
- sandbox = _make_sandbox(_make_provider(), temp_db, on_exit="destroy")
- sandbox._manager.list_sessions = MagicMock(return_value=[{"thread_id": "t1"}, {"thread_id": "t2"}, {"thread_id": "t3"}])
- sandbox._manager.destroy_session = MagicMock(return_value=True)
- sandbox.close()
- assert sandbox._manager.destroy_session.call_count == 3
-
-
-def test_close_destroy_continues_after_one_failure(temp_db):
- sandbox = _make_sandbox(_make_provider(), temp_db, on_exit="destroy")
- sandbox._manager.list_sessions = MagicMock(return_value=[{"thread_id": "t1"}, {"thread_id": "t2"}, {"thread_id": "t3"}])
-
- call_count = 0
-
- def side_effect(thread_id):
- nonlocal call_count
- call_count += 1
- if thread_id == "t2":
- raise RuntimeError("network error")
- return True
-
- sandbox._manager.destroy_session = MagicMock(side_effect=side_effect)
- sandbox.close()
- assert call_count == 3
diff --git a/tests/test_resource_snapshot.py b/tests/test_resource_snapshot.py
deleted file mode 100644
index 314e2a194..000000000
--- a/tests/test_resource_snapshot.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import pytest
-
-pytest.skip("pre-existing: resource_snapshot API mismatch — needs test update", allow_module_level=True)
-
-from pathlib import Path
-from unittest.mock import MagicMock
-
-from sandbox.provider import Metrics, ProviderCapability, ProviderExecResult, SandboxProvider, SessionInfo
-from sandbox.resource_snapshot import (
- ensure_resource_snapshot_table,
- list_snapshots_by_lease_ids,
- probe_and_upsert_for_instance,
- upsert_lease_resource_snapshot,
-)
-
-
-class _FakeProvider(SandboxProvider):
- name = "fake"
-
- def get_capability(self) -> ProviderCapability:
- return ProviderCapability(
- can_pause=True,
- can_resume=True,
- can_destroy=True,
- resource_capabilities={
- "filesystem": True,
- "terminal": True,
- "metrics": True,
- "screenshot": False,
- "web": False,
- "process": False,
- "hooks": False,
- "mount": False,
- },
- )
-
- def create_session(self, context_id: str | None = None) -> SessionInfo:
- raise RuntimeError("unused")
-
- def destroy_session(self, session_id: str, sync: bool = True) -> bool:
- raise RuntimeError("unused")
-
- def pause_session(self, session_id: str) -> bool:
- raise RuntimeError("unused")
-
- def resume_session(self, session_id: str) -> bool:
- raise RuntimeError("unused")
-
- def get_session_status(self, session_id: str) -> str:
- raise RuntimeError("unused")
-
- def execute(self, session_id: str, command: str, timeout_ms: int = 30000, cwd: str | None = None) -> ProviderExecResult:
- raise RuntimeError("unused")
-
- def read_file(self, session_id: str, path: str) -> str:
- raise RuntimeError("unused")
-
- def write_file(self, session_id: str, path: str, content: str) -> str:
- raise RuntimeError("unused")
-
- def list_dir(self, session_id: str, path: str) -> list[dict]:
- raise RuntimeError("unused")
-
- def get_metrics(self, session_id: str) -> Metrics | None:
- return Metrics(
- cpu_percent=23.5,
- memory_used_mb=1536.0,
- memory_total_mb=4096.0,
- disk_used_gb=8.0,
- disk_total_gb=20.0,
- network_rx_kbps=30.0,
- network_tx_kbps=40.0,
- )
-
-
-def test_upsert_and_query_snapshot(tmp_path):
- db_path = Path(tmp_path) / "sandbox.db"
- ensure_resource_snapshot_table(db_path)
- upsert_lease_resource_snapshot(
- lease_id="lease-1",
- provider_name="agentbay_prod",
- observed_state="running",
- probe_mode="running_runtime",
- cpu_used=12.0,
- cpu_limit=100.0,
- memory_used_mb=512.0,
- memory_total_mb=1024.0,
- disk_used_gb=2.0,
- disk_total_gb=10.0,
- network_rx_kbps=1.0,
- network_tx_kbps=2.0,
- probe_error=None,
- db_path=db_path,
- )
- snapshots = list_snapshots_by_lease_ids(["lease-1"], db_path=db_path)
- assert snapshots["lease-1"]["provider_name"] == "agentbay_prod"
- assert snapshots["lease-1"]["cpu_used"] == 12.0
-
-
-def test_probe_and_upsert_from_provider_metrics(tmp_path):
- db_path = Path(tmp_path) / "sandbox.db"
- provider = _FakeProvider()
- result = probe_and_upsert_for_instance(
- lease_id="lease-2",
- provider_name="fake_provider",
- observed_state="running",
- probe_mode="create_running",
- provider=provider,
- instance_id="instance-1",
- db_path=db_path,
- )
- assert result["ok"] is True
- snapshots = list_snapshots_by_lease_ids(["lease-2"], db_path=db_path)
- assert snapshots["lease-2"]["cpu_used"] == 23.5
- assert snapshots["lease-2"]["memory_total_mb"] == 4096.0
-
-
-def test_probe_and_upsert_ignores_non_numeric_metrics(tmp_path):
- db_path = Path(tmp_path) / "sandbox.db"
- provider = _FakeProvider()
- provider.get_metrics = lambda _session_id: MagicMock()
- result = probe_and_upsert_for_instance(
- lease_id="lease-3",
- provider_name="fake_provider",
- observed_state="running",
- probe_mode="create_running",
- provider=provider,
- instance_id="instance-1",
- db_path=db_path,
- )
- assert result["ok"] is False
- assert result["error"] == "metrics unavailable"
- snapshots = list_snapshots_by_lease_ids(["lease-3"], db_path=db_path)
- assert snapshots["lease-3"]["cpu_used"] is None
- assert snapshots["lease-3"]["probe_error"] == "metrics unavailable"
diff --git a/tests/test_sandbox_e2e.py b/tests/test_sandbox_e2e.py
deleted file mode 100644
index f1dd64383..000000000
--- a/tests/test_sandbox_e2e.py
+++ /dev/null
@@ -1,234 +0,0 @@
-"""End-to-end headless test for sandbox mode.
-
-Tests that LeonAgent can:
-1. Initialize with sandbox=docker or sandbox=e2b
-2. Execute commands in the sandbox
-3. Read/write files in the sandbox
-4. All paths resolve correctly (no macOS firmlink leaks)
-
-Usage:
- # Docker sandbox (requires Docker running)
- pytest tests/test_sandbox_e2e.py -k docker -s
-
- # E2B sandbox (requires E2B_API_KEY)
- pytest tests/test_sandbox_e2e.py -k e2b -s
-
- # Both
- pytest tests/test_sandbox_e2e.py -s
-"""
-
-import pytest
-
-pytest.skip("pre-existing: Docker/E2B e2e tests require running providers", allow_module_level=True)
-
-import os
-import sys
-import uuid
-
-import pytest
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-# Load config.env so API keys are available
-from config.env_manager import ConfigManager
-
-ConfigManager().load_to_env()
-
-
-def _can_docker() -> bool:
- """Check if Docker is available."""
- import subprocess
-
- try:
- subprocess.run(["docker", "info"], capture_output=True, timeout=5)
- return True
- except Exception:
- return False
-
-
-def _can_e2b() -> bool:
- if os.getenv("E2B_API_KEY"):
- return True
- # Check sandbox config file
- from pathlib import Path
-
- config_file = Path.home() / ".leon" / "sandboxes" / "e2b.json"
- if config_file.exists():
- import json
-
- data = json.loads(config_file.read_text())
- key = data.get("e2b", {}).get("api_key")
- if key:
- os.environ["E2B_API_KEY"] = key
- return True
- return False
-
-
-def _invoke_and_extract(agent, message: str, thread_id: str) -> dict:
- """Invoke agent via async runner and extract tool calls + response."""
- import asyncio
-
- from core.runner import NonInteractiveRunner
- from sandbox.thread_context import set_current_thread_id
-
- set_current_thread_id(thread_id)
- runner = NonInteractiveRunner(agent, thread_id, debug=True)
- result = asyncio.run(runner.run_turn(message))
-
- return {
- "tool_calls": [tc["name"] for tc in result.get("tool_calls", [])],
- "response": result.get("response", ""),
- "error": result.get("error"),
- }
-
-
-def _get_model_name() -> str:
- return os.getenv("MODEL_NAME") or "claude-sonnet-4-5-20250929"
-
-
-# ---------------------------------------------------------------------------
-# Docker E2E
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.skipif(not _can_docker(), reason="Docker not available")
-class TestDockerSandboxE2E:
- def test_agent_init_and_command(self):
- """Agent initializes with docker sandbox and can run commands."""
- from agent import create_leon_agent
-
- thread_id = f"test-docker-{uuid.uuid4().hex[:8]}"
- agent = None
- try:
- agent = create_leon_agent(
- model_name=_get_model_name(),
- sandbox="docker",
- verbose=True,
- )
-
- # Verify workspace_root is the sandbox path, not a local resolved path
- assert str(agent.workspace_root) == "/workspace", f"workspace_root should be /workspace, got {agent.workspace_root}"
-
- # Ensure session exists before invoking
- agent._sandbox.ensure_session(thread_id)
-
- extracted = _invoke_and_extract(
- agent,
- "Use the run_command tool to execute: echo 'SANDBOX_OK' && pwd",
- thread_id,
- )
-
- print("\n--- Result ---")
- print(f"Response: {extracted['response'][:500]}")
- print(f"Tool calls: {extracted['tool_calls']}")
-
- assert "run_command" in extracted["tool_calls"], f"Expected run_command in {extracted['tool_calls']}"
-
- finally:
- if agent:
- agent.close()
-
- def test_file_operations(self):
- """Agent can read and write files in docker sandbox."""
- from agent import create_leon_agent
-
- thread_id = f"test-docker-{uuid.uuid4().hex[:8]}"
- agent = None
- try:
- agent = create_leon_agent(
- model_name=_get_model_name(),
- sandbox="docker",
- verbose=True,
- )
- agent._sandbox.ensure_session(thread_id)
-
- extracted = _invoke_and_extract(
- agent,
- "Write the text 'hello from test' to /workspace/test_e2e.txt, then read it back and tell me the content.",
- thread_id,
- )
-
- print("\n--- Result ---")
- print(f"Response: {extracted['response'][:500]}")
- print(f"Tool calls: {extracted['tool_calls']}")
-
- assert "write_file" in extracted["tool_calls"], f"Expected write_file in {extracted['tool_calls']}"
-
- finally:
- if agent:
- agent.close()
-
-
-# ---------------------------------------------------------------------------
-# E2B E2E
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.skipif(not _can_e2b(), reason="E2B_API_KEY not set")
-class TestE2BSandboxE2E:
- def test_agent_init_and_command(self):
- """Agent initializes with e2b sandbox and can run commands."""
- from agent import create_leon_agent
-
- thread_id = f"test-e2b-{uuid.uuid4().hex[:8]}"
- agent = None
- try:
- agent = create_leon_agent(
- model_name=_get_model_name(),
- sandbox="e2b",
- verbose=True,
- )
-
- assert str(agent.workspace_root) == "/home/user", f"workspace_root should be /home/user, got {agent.workspace_root}"
-
- agent._sandbox.ensure_session(thread_id)
-
- extracted = _invoke_and_extract(
- agent,
- "Use the run_command tool to execute: echo 'E2B_OK' && uname -a",
- thread_id,
- )
-
- print("\n--- Result ---")
- print(f"Response: {extracted['response'][:500]}")
- print(f"Tool calls: {extracted['tool_calls']}")
-
- assert "run_command" in extracted["tool_calls"], f"Expected run_command in {extracted['tool_calls']}"
-
- finally:
- if agent:
- agent.close()
-
- def test_file_operations(self):
- """Agent can read and write files in e2b sandbox."""
- from agent import create_leon_agent
-
- thread_id = f"test-e2b-{uuid.uuid4().hex[:8]}"
- agent = None
- try:
- agent = create_leon_agent(
- model_name=_get_model_name(),
- sandbox="e2b",
- verbose=True,
- )
- agent._sandbox.ensure_session(thread_id)
-
- extracted = _invoke_and_extract(
- agent,
- "Write the text 'e2b test content' to /home/user/test_e2e.txt, then read it back and tell me the content.",
- thread_id,
- )
-
- print("\n--- Result ---")
- print(f"Response: {extracted['response'][:500]}")
- print(f"Tool calls: {extracted['tool_calls']}")
-
- assert "write_file" in extracted["tool_calls"], f"Expected write_file in {extracted['tool_calls']}"
-
- finally:
- if agent:
- agent.close()
-
-
-if __name__ == "__main__":
- pytest.main([__file__, "-s", "-v"])
diff --git a/tests/test_storage_runtime_wiring.py b/tests/test_storage_runtime_wiring.py
deleted file mode 100644
index fcb60e8ae..000000000
--- a/tests/test_storage_runtime_wiring.py
+++ /dev/null
@@ -1,389 +0,0 @@
-"""Runtime storage wiring tests for backend agent creation path."""
-
-from __future__ import annotations
-
-import asyncio
-from pathlib import Path
-from types import SimpleNamespace
-from typing import Any
-
-import pytest
-
-from backend.web.services import agent_pool
-from backend.web.services.event_buffer import ThreadEventBuffer
-from backend.web.services.streaming_service import _run_agent_to_buffer
-from storage.providers.sqlite.checkpoint_repo import SQLiteCheckpointRepo
-from storage.providers.sqlite.eval_repo import SQLiteEvalRepo
-from storage.providers.supabase.checkpoint_repo import SupabaseCheckpointRepo
-
-
-class _FakeSupabaseClient:
- def table(self, table_name: str):
- raise AssertionError(f"table() should not be called in this wiring test: {table_name}")
-
-
-def _build_fake_supabase_client() -> _FakeSupabaseClient:
- return _FakeSupabaseClient()
-
-
-def _build_invalid_supabase_client() -> object:
- return object()
-
-
-def _capture_create_leon_agent(monkeypatch: pytest.MonkeyPatch) -> dict[str, Any]:
- captured: dict[str, Any] = {}
-
- def _fake_create_leon_agent(**kwargs):
- captured.update(kwargs)
- return object()
-
- monkeypatch.setattr(agent_pool, "create_leon_agent", _fake_create_leon_agent)
- return captured
-
-
-def test_create_agent_sync_wires_supabase_storage_container(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
- monkeypatch.setenv("LEON_STORAGE_STRATEGY", "supabase")
- monkeypatch.setenv(
- "LEON_SUPABASE_CLIENT_FACTORY",
- "tests.test_storage_runtime_wiring:_build_fake_supabase_client",
- )
- monkeypatch.setenv("LEON_DB_PATH", str(tmp_path / "leon.db"))
- monkeypatch.setenv("LEON_EVAL_DB_PATH", str(tmp_path / "eval.db"))
-
- captured = _capture_create_leon_agent(monkeypatch)
- agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
-
- container = captured["storage_container"]
- assert isinstance(container.checkpoint_repo(), SupabaseCheckpointRepo)
-
-
-def test_create_agent_sync_supabase_missing_runtime_config_fails_loud(
- monkeypatch: pytest.MonkeyPatch,
- tmp_path: Path,
-) -> None:
- monkeypatch.setenv("LEON_STORAGE_STRATEGY", "supabase")
- monkeypatch.delenv("LEON_SUPABASE_CLIENT_FACTORY", raising=False)
-
- with pytest.raises(
- RuntimeError,
- match="LEON_SUPABASE_CLIENT_FACTORY",
- ):
- agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
-
-
-def test_create_agent_sync_supabase_invalid_runtime_config_fails_loud(
- monkeypatch: pytest.MonkeyPatch,
- tmp_path: Path,
-) -> None:
- monkeypatch.setenv("LEON_STORAGE_STRATEGY", "supabase")
- monkeypatch.setenv(
- "LEON_SUPABASE_CLIENT_FACTORY",
- "tests.test_storage_runtime_wiring:_build_invalid_supabase_client",
- )
-
- with pytest.raises(RuntimeError, match="callable table\\(name\\) API"):
- agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
-
-
-def test_create_agent_sync_defaults_to_sqlite_storage_container(
- monkeypatch: pytest.MonkeyPatch,
- tmp_path: Path,
-) -> None:
- monkeypatch.delenv("LEON_STORAGE_STRATEGY", raising=False)
- monkeypatch.delenv("LEON_SUPABASE_CLIENT_FACTORY", raising=False)
- monkeypatch.setenv("LEON_DB_PATH", str(tmp_path / "leon.db"))
-
- captured = _capture_create_leon_agent(monkeypatch)
- agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
-
- container = captured["storage_container"]
- assert isinstance(container.checkpoint_repo(), SQLiteCheckpointRepo)
-
-
-def test_create_agent_sync_repo_override_supabase_with_sqlite_default(
- monkeypatch: pytest.MonkeyPatch,
- tmp_path: Path,
-) -> None:
- monkeypatch.setenv("LEON_STORAGE_STRATEGY", "sqlite")
- monkeypatch.setenv("LEON_STORAGE_REPO_PROVIDERS", '{"checkpoint_repo":"supabase"}')
- monkeypatch.setenv(
- "LEON_SUPABASE_CLIENT_FACTORY",
- "tests.test_storage_runtime_wiring:_build_fake_supabase_client",
- )
- monkeypatch.setenv("LEON_DB_PATH", str(tmp_path / "leon.db"))
-
- captured = _capture_create_leon_agent(monkeypatch)
- agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
- container = captured["storage_container"]
- assert isinstance(container.checkpoint_repo(), SupabaseCheckpointRepo)
-
-
-def test_create_agent_sync_repo_override_sqlite_with_supabase_default(
- monkeypatch: pytest.MonkeyPatch,
- tmp_path: Path,
-) -> None:
- monkeypatch.setenv("LEON_STORAGE_STRATEGY", "supabase")
- monkeypatch.setenv("LEON_STORAGE_REPO_PROVIDERS", '{"eval_repo":"sqlite"}')
- monkeypatch.setenv(
- "LEON_SUPABASE_CLIENT_FACTORY",
- "tests.test_storage_runtime_wiring:_build_fake_supabase_client",
- )
- monkeypatch.setenv("LEON_DB_PATH", str(tmp_path / "leon.db"))
- monkeypatch.setenv("LEON_EVAL_DB_PATH", str(tmp_path / "eval.db"))
-
- captured = _capture_create_leon_agent(monkeypatch)
- agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
- container = captured["storage_container"]
- assert isinstance(container.eval_repo(), SQLiteEvalRepo)
-
-
-@pytest.mark.skip(reason="pre-existing: storage wiring/factory API mismatch")
-def test_create_agent_sync_all_sqlite_override_with_supabase_default_does_not_require_factory(
- monkeypatch: pytest.MonkeyPatch,
- tmp_path: Path,
-) -> None:
- monkeypatch.setenv("LEON_STORAGE_STRATEGY", "supabase")
- monkeypatch.setenv(
- "LEON_STORAGE_REPO_PROVIDERS",
- (
- '{"checkpoint_repo":"sqlite","thread_config_repo":"sqlite","run_event_repo":"sqlite",'
- '"file_operation_repo":"sqlite","summary_repo":"sqlite","eval_repo":"sqlite",'
- '"queue_repo":"sqlite","workspace_repo":"sqlite"}'
- ),
- )
- monkeypatch.delenv("LEON_SUPABASE_CLIENT_FACTORY", raising=False)
- monkeypatch.setenv("LEON_DB_PATH", str(tmp_path / "leon.db"))
- monkeypatch.setenv("LEON_EVAL_DB_PATH", str(tmp_path / "eval.db"))
-
- captured = _capture_create_leon_agent(monkeypatch)
- agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
- container = captured["storage_container"]
- assert isinstance(container.checkpoint_repo(), SQLiteCheckpointRepo)
-
-
-def test_create_agent_sync_repo_override_supabase_without_runtime_config_fails_loud(
- monkeypatch: pytest.MonkeyPatch,
- tmp_path: Path,
-) -> None:
- monkeypatch.setenv("LEON_STORAGE_STRATEGY", "sqlite")
- monkeypatch.setenv("LEON_STORAGE_REPO_PROVIDERS", '{"checkpoint_repo":"supabase"}')
- monkeypatch.delenv("LEON_SUPABASE_CLIENT_FACTORY", raising=False)
-
- with pytest.raises(RuntimeError, match="LEON_SUPABASE_CLIENT_FACTORY"):
- agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
-
-
-def test_create_agent_sync_invalid_repo_override_json_fails_loud(
- monkeypatch: pytest.MonkeyPatch,
- tmp_path: Path,
-) -> None:
- monkeypatch.setenv("LEON_STORAGE_REPO_PROVIDERS", "not-json")
-
- with pytest.raises(RuntimeError, match="Invalid LEON_STORAGE_REPO_PROVIDERS"):
- agent_pool.create_agent_sync("local", workspace_root=tmp_path, model_name="leon:test")
-
-
-class _FakeRunEventRepo:
- def __init__(self) -> None:
- self.append_calls: list[dict[str, Any]] = []
- self.closed = False
-
- def append_event(
- self,
- thread_id: str,
- run_id: str,
- event_type: str,
- data: dict[str, Any],
- message_id: str | None = None,
- ) -> int:
- self.append_calls.append(
- {
- "thread_id": thread_id,
- "run_id": run_id,
- "event_type": event_type,
- "data": data,
- "message_id": message_id,
- }
- )
- return len(self.append_calls)
-
- def list_run_ids(self, thread_id: str) -> list[str]:
- return []
-
- def delete_runs(self, thread_id: str, run_ids: list[str]) -> int:
- return 0
-
- def close(self) -> None:
- self.closed = True
-
-
-class _FakeStorageContainer:
- def __init__(self, repo: _FakeRunEventRepo) -> None:
- self._repo = repo
-
- def run_event_repo(self) -> _FakeRunEventRepo:
- return self._repo
-
-
-class _FakeGraphAgent:
- checkpointer = None
-
- async def astream(self, *_args: Any, **_kwargs: Any):
- if False: # pragma: no cover
- yield None
-
-
-class _FakeRuntime:
- current_state = "IDLE"
-
- def get_pending_subagent_events(self) -> list[tuple[str, list[dict[str, Any]]]]:
- return []
-
- def get_status_dict(self) -> dict[str, Any]:
- return {}
-
- def set_event_callback(self, cb: Any) -> None:
- pass
-
- def set_activity_sink(self, sink: Any) -> None:
- pass
-
- def emit_activity_event(self, event: dict[str, Any]) -> None:
- pass
-
- def transition(self, new_state: Any) -> bool:
- return True
-
-
-class _FakeRuntimeAgent:
- def __init__(self, storage_container: Any = None) -> None:
- self.agent = _FakeGraphAgent()
- self.storage_container = storage_container
- self.runtime = _FakeRuntime()
-
-
-@pytest.mark.skip(reason="pre-existing: storage wiring/factory API mismatch")
-def test_run_runtime_consumes_storage_container_run_event_repo(monkeypatch: pytest.MonkeyPatch) -> None:
- async def _run() -> None:
- repo = _FakeRunEventRepo()
- agent = _FakeRuntimeAgent(storage_container=_FakeStorageContainer(repo))
- from unittest.mock import MagicMock
-
- qm = MagicMock()
- qm.dequeue.return_value = None
- app = SimpleNamespace(state=SimpleNamespace(thread_tasks={}, thread_event_buffers={}, subagent_buffers={}, queue_manager=qm))
- thread_buf = ThreadEventBuffer()
- run_id = "run-1"
-
- await _run_agent_to_buffer(agent, "thread-1", "hello", app, False, thread_buf, run_id)
-
- assert repo.append_calls, "run path should persist events through storage_container.run_event_repo()"
- assert any(c["event_type"] == "run_done" for c in repo.append_calls)
- assert repo.closed is True
-
- asyncio.run(_run())
-
-
-@pytest.mark.skip(reason="pre-existing: storage wiring/factory API mismatch")
-def test_run_runtime_without_storage_container_keeps_sqlite_event_store_path(monkeypatch: pytest.MonkeyPatch) -> None:
- async def _run() -> None:
- import backend.web.services.event_store as event_store
-
- calls: list[dict[str, Any]] = []
-
- async def _fake_append_event(
- thread_id: str,
- run_id: str,
- event: dict[str, Any],
- message_id: str | None = None,
- run_event_repo: Any | None = None,
- ) -> int:
- calls.append(
- {
- "thread_id": thread_id,
- "run_id": run_id,
- "event": event,
- "message_id": message_id,
- "run_event_repo": run_event_repo,
- }
- )
- return len(calls)
-
- async def _fake_cleanup_old_runs(
- thread_id: str,
- keep_latest: int = 1,
- run_event_repo: Any | None = None,
- ) -> int:
- return 0
-
- monkeypatch.setattr(event_store, "append_event", _fake_append_event)
- monkeypatch.setattr(event_store, "cleanup_old_runs", _fake_cleanup_old_runs)
-
- from unittest.mock import MagicMock
-
- qm = MagicMock()
- qm.dequeue.return_value = None
- agent = _FakeRuntimeAgent(storage_container=None)
- app = SimpleNamespace(state=SimpleNamespace(thread_tasks={}, thread_event_buffers={}, subagent_buffers={}, queue_manager=qm))
- thread_buf = ThreadEventBuffer()
- run_id = "run-1"
-
- await _run_agent_to_buffer(agent, "thread-1", "hello", app, False, thread_buf, run_id)
-
- assert calls, "sqlite event store path should still be used when no storage container is injected"
- assert all(call["run_event_repo"] is None for call in calls)
-
- asyncio.run(_run())
-
-
-@pytest.mark.skip(reason="pre-existing: thread_config_repo removed from StorageContainer")
-def test_purge_thread_deletes_all_repo_data(tmp_path: Path) -> None:
- from storage.container import StorageContainer
-
- db_path = tmp_path / "leon.db"
- eval_db = tmp_path / "eval.db"
- container = StorageContainer(main_db_path=db_path, eval_db_path=eval_db, strategy="sqlite")
-
- # Populate repos for thread t-1 and t-2
- tc = container.thread_config_repo()
- tc.save_metadata("t-1", "docker", "/ws")
- tc.save_metadata("t-2", "local", None)
- tc.close()
-
- re_repo = container.run_event_repo()
- re_repo.append_event("t-1", "r-1", "status", {"ok": True})
- re_repo.append_event("t-2", "r-2", "status", {"ok": True})
- re_repo.close()
-
- fo = container.file_operation_repo()
- fo.record("t-1", "cp-1", "write", "/a.txt", None, "x")
- fo.record("t-2", "cp-2", "write", "/b.txt", None, "y")
- fo.close()
-
- sr = container.summary_repo()
- sr.ensure_tables()
- sr.save_summary("s-1", "t-1", "summary", 10, 20, False, None, "2025-01-01")
- sr.close()
-
- # Purge t-1
- container.purge_thread("t-1")
-
- # Verify t-1 is gone, t-2 remains
- tc2 = container.thread_config_repo()
- assert tc2.lookup_metadata("t-1") is None
- assert tc2.lookup_metadata("t-2") == ("local", None)
- tc2.close()
-
- re2 = container.run_event_repo()
- assert re2.latest_seq("t-1") == 0
- assert re2.latest_seq("t-2") > 0
- re2.close()
-
- fo2 = container.file_operation_repo()
- assert fo2.get_operations_for_thread("t-1") == []
- assert len(fo2.get_operations_for_thread("t-2")) == 1
- fo2.close()
-
- sr2 = container.summary_repo()
- assert sr2.get_latest_summary_row("t-1") is None
- sr2.close()
diff --git a/tests/test_thread_config_repo.py b/tests/test_thread_config_repo.py
deleted file mode 100644
index 007d30c40..000000000
--- a/tests/test_thread_config_repo.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# TODO: thread_config_repo was removed in refactoring; update tests to use thread_repo / thread_launch_pref_repo
-import pytest
-
-pytest.skip("thread_config_repo module removed — needs migration to thread_repo", allow_module_level=True)
-
-import sqlite3 # noqa: E402
-from pathlib import Path # noqa: E402
-
-from storage.providers.sqlite.thread_config_repo import SQLiteThreadConfigRepo # noqa: F401
-from storage.providers.supabase.thread_config_repo import SupabaseThreadConfigRepo
-
-from backend.web.utils import helpers
-
-
-def test_migrate_thread_metadata_table(tmp_path):
- db_path = tmp_path / "leon.db"
- with sqlite3.connect(str(db_path)) as conn:
- conn.execute("CREATE TABLE thread_metadata (thread_id TEXT PRIMARY KEY, sandbox_type TEXT NOT NULL, cwd TEXT, model TEXT)")
- conn.execute(
- "INSERT INTO thread_metadata (thread_id, sandbox_type, cwd, model) VALUES (?, ?, ?, ?)",
- ("t-1", "local", "/tmp/ws", "m-1"),
- )
- conn.commit()
-
- repo = SQLiteThreadConfigRepo(db_path)
- try:
- assert repo.lookup_metadata("t-1") == ("local", "/tmp/ws")
- assert repo.lookup_model("t-1") == "m-1"
- finally:
- repo.close()
-
- with sqlite3.connect(str(db_path)) as conn:
- tables = {r[0] for r in conn.execute("SELECT name FROM sqlite_master WHERE type='table'")}
- assert "thread_config" in tables
- assert "thread_metadata" not in tables
-
-
-def test_save_and_lookup_thread_config(tmp_path):
- db_path = tmp_path / "leon.db"
- repo = SQLiteThreadConfigRepo(db_path)
- try:
- repo.save_metadata("t-2", "docker", "/workspace")
- repo.save_model("t-2", "anthropic/claude-sonnet-4.6")
- assert repo.lookup_metadata("t-2") == ("docker", "/workspace")
- assert repo.lookup_model("t-2") == "anthropic/claude-sonnet-4.6"
- repo.update_fields("t-2", queue_mode="followup", observation_provider="langfuse")
- cfg = repo.lookup_config("t-2")
- assert cfg is not None
- assert cfg["queue_mode"] == "followup"
- assert cfg["observation_provider"] == "langfuse"
- finally:
- repo.close()
-
-
-def test_helpers_compatibility_api(tmp_path, monkeypatch):
- db_path = tmp_path / "leon.db"
- monkeypatch.setattr(helpers, "DB_PATH", Path(db_path))
-
- helpers.init_thread_config("t-3", "local", "/tmp/p")
- helpers.save_thread_model("t-3", "m-3")
-
- config = helpers.load_thread_config("t-3")
- assert config is not None
- assert (config.sandbox_type, config.cwd) == ("local", "/tmp/p")
- assert helpers.lookup_thread_model("t-3") == "m-3"
- helpers.save_thread_config("t-3", observation_provider="langsmith")
- config2 = helpers.load_thread_config("t-3")
- assert config2 is not None
- assert config2.observation_provider == "langsmith"
-
-
-from tests.fakes.supabase import FakeSupabaseClient
-
-
-def test_supabase_thread_config_repo_save_and_lookup():
- tables: dict[str, list[dict]] = {"thread_config": []}
- repo = SupabaseThreadConfigRepo(client=FakeSupabaseClient(tables=tables))
-
- repo.save_metadata("t-1", "docker", "/workspace")
- repo.save_model("t-1", "anthropic/claude-sonnet-4.6")
-
- assert repo.lookup_metadata("t-1") == ("docker", "/workspace")
- assert repo.lookup_model("t-1") == "anthropic/claude-sonnet-4.6"
-
- repo.save_model("t-2", "openai/gpt-5")
- assert repo.lookup_metadata("t-2") == ("local", None)
- assert repo.lookup_model("t-2") == "openai/gpt-5"
- repo.update_fields("t-1", queue_mode="followup", observation_provider="langfuse")
- cfg = repo.lookup_config("t-1")
- assert cfg is not None
- assert cfg["queue_mode"] == "followup"
- assert cfg["observation_provider"] == "langfuse"
-
-
-def test_supabase_thread_config_repo_delete():
- tables: dict[str, list[dict]] = {"thread_config": []}
- repo = SupabaseThreadConfigRepo(client=FakeSupabaseClient(tables=tables))
- repo.save_metadata("t-1", "docker", "/workspace")
- repo.save_metadata("t-2", "local", None)
-
- repo.delete_thread_config("t-1")
- assert repo.lookup_metadata("t-1") is None
- assert repo.lookup_metadata("t-2") == ("local", None)
-
-
-def test_sqlite_thread_config_repo_delete(tmp_path):
- db_path = tmp_path / "leon.db"
- repo = SQLiteThreadConfigRepo(db_path)
- try:
- repo.save_metadata("t-1", "docker", "/workspace")
- repo.save_metadata("t-2", "local", None)
- repo.delete_thread_config("t-1")
- assert repo.lookup_metadata("t-1") is None
- assert repo.lookup_metadata("t-2") == ("local", None)
- finally:
- repo.close()
-
-
-def test_supabase_thread_config_repo_requires_compatible_client():
- with pytest.raises(RuntimeError, match="table\\(name\\)"):
- SupabaseThreadConfigRepo(client=object())
diff --git a/tests/test_tool_registry_runner.py b/tests/test_tool_registry_runner.py
deleted file mode 100644
index 934ae93ca..000000000
--- a/tests/test_tool_registry_runner.py
+++ /dev/null
@@ -1,339 +0,0 @@
-"""Tests for ToolRegistry, ToolRunner, and ToolValidator (P0/P1 verification).
-
-Covers:
-- P0: Three-tier error normalization (Layer 1: validation, Layer 2: execution, Layer 3: soft)
-- P1: ToolRegistry inline/deferred mode
-- P1: ToolRunner dispatches registered tools and normalizes errors
-"""
-
-from __future__ import annotations
-
-from unittest.mock import MagicMock
-
-import pytest
-
-from core.runtime.errors import InputValidationError
-from core.runtime.registry import ToolEntry, ToolMode, ToolRegistry
-from core.runtime.runner import ToolRunner
-from core.runtime.validator import ToolValidator
-
-# ---------------------------------------------------------------------------
-# ToolRegistry
-# ---------------------------------------------------------------------------
-
-
-class TestToolRegistry:
- def _make_entry(self, name: str, mode: ToolMode = ToolMode.INLINE) -> ToolEntry:
- return ToolEntry(
- name=name,
- mode=mode,
- schema={"name": name, "description": f"{name} tool"},
- handler=lambda: f"result:{name}",
- source="test",
- )
-
- def test_register_and_get(self):
- reg = ToolRegistry()
- entry = self._make_entry("Read")
- reg.register(entry)
- assert reg.get("Read") is entry
-
- def test_get_unknown_returns_none(self):
- reg = ToolRegistry()
- assert reg.get("NonExistent") is None
-
- def test_inline_tools_appear_in_get_inline_schemas(self):
- reg = ToolRegistry()
- reg.register(self._make_entry("Read", ToolMode.INLINE))
- reg.register(self._make_entry("TaskCreate", ToolMode.DEFERRED))
- schemas = reg.get_inline_schemas()
- names = [s["name"] for s in schemas]
- assert "Read" in names
- assert "TaskCreate" not in names # P1: deferred not in inline
-
- def test_deferred_tools_not_in_inline_schemas(self):
- reg = ToolRegistry()
- reg.register(self._make_entry("TaskCreate", ToolMode.DEFERRED))
- reg.register(self._make_entry("TaskUpdate", ToolMode.DEFERRED))
- assert reg.get_inline_schemas() == []
-
- def test_search_finds_by_name(self):
- reg = ToolRegistry()
- reg.register(self._make_entry("TaskCreate", ToolMode.DEFERRED))
- reg.register(self._make_entry("Read", ToolMode.INLINE))
- results = reg.search("task")
- names = [e.name for e in results]
- assert "TaskCreate" in names
-
- def test_search_includes_deferred_tools(self):
- """tool_search must discover deferred tools too."""
- reg = ToolRegistry()
- reg.register(self._make_entry("TaskCreate", ToolMode.DEFERRED))
- results = reg.search("TaskCreate")
- assert any(e.name == "TaskCreate" for e in results)
-
- def test_allowed_tools_filter(self):
- reg = ToolRegistry(allowed_tools={"Read", "Grep"})
- reg.register(self._make_entry("Read"))
- reg.register(self._make_entry("Grep"))
- reg.register(self._make_entry("Bash"))
- assert reg.get("Read") is not None
- assert reg.get("Grep") is not None
- assert reg.get("Bash") is None # filtered out
-
- def test_dynamic_schema_callable(self):
- call_count = 0
-
- def schema_fn() -> dict:
- nonlocal call_count
- call_count += 1
- return {"name": "DynTool", "description": "dynamic"}
-
- reg = ToolRegistry()
- entry = ToolEntry(
- name="DynTool",
- mode=ToolMode.INLINE,
- schema=schema_fn,
- handler=lambda: "ok",
- source="test",
- )
- reg.register(entry)
- schemas = reg.get_inline_schemas()
- assert call_count >= 1
- assert any(s["name"] == "DynTool" for s in schemas)
-
-
-# ---------------------------------------------------------------------------
-# ToolValidator
-# ---------------------------------------------------------------------------
-
-
-class TestToolValidator:
- def _schema(self, required: list[str], props: dict) -> dict:
- return {
- "name": "TestTool",
- "parameters": {
- "type": "object",
- "required": required,
- "properties": {k: {"type": v} for k, v in props.items()},
- },
- }
-
- def test_valid_args_pass(self):
- v = ToolValidator()
- schema = self._schema(["file_path"], {"file_path": "string"})
- result = v.validate(schema, {"file_path": "/tmp/x"})
- assert result.ok
-
- def test_missing_required_raises_layer1(self):
- v = ToolValidator()
- schema = self._schema(["file_path"], {"file_path": "string"})
- with pytest.raises(InputValidationError) as exc_info:
- v.validate(schema, {})
- assert "file_path" in str(exc_info.value)
- assert "missing" in str(exc_info.value)
-
- def test_wrong_type_raises_layer1(self):
- v = ToolValidator()
- schema = self._schema(["count"], {"count": "integer"})
- with pytest.raises(InputValidationError):
- v.validate(schema, {"count": "not-an-int"})
-
- def test_extra_params_allowed(self):
- v = ToolValidator()
- schema = self._schema(["a"], {"a": "string"})
- result = v.validate(schema, {"a": "hello", "extra": "ok"})
- assert result.ok
-
-
-# ---------------------------------------------------------------------------
-# ToolRunner — P0 error normalization
-# ---------------------------------------------------------------------------
-
-
-def _make_runner(entries: list[ToolEntry]) -> ToolRunner:
- reg = ToolRegistry()
- for e in entries:
- reg.register(e)
- return ToolRunner(registry=reg)
-
-
-def _make_tool_call_request(name: str, args: dict, call_id: str = "tc-1"):
- req = MagicMock()
- req.tool_call = {"name": name, "args": args, "id": call_id}
- return req
-
-
-class TestToolRunnerErrorNormalization:
- """P0: three-tier error normalization."""
-
- def test_layer1_missing_param_returns_input_validation_error(self):
- entry = ToolEntry(
- name="Read",
- mode=ToolMode.INLINE,
- schema={
- "name": "Read",
- "parameters": {
- "type": "object",
- "required": ["file_path"],
- "properties": {"file_path": {"type": "string"}},
- },
- },
- handler=lambda file_path: "content",
- source="test",
- )
- runner = _make_runner([entry])
- req = _make_tool_call_request("Read", {}) # missing file_path
-
- called_upstream = []
-
- def upstream(r):
- called_upstream.append(r)
- return MagicMock()
-
- result = runner.wrap_tool_call(req, upstream)
- # Layer 1 error format: InputValidationError: {name} failed due to...
- assert "InputValidationError" in result.content
- assert "Read" in result.content
- assert not called_upstream # must not fall through to upstream
-
- def test_layer2_handler_exception_returns_tool_use_error(self):
- def bad_handler(**kwargs):
- raise ValueError("disk full")
-
- entry = ToolEntry(
- name="Write",
- mode=ToolMode.INLINE,
- schema={
- "name": "Write",
- "parameters": {
- "type": "object",
- "required": [],
- "properties": {},
- },
- },
- handler=bad_handler,
- source="test",
- )
- runner = _make_runner([entry])
- req = _make_tool_call_request("Write", {})
- result = runner.wrap_tool_call(req, lambda r: MagicMock())
- # Layer 2 error format: ...
- assert "" in result.content
- assert "disk full" in result.content
-
- def test_layer3_handler_returns_soft_failure_text(self):
- def soft_fail(**kwargs):
- return "No files found"
-
- entry = ToolEntry(
- name="Glob",
- mode=ToolMode.INLINE,
- schema={
- "name": "Glob",
- "parameters": {
- "type": "object",
- "required": ["pattern"],
- "properties": {"pattern": {"type": "string"}},
- },
- },
- handler=soft_fail,
- source="test",
- )
- runner = _make_runner([entry])
- req = _make_tool_call_request("Glob", {"pattern": "**/*.xyz"})
- result = runner.wrap_tool_call(req, lambda r: MagicMock())
- # Layer 3: plain text, no tags
- assert result.content == "No files found"
- assert "" not in result.content
- assert "InputValidationError" not in result.content
-
- def test_unknown_tool_falls_through_to_upstream(self):
- runner = _make_runner([]) # empty registry
- req = _make_tool_call_request("UnknownMCPTool", {})
- upstream_called = []
-
- def upstream(r):
- upstream_called.append(r)
- msg = MagicMock()
- msg.content = "mcp result"
- return msg
-
- result = runner.wrap_tool_call(req, upstream)
- assert upstream_called
- assert result.content == "mcp result"
-
-
-class TestToolRunnerInlineInjection:
- """P1: ToolRunner injects inline schemas into model call."""
-
- def test_inline_schemas_injected(self):
- entry = ToolEntry(
- name="Read",
- mode=ToolMode.INLINE,
- schema={"name": "Read", "description": "read file"},
- handler=lambda: "ok",
- source="test",
- )
- runner = _make_runner([entry])
-
- # Build a mock ModelRequest
- request = MagicMock()
- request.tools = []
-
- captured = []
-
- def handler(req):
- captured.append(req)
- return MagicMock()
-
- request.override.return_value = request
- runner.wrap_model_call(request, handler)
-
- # Should have called override with tools containing Read
- assert request.override.called
- call_kwargs = request.override.call_args
- _tools_arg = call_kwargs[1].get("tools") or (call_kwargs[0][0] if call_kwargs[0] else None)
- # override was called — inline tools were injected
-
- def test_deferred_schemas_not_injected(self):
- deferred = ToolEntry(
- name="TaskCreate",
- mode=ToolMode.DEFERRED,
- schema={"name": "TaskCreate", "description": "create task"},
- handler=lambda: "ok",
- source="test",
- )
- runner = _make_runner([deferred])
- schemas = runner._registry.get_inline_schemas()
- assert all(s["name"] != "TaskCreate" for s in schemas)
-
-
-# ---------------------------------------------------------------------------
-# P1: tool_modes from config honored
-# ---------------------------------------------------------------------------
-
-
-class TestToolModeFromConfig:
- """Verify tool_modes config is applied during service init."""
-
- def test_task_service_registers_deferred(self, tmp_path):
- reg = ToolRegistry()
- from core.tools.task.service import TaskService
-
- _svc = TaskService(registry=reg, db_path=tmp_path / "test.db")
- # TaskCreate/TaskUpdate/TaskList/TaskGet should be DEFERRED
- for tool_name in ["TaskCreate", "TaskGet", "TaskList", "TaskUpdate"]:
- entry = reg.get(tool_name)
- assert entry is not None, f"{tool_name} not registered"
- assert entry.mode == ToolMode.DEFERRED, f"{tool_name} should be DEFERRED, got {entry.mode}"
-
- def test_search_service_registers_inline(self, tmp_path):
- reg = ToolRegistry()
- from core.tools.search.service import SearchService
-
- _svc = SearchService(registry=reg, workspace_root=tmp_path)
- for tool_name in ["Grep", "Glob"]:
- entry = reg.get(tool_name)
- assert entry is not None, f"{tool_name} not registered"
- assert entry.mode == ToolMode.INLINE, f"{tool_name} should be INLINE, got {entry.mode}"
diff --git a/uv.lock b/uv.lock
index 721e5c891..78f682840 100644
--- a/uv.lock
+++ b/uv.lock
@@ -375,6 +375,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/90/45/f458fa2c388e79dd9d8b9b0c99f1d31b568f27388f2fdba7bb66bbc0c6ed/cachetools-6.2.6-py3-none-any.whl", hash = "sha256:8c9717235b3c651603fff0076db52d6acbfd1b338b8ed50256092f7ce9c85bda", size = 11668, upload-time = "2026-01-27T20:32:58.527Z" },
]
+[[package]]
+name = "cattrs"
+version = "26.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "attrs" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a0/ec/ba18945e7d6e55a58364d9fb2e46049c1c2998b3d805f19b703f14e81057/cattrs-26.1.0.tar.gz", hash = "sha256:fa239e0f0ec0715ba34852ce813986dfed1e12117e209b816ab87401271cdd40", size = 495672, upload-time = "2026-02-18T22:15:19.406Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/80/56/60547f7801b97c67e97491dc3d9ade9fbccbd0325058fd3dfcb2f5d98d90/cattrs-26.1.0-py3-none-any.whl", hash = "sha256:d1e0804c42639494d469d08d4f26d6b9de9b8ab26b446db7b5f8c2e97f7c3096", size = 73054, upload-time = "2026-02-18T22:15:17.958Z" },
+]
+
[[package]]
name = "certifi"
version = "2026.1.4"
@@ -719,6 +732,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" },
]
+[[package]]
+name = "docstring-to-markdown"
+version = "0.17"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "importlib-metadata" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/52/d8/8abe80d62c5dce1075578031bcfde07e735bcf0afe2886dd48b470162ab4/docstring_to_markdown-0.17.tar.gz", hash = "sha256:df72a112294c7492487c9da2451cae0faeee06e86008245c188c5761c9590ca3", size = 32260, upload-time = "2025-05-02T15:09:07.932Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/56/7b/af3d0da15bed3a8665419bb3a630585756920f4ad67abfdfef26240ebcc0/docstring_to_markdown-0.17-py3-none-any.whl", hash = "sha256:fd7d5094aa83943bf5f9e1a13701866b7c452eac19765380dead666e36d3711c", size = 23479, upload-time = "2025-05-02T15:09:06.676Z" },
+]
+
[[package]]
name = "duckduckgo-search"
version = "8.1.1"
@@ -1089,6 +1115,34 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
]
+[[package]]
+name = "jedi"
+version = "0.19.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "parso" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287, upload-time = "2024-11-11T01:41:42.873Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278, upload-time = "2024-11-11T01:41:40.175Z" },
+]
+
+[[package]]
+name = "jedi-language-server"
+version = "0.41.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "cattrs" },
+ { name = "docstring-to-markdown" },
+ { name = "jedi" },
+ { name = "lsprotocol" },
+ { name = "pygls" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f3/34/4a35094c680040c8dd598b1ee9153a701289351c1dcbad1a0f2d196c524b/jedi_language_server-0.41.3.tar.gz", hash = "sha256:113ec22b95fadaceefbb704b5f365384bed296b82ede59026be375ecc97a9f8a", size = 29113, upload-time = "2024-02-26T04:28:05.521Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b6/67/2cf4419a8c418b0e5cba0b43dc1ea33a0bb42907694d6a786a3644889f32/jedi_language_server-0.41.3-py3-none-any.whl", hash = "sha256:7411f7479cdc9e9ea495f91e20b182a5d00170c0a8a4a87d3a147462282c06af", size = 27615, upload-time = "2024-02-26T04:28:02.084Z" },
+]
+
[[package]]
name = "jiter"
version = "0.12.0"
@@ -1422,10 +1476,12 @@ dependencies = [
{ name = "langgraph" },
{ name = "langgraph-checkpoint-postgres" },
{ name = "langgraph-checkpoint-sqlite" },
+ { name = "multilspy" },
{ name = "pillow" },
{ name = "psycopg", extra = ["binary"] },
{ name = "pydantic" },
{ name = "pyjwt" },
+ { name = "pyright" },
{ name = "pyyaml" },
{ name = "rich" },
{ name = "sse-starlette" },
@@ -1513,6 +1569,7 @@ requires-dist = [
{ name = "langgraph-checkpoint-sqlite", specifier = ">=2.0.0" },
{ name = "langsmith", marker = "extra == 'all'", specifier = ">=0.1.0" },
{ name = "langsmith", marker = "extra == 'langsmith'", specifier = ">=0.1.0" },
+ { name = "multilspy", specifier = ">=0.0.15" },
{ name = "opentelemetry-api", marker = "extra == 'otel'", specifier = ">=1.20.0" },
{ name = "opentelemetry-exporter-otlp", marker = "extra == 'otel'", specifier = ">=1.20.0" },
{ name = "opentelemetry-sdk", marker = "extra == 'otel'", specifier = ">=1.20.0" },
@@ -1523,6 +1580,7 @@ requires-dist = [
{ name = "pymupdf", marker = "extra == 'all'", specifier = ">=1.24.0" },
{ name = "pymupdf", marker = "extra == 'docs'", specifier = ">=1.24.0" },
{ name = "pymupdf", marker = "extra == 'pdf'", specifier = ">=1.24.0" },
+ { name = "pyright", specifier = ">=1.1.0" },
{ name = "python-pptx", marker = "extra == 'all'", specifier = ">=1.0.0" },
{ name = "python-pptx", marker = "extra == 'docs'", specifier = ">=1.0.0" },
{ name = "python-pptx", marker = "extra == 'pptx'", specifier = ">=1.0.0" },
@@ -1560,6 +1618,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" },
]
+[[package]]
+name = "lsprotocol"
+version = "2023.0.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "attrs" },
+ { name = "cattrs" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9d/f6/6e80484ec078d0b50699ceb1833597b792a6c695f90c645fbaf54b947e6f/lsprotocol-2023.0.1.tar.gz", hash = "sha256:cc5c15130d2403c18b734304339e51242d3018a05c4f7d0f198ad6e0cd21861d", size = 69434, upload-time = "2024-01-09T17:21:12.625Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/8d/37/2351e48cb3309673492d3a8c59d407b75fb6630e560eb27ecd4da03adc9a/lsprotocol-2023.0.1-py3-none-any.whl", hash = "sha256:c75223c9e4af2f24272b14c6375787438279369236cd568f596d4951052a60f2", size = 70826, upload-time = "2024-01-09T17:21:14.491Z" },
+]
+
[[package]]
name = "lxml"
version = "6.0.2"
@@ -1876,6 +1947,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/81/08/7036c080d7117f28a4af526d794aab6a84463126db031b007717c1a6676e/multidict-6.7.1-py3-none-any.whl", hash = "sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56", size = 12319, upload-time = "2026-01-26T02:46:44.004Z" },
]
+[[package]]
+name = "multilspy"
+version = "0.0.15"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "jedi-language-server" },
+ { name = "psutil" },
+ { name = "requests" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d8/a8/4d6ab48e624f911eb5229aa01b3524b916470c9d036a9e8cc96d6fb81673/multilspy-0.0.15.tar.gz", hash = "sha256:b27a0b7c5c5306216b31fe1df9b4a42d2797735d0a78928e0df9ef8dfbcc97c5", size = 120639, upload-time = "2025-04-03T07:01:27.216Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/97/4d/b9d3492d6a7a2536498fc7fd49c1cc7bc86a41acf93b0ad967d75dbe5cd6/multilspy-0.0.15-py3-none-any.whl", hash = "sha256:3fa88939b953ed5d39aba4688a34105ec1e5cf2b2f778167fee2b78b3c0e1427", size = 137361, upload-time = "2025-04-03T07:01:25.492Z" },
+]
+
[[package]]
name = "multipart"
version = "1.3.0"
@@ -2176,6 +2262,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
]
+[[package]]
+name = "parso"
+version = "0.8.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/81/76/a1e769043c0c0c9fe391b702539d594731a4362334cdf4dc25d0c09761e7/parso-0.8.6.tar.gz", hash = "sha256:2b9a0332696df97d454fa67b81618fd69c35a7b90327cbe6ba5c92d2c68a7bfd", size = 401621, upload-time = "2026-02-09T15:45:24.425Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b6/61/fae042894f4296ec49e3f193aff5d7c18440da9e48102c3315e1bc4519a7/parso-0.8.6-py2.py3-none-any.whl", hash = "sha256:2c549f800b70a5c4952197248825584cb00f033b29c692671d3bf08bf380baff", size = 106894, upload-time = "2026-02-09T15:45:21.391Z" },
+]
+
[[package]]
name = "pillow"
version = "12.1.0"
@@ -2403,6 +2498,34 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/57/bf/2086963c69bdac3d7cff1cc7ff79b8ce5ea0bec6797a017e1be338a46248/protobuf-6.33.5-py3-none-any.whl", hash = "sha256:69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02", size = 170687, upload-time = "2026-01-29T21:51:32.557Z" },
]
+[[package]]
+name = "psutil"
+version = "7.2.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 130595, upload-time = "2026-01-28T18:14:57.293Z" },
+ { url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" },
+ { url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = "2026-01-28T18:15:04.436Z" },
+ { url = "https://files.pythonhosted.org/packages/16/ba/0756dca669f5a9300d0cbcbfae9a4c30e446dfc7440ffe43ded5724bfd93/psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b", size = 139893, upload-time = "2026-01-28T18:15:06.378Z" },
+ { url = "https://files.pythonhosted.org/packages/1c/61/8fa0e26f33623b49949346de05ec1ddaad02ed8ba64af45f40a147dbfa97/psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9", size = 135589, upload-time = "2026-01-28T18:15:08.03Z" },
+ { url = "https://files.pythonhosted.org/packages/81/69/ef179ab5ca24f32acc1dac0c247fd6a13b501fd5534dbae0e05a1c48b66d/psutil-7.2.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00", size = 130664, upload-time = "2026-01-28T18:15:09.469Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/64/665248b557a236d3fa9efc378d60d95ef56dd0a490c2cd37dafc7660d4a9/psutil-7.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9", size = 131087, upload-time = "2026-01-28T18:15:11.724Z" },
+ { url = "https://files.pythonhosted.org/packages/d5/2e/e6782744700d6759ebce3043dcfa661fb61e2fb752b91cdeae9af12c2178/psutil-7.2.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a", size = 182383, upload-time = "2026-01-28T18:15:13.445Z" },
+ { url = "https://files.pythonhosted.org/packages/57/49/0a41cefd10cb7505cdc04dab3eacf24c0c2cb158a998b8c7b1d27ee2c1f5/psutil-7.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf", size = 185210, upload-time = "2026-01-28T18:15:16.002Z" },
+ { url = "https://files.pythonhosted.org/packages/dd/2c/ff9bfb544f283ba5f83ba725a3c5fec6d6b10b8f27ac1dc641c473dc390d/psutil-7.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c7663d4e37f13e884d13994247449e9f8f574bc4655d509c3b95e9ec9e2b9dc1", size = 141228, upload-time = "2026-01-28T18:15:18.385Z" },
+ { url = "https://files.pythonhosted.org/packages/f2/fc/f8d9c31db14fcec13748d373e668bc3bed94d9077dbc17fb0eebc073233c/psutil-7.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:11fe5a4f613759764e79c65cf11ebdf26e33d6dd34336f8a337aa2996d71c841", size = 136284, upload-time = "2026-01-28T18:15:19.912Z" },
+ { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" },
+ { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" },
+ { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" },
+ { url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" },
+ { url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" },
+ { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" },
+]
+
[[package]]
name = "psycopg"
version = "3.3.3"
@@ -2594,6 +2717,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9b/4d/b9add7c84060d4c1906abe9a7e5359f2a60f7a9a4f67268b2766673427d8/pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498", size = 15730, upload-time = "2025-03-17T18:53:14.532Z" },
]
+[[package]]
+name = "pygls"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "cattrs" },
+ { name = "lsprotocol" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/86/b9/41d173dad9eaa9db9c785a85671fc3d68961f08d67706dc2e79011e10b5c/pygls-1.3.1.tar.gz", hash = "sha256:140edceefa0da0e9b3c533547c892a42a7d2fd9217ae848c330c53d266a55018", size = 45527, upload-time = "2024-03-26T18:44:25.679Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/11/19/b74a10dd24548e96e8c80226cbacb28b021bc3a168a7d2709fb0d0185348/pygls-1.3.1-py3-none-any.whl", hash = "sha256:6e00f11efc56321bdeb6eac04f6d86131f654c7d49124344a9ebb968da3dd91e", size = 56031, upload-time = "2024-03-26T18:44:24.249Z" },
+]
+
[[package]]
name = "pygments"
version = "2.19.2"
@@ -3030,7 +3166,7 @@ wheels = [
[[package]]
name = "requests"
-version = "2.32.5"
+version = "2.32.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
@@ -3038,9 +3174,9 @@ dependencies = [
{ name = "idna" },
{ name = "urllib3" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218, upload-time = "2024-05-29T15:37:49.536Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
+ { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928, upload-time = "2024-05-29T15:37:47.027Z" },
]
[[package]]