From 151d3a83c5f101003fab1c9fe12c3c659c126297 Mon Sep 17 00:00:00 2001
From: Spherrrical <malikmusa1323@gmail.com>
Date: Wed, 6 May 2026 10:58:41 -0700
Subject: [PATCH] feat(cli): add planoai launch group + claude-desktop
 integration

---
 cli/planoai/claude_desktop.py                 | 625 ++++++++++++++++++
 cli/planoai/launch_cmd.py                     | 331 ++++++++++
 cli/planoai/main.py                           |  77 +--
 cli/planoai/rich_click_config.py              |  26 +-
 cli/test/test_claude_desktop.py               | 366 ++++++++++
 cli/test/test_launch_cmd.py                   | 231 +++++++
 .../frontier_model_routing/README.md          | 423 ++++++++++++
 .../frontier_model_routing/config.yaml        | 103 +++
 .../frontier_model_routing/run_demo.sh        |  63 ++
 .../frontier_model_routing/test.rest          | 212 ++++++
 .../frontier_model_routing/test.sh            | 119 ++++
 11 files changed, 2500 insertions(+), 76 deletions(-)
 create mode 100644 cli/planoai/claude_desktop.py
 create mode 100644 cli/planoai/launch_cmd.py
 create mode 100644 cli/test/test_claude_desktop.py
 create mode 100644 cli/test/test_launch_cmd.py
 create mode 100644 demos/llm_routing/frontier_model_routing/README.md
 create mode 100644 demos/llm_routing/frontier_model_routing/config.yaml
 create mode 100755 demos/llm_routing/frontier_model_routing/run_demo.sh
 create mode 100644 demos/llm_routing/frontier_model_routing/test.rest
 create mode 100755 demos/llm_routing/frontier_model_routing/test.sh

diff --git a/cli/planoai/claude_desktop.py b/cli/planoai/claude_desktop.py
new file mode 100644
index 000000000..29783f3db
--- /dev/null
+++ b/cli/planoai/claude_desktop.py
@@ -0,0 +1,625 @@
+"""Configure Claude Desktop to use the local Plano gateway.
+
+Python port of Ollama's `cmd/launch/claude_desktop.go` tailored for Plano. The
+flow is intentionally simpler than Ollama's:
+
+1. Detect Claude Desktop on macOS / Windows.
+2. Pick a string to put in Claude's ``inferenceGatewayApiKey`` slot (Claude
+   Desktop requires the field; Plano's local gateway does not enforce bearer
+   auth, so a placeholder is fine — see ``_resolve_api_key`` for precedence).
+3. Rewrite Claude Desktop config JSON files with ``.bak`` backups to switch
+   Claude into 3rd-party gateway mode pointed at Plano.
+4. Optionally restart Claude Desktop so the changes take effect immediately.
+
+Restoring flips ``deploymentMode`` back to ``1p``, strips the gateway provider
+and URL keys from the Plano profile file, and drops its ``_meta.json`` entry.
+
+The Claude Desktop ``deploymentMode = "3p"`` profile structure used here is
+defined by Anthropic / observed via the Ollama integration; we do not control
+it. We re-use the same JSON shape so Claude Desktop happily accepts the Plano
+profile alongside any other third-party profile the user may have.
+"""
+
+from __future__ import annotations
+
+import glob as _glob
+import json
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import time
+from dataclasses import dataclass, field
+from typing import Callable, Optional
+
+from planoai.utils import getLogger
+
+log = getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+INTEGRATION_NAME = "claude-desktop"
+PROFILE_NAME = "Plano"
+# Deterministic UUID-v4 distinct from Ollama's `…0114`. The trailing four
+# bytes (50 6C 61 6E) spell "Plan" in ASCII to make it easy to identify the
+# profile in `_meta.json`.
+PROFILE_ID = "00000000-0000-4000-8000-0000506C616E"
+DEFAULT_BASE_URL = "http://localhost:12000"
+SUCCESS_MESSAGE = "Claude Desktop profile changed to Plano."
+RESTORE_HINT = (
+    "To restore the usual Claude profile, run: "
+    "planoai launch claude-desktop --restore"
+)
+RESTORED_MESSAGE = "Claude Desktop restored to the usual Claude profile."
+
+# Placeholder written into Claude Desktop's gateway profile when the user
+# hasn't overridden it. Plano's local gateway does not enforce a bearer
+# token; this string only exists so Claude Desktop has a non-empty value to
+# attach to outbound requests.
+DEFAULT_API_KEY = "plano"
+
+# How long we wait for Claude Desktop to fully exit on restart.
+_QUIT_TIMEOUT_SECONDS = 30
+
+
+# ---------------------------------------------------------------------------
+# Test seams: replace these in tests instead of monkey-patching os/subprocess.
+# ---------------------------------------------------------------------------
+
+
+# Platform identifier. ``"darwin"``, ``"windows"``, or anything else (which
+# is treated as unsupported). Module-level so tests can override it.
+def _detect_goos() -> str:
+    """Return the platform identifier used to select OS-specific behavior.
+
+    Windows yields ``"windows"``; everything else yields ``sys.platform``.
+    """
+    return "windows" if os.name == "nt" else sys.platform
+
+
+_GOOS: str = _detect_goos()
+
+_user_home: Callable[[], str] = os.path.expanduser  # called as _user_home("~")
+
+
+def _is_running() -> bool:
+    """Return True if Claude Desktop is currently running."""
+    if _GOOS == "darwin":
+        try:
+            out = subprocess.run(
+                ["pgrep", "-f", "Claude.app/Contents/MacOS/Claude"],
+                capture_output=True,
+                text=True,
+                check=False,
+            )
+            return out.returncode == 0 and out.stdout.strip() != ""
+        except FileNotFoundError:
+            return False
+    if _GOOS == "windows":
+        script = (
+            "(Get-Process claude -ErrorAction SilentlyContinue "
+            "| Where-Object { $_.MainWindowHandle -ne 0 } "
+            "| Select-Object -First 1).Id"
+        )
+        try:
+            out = subprocess.run(
+                ["powershell.exe", "-NoProfile", "-Command", script],
+                capture_output=True,
+                text=True,
+                check=False,
+            )
+            return out.returncode == 0 and out.stdout.strip() != ""
+        except FileNotFoundError:
+            return False
+    return False
+
+
+def _quit() -> None:
+    """Ask Claude Desktop to quit gracefully."""
+    if _GOOS == "darwin":
+        subprocess.run(
+            ["osascript", "-e", 'tell application "Claude" to quit'],
+            check=False,
+        )
+        return
+    if _GOOS == "windows":
+        script = (
+            "Get-Process claude -ErrorAction SilentlyContinue "
+            "| Where-Object { $_.MainWindowHandle -ne 0 } "
+            "| ForEach-Object { [void]$_.CloseMainWindow() }"
+        )
+        subprocess.run(
+            ["powershell.exe", "-NoProfile", "-Command", script],
+            check=False,
+        )
+
+
+def _open() -> None:
+    """Launch Claude Desktop."""
+    if _GOOS == "darwin":
+        subprocess.run(["open", "-a", "Claude"], check=False)
+        return
+    if _GOOS == "windows":
+        path = _claude_app_path()
+        if not path:
+            raise RuntimeError(
+                "Claude Desktop executable was not found; open Claude Desktop "
+                "manually once and re-run 'planoai launch claude-desktop'"
+            )
+        ps_path = "'" + path.replace("'", "''") + "'"
+        subprocess.run(
+            [
+                "powershell.exe",
+                "-NoProfile",
+                "-Command",
+                f"Start-Process -FilePath {ps_path}",
+            ],
+            check=False,
+        )
+
+
+def _sleep(seconds: float) -> None:
+    time.sleep(seconds)
+
+
+# ---------------------------------------------------------------------------
+# Path discovery
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class _ThirdPartyPaths:
+    desktop_config: str
+    meta: str
+    profile: str
+
+
+@dataclass
+class _Targets:
+    normal_configs: list[str] = field(default_factory=list)
+    third_party_profiles: list[_ThirdPartyPaths] = field(default_factory=list)
+
+
+def supported() -> Optional[str]:
+    """Return an error message on unsupported platforms, otherwise ``None``."""
+    if _GOOS not in ("darwin", "windows"):
+        return "Claude Desktop launch is only supported on macOS and Windows"
+    return None
+
+
+def _home() -> str:
+    home = _user_home("~")
+    if home == "~" or not home:
+        raise RuntimeError("could not resolve user home directory")
+    return home
+
+
+def _local_app_data() -> str:
+    val = (os.environ.get("LOCALAPPDATA") or "").strip()
+    if val:
+        return val
+    user = (os.environ.get("USERPROFILE") or "").strip()
+    if user:
+        return os.path.join(user, "AppData", "Local")
+    return os.path.join(_home(), "AppData", "Local")
+
+
+def _darwin_profile_roots() -> tuple[list[str], list[str]]:
+    base = os.path.join(_home(), "Library", "Application Support")
+    return ([os.path.join(base, "Claude")], [os.path.join(base, "Claude-3p")])
+
+
+def _windows_profile_roots() -> tuple[list[str], list[str]]:
+    local = _local_app_data()
+    normal = [
+        os.path.join(local, "Claude"),
+        os.path.join(local, "Claude Nest"),
+    ]
+    third_party = [
+        os.path.join(local, "Claude-3p"),
+        os.path.join(local, "Claude Nest-3p"),
+    ]
+    return normal, third_party
+
+
+def _dedupe_paths(paths: list[str]) -> list[str]:
+    """Drop blank entries and case-insensitive repeats, keeping first-seen order."""
+    unique: list[str] = []
+    seen_keys: set[str] = set()
+    for candidate in paths:
+        if not (candidate and candidate.strip()):
+            continue
+        folded = candidate.lower()
+        if folded not in seen_keys:
+            seen_keys.add(folded)
+            unique.append(candidate)
+    return unique
+
+
+def _target_paths() -> _Targets:
+    err = supported()
+    if err is not None:
+        raise RuntimeError(err)
+
+    if _GOOS == "darwin":
+        normal, third = _darwin_profile_roots()
+    else:
+        normal, third = _windows_profile_roots()
+
+    targets = _Targets()
+    for root in _dedupe_paths(normal):
+        targets.normal_configs.append(os.path.join(root, "claude_desktop_config.json"))
+    for root in _dedupe_paths(third):
+        targets.third_party_profiles.append(
+            _ThirdPartyPaths(
+                desktop_config=os.path.join(root, "claude_desktop_config.json"),
+                meta=os.path.join(root, "configLibrary", "_meta.json"),
+                profile=os.path.join(root, "configLibrary", f"{PROFILE_ID}.json"),
+            )
+        )
+    return targets
+
+
+def _claude_app_path() -> str:
+    """Return path to the Claude Desktop executable, or ``""`` if unknown."""
+    candidates: list[str] = []
+    if _GOOS == "darwin":
+        candidates.append("/Applications/Claude.app")
+        candidates.append(os.path.join(_home(), "Applications", "Claude.app"))
+    elif _GOOS == "windows":
+        local = _local_app_data()
+        candidates += [
+            os.path.join(local, "Programs", "Claude", "Claude.exe"),
+            os.path.join(local, "Programs", "Claude Desktop", "Claude.exe"),
+            os.path.join(local, "Claude", "Claude.exe"),
+            os.path.join(local, "Claude Nest", "Claude.exe"),
+            os.path.join(local, "Claude Desktop", "Claude.exe"),
+            os.path.join(local, "AnthropicClaude", "Claude.exe"),
+        ]
+        for pattern in (
+            os.path.join(local, "AnthropicClaude", "app-*", "Claude.exe"),
+            os.path.join(local, "Programs", "Claude", "app-*", "Claude.exe"),
+            os.path.join(local, "Programs", "Claude Desktop", "app-*", "Claude.exe"),
+        ):
+            # glob order is arbitrary and more than one app-* directory can
+            # exist at once; reverse-sort so the highest-sorting (normally
+            # newest) version directory is tried first.
+            candidates.extend(sorted(_glob.glob(pattern), reverse=True))
+
+    for path in _dedupe_paths(candidates):
+        if os.path.exists(path):
+            return path
+    return ""
+
+
+def is_installed() -> bool:
+    """Best-effort check: app binary or any profile dir is present."""
+    if _claude_app_path():
+        return True
+    if _GOOS == "windows" and _is_running():
+        return True
+    if _GOOS == "darwin":
+        normal, third = _darwin_profile_roots()
+    elif _GOOS == "windows":
+        normal, third = _windows_profile_roots()
+    else:
+        return False
+    for path in normal + third:
+        if os.path.isdir(path):
+            return True
+    return False
+
+
+# ---------------------------------------------------------------------------
+# JSON IO with atomic write + .bak backup
+# ---------------------------------------------------------------------------
+
+
+def _read_json(path: str) -> dict:
+    """Load a JSON object; blank or non-object content yields ``{}``."""
+    # utf-8-sig: accept the BOM some Windows editors add (json.loads rejects it).
+    with open(path, "r", encoding="utf-8-sig") as f:
+        data = f.read().strip()
+    parsed = json.loads(data) if data else {}
+    return parsed if isinstance(parsed, dict) else {}
+
+
+def _read_json_allow_missing(path: str) -> dict:
+    try:
+        return _read_json(path)
+    except FileNotFoundError:
+        return {}
+
+
+def _atomic_write_with_backup(path: str, payload: bytes) -> None:
+    """Write ``payload`` to ``path`` atomically, keeping a ``.bak`` copy."""
+    parent = os.path.dirname(path)
+    if parent:
+        os.makedirs(parent, exist_ok=True)
+    if os.path.exists(path):
+        try:
+            shutil.copy2(path, path + ".bak")
+        except OSError as e:
+            log.debug("could not write backup for %s: %s", path, e)
+
+    fd, tmp_path = tempfile.mkstemp(prefix=".plano_", suffix=".tmp", dir=parent or None)
+    try:
+        with os.fdopen(fd, "wb") as f:
+            f.write(payload)
+        os.replace(tmp_path, path)
+    except Exception:
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+        raise
+
+
+def _write_json(path: str, value: dict) -> None:
+    payload = (json.dumps(value, indent=2) + "\n").encode("utf-8")
+    _atomic_write_with_backup(path, payload)
+
+
+# ---------------------------------------------------------------------------
+# JSON shape mutators (1:1 with Ollama)
+# ---------------------------------------------------------------------------
+
+
+def _write_deployment_mode(path: str, mode: str) -> None:
+    cfg = _read_json_allow_missing(path)
+    cfg["deploymentMode"] = mode
+    _write_json(path, cfg)
+
+
+def _write_meta(path: str, profile_id: str, name: str) -> None:
+    """Mark ``profile_id`` as applied and (re)register it last in ``entries``."""
+    meta = _read_json_allow_missing(path)
+    meta["appliedId"] = profile_id
+    existing = meta.get("entries")
+    if not isinstance(existing, list):
+        existing = []
+    kept = [
+        item
+        for item in existing
+        if not (isinstance(item, dict) and item.get("id") == profile_id)
+    ]
+    meta["entries"] = kept + [{"id": profile_id, "name": name}]
+    _write_json(path, meta)
+
+
+def _write_gateway_profile(
+    path: str, api_key: str, base_url: str, force_chooser: bool
+) -> None:
+    cfg = _read_json_allow_missing(path)
+    cfg["inferenceProvider"] = "gateway"
+    cfg["inferenceGatewayBaseUrl"] = base_url
+    cfg["inferenceGatewayApiKey"] = api_key
+    cfg["inferenceGatewayAuthScheme"] = "bearer"
+    cfg.pop("inferenceModels", None)
+    cfg["disableDeploymentModeChooser"] = force_chooser
+    _write_json(path, cfg)
+
+
+def _restore_meta(path: str) -> None:
+    meta = _read_json_allow_missing(path)
+    if not meta:
+        return
+    changed = False
+    if meta.get("appliedId") == PROFILE_ID:
+        meta.pop("appliedId", None)
+        changed = True
+
+    raw_entries = meta.get("entries")
+    if isinstance(raw_entries, list):
+        filtered: list = []
+        for entry in raw_entries:
+            if isinstance(entry, dict) and entry.get("id") == PROFILE_ID:
+                changed = True
+                continue
+            filtered.append(entry)
+        meta["entries"] = filtered
+
+    if changed:
+        _write_json(path, meta)
+
+
+def _restore_profile(path: str) -> None:
+    cfg = _read_json_allow_missing(path)
+    if not cfg:
+        return
+    cfg["disableDeploymentModeChooser"] = False
+    for key in (
+        "inferenceProvider",
+        "inferenceGatewayBaseUrl",
+        "inferenceGatewayAuthScheme",
+        "inferenceModels",
+    ):
+        cfg.pop(key, None)
+    _write_json(path, cfg)
+
+
+def _read_applied_id(path: str) -> str:
+    try:
+        meta = _read_json(path)
+    except (FileNotFoundError, json.JSONDecodeError):
+        return ""
+    val = meta.get("appliedId")
+    return val if isinstance(val, str) else ""
+
+
+def _read_deployment_mode(path: str) -> str:
+    try:
+        cfg = _read_json(path)
+    except (FileNotFoundError, json.JSONDecodeError):
+        return ""
+    val = cfg.get("deploymentMode")
+    return val if isinstance(val, str) else ""
+
+
+def _read_gateway_api_key(path: str) -> str:
+    try:
+        cfg = _read_json(path)
+    except (FileNotFoundError, json.JSONDecodeError):
+        return ""
+    val = cfg.get("inferenceGatewayApiKey")
+    return val.strip() if isinstance(val, str) else ""
+
+
+def _third_party_profile_ok(t: _ThirdPartyPaths) -> bool:
+    if _read_applied_id(t.meta) != PROFILE_ID:
+        return False
+    try:
+        cfg = _read_json(t.profile)
+    except (FileNotFoundError, json.JSONDecodeError):
+        return False
+    if cfg.get("inferenceProvider") != "gateway":
+        return False
+    base_url = cfg.get("inferenceGatewayBaseUrl")
+    if not isinstance(base_url, str) or not base_url.strip():
+        return False
+    api_key = cfg.get("inferenceGatewayApiKey")
+    if not isinstance(api_key, str) or not api_key.strip():
+        return False
+    return True
+
+
+def is_configured() -> bool:
+    try:
+        targets = _target_paths()
+    except RuntimeError:
+        return False
+    if not targets.normal_configs or not targets.third_party_profiles:
+        return False
+    for path in targets.normal_configs:
+        if _read_deployment_mode(path) != "3p":
+            return False
+    for t in targets.third_party_profiles:
+        if _read_deployment_mode(t.desktop_config) != "3p":
+            return False
+        if not _third_party_profile_ok(t):
+            return False
+    return True
+
+
+# ---------------------------------------------------------------------------
+# API key resolution
+# ---------------------------------------------------------------------------
+#
+# Plano's local gateway does not enforce bearer auth — there's no such thing
+# as a "Plano API key". Claude Desktop's third-party profile schema, however,
+# requires ``inferenceGatewayApiKey`` to be a non-empty string before it will
+# treat the profile as configured. We therefore pick *some* string to write
+# into that slot, with the following precedence so users running Plano behind
+# their own auth proxy can opt-in:
+#
+#   1. ``$PLANO_API_KEY`` — explicit override (e.g. an internal auth token).
+#   2. The existing ``inferenceGatewayApiKey`` already in Claude's 3p profile,
+#      so re-running ``planoai launch claude-desktop`` does not clobber a
+#      value the user manually set.
+#   3. The fixed placeholder ``DEFAULT_API_KEY`` ("plano").
+#
+# We do not validate this string against the gateway. The gateway's
+# reachability is already surfaced by ``launch_cmd._is_plano_running()``
+# before this module is invoked.
+
+
+def _resolve_api_key(profile_paths: list[str]) -> str:
+    """Choose the string written to ``inferenceGatewayApiKey``.
+
+    Order: ``$PLANO_API_KEY``, first saved profile key, ``DEFAULT_API_KEY``.
+    """
+    override = os.environ.get("PLANO_API_KEY", "").strip()
+    if override:
+        return override
+    # Lazily scan profiles so reading stops at the first non-empty saved key.
+    saved_keys = (_read_gateway_api_key(p) for p in profile_paths)
+    return next((key for key in saved_keys if key), DEFAULT_API_KEY)
+
+
+# ---------------------------------------------------------------------------
+# Public configure / restore / launch
+# ---------------------------------------------------------------------------
+
+
+def configure(base_url: str = DEFAULT_BASE_URL, *, force_chooser: bool = True) -> None:
+    """Switch Claude Desktop into 3p mode pointed at the local Plano gateway."""
+    err = supported()
+    if err is not None:
+        raise RuntimeError(err)
+
+    targets = _target_paths()
+    profile_paths = [t.profile for t in targets.third_party_profiles]
+    api_key = _resolve_api_key(profile_paths)
+
+    for path in targets.normal_configs:
+        _write_deployment_mode(path, "3p")
+    for t in targets.third_party_profiles:
+        _write_deployment_mode(t.desktop_config, "3p")
+        _write_meta(t.meta, PROFILE_ID, PROFILE_NAME)
+        _write_gateway_profile(t.profile, api_key, base_url, force_chooser)
+
+
+def restore() -> None:
+    """Flip Claude Desktop back to the default Anthropic profile."""
+    err = supported()
+    if err is not None:
+        raise RuntimeError(err)
+
+    targets = _target_paths()
+    for path in targets.normal_configs:
+        _write_deployment_mode(path, "1p")
+    for t in targets.third_party_profiles:
+        _write_deployment_mode(t.desktop_config, "1p")
+        _restore_meta(t.meta)
+        _restore_profile(t.profile)
+
+
+def _can_prompt() -> bool:
+    return sys.stdin.isatty() and sys.stderr.isatty()
+
+
+def _confirm(prompt: str, yes: bool) -> bool:
+    if yes:
+        return True
+    if not _can_prompt():
+        return False
+    try:
+        answer = input(f"{prompt} [Y/n] ").strip().lower()
+    except (EOFError, KeyboardInterrupt):
+        sys.stderr.write("\n")
+        return False
+    return answer in ("", "y", "yes")
+
+
+def launch_or_restart(prompt: str, yes: bool) -> None:
+    """Open Claude Desktop, restarting it first if it is already running.
+
+    Raises ``RuntimeError`` if unsupported or if the app cannot be quit or opened.
+    """
+    err = supported()
+    if err is not None:
+        raise RuntimeError(err)
+    if not _is_running():
+        _open()
+        return
+    if not _confirm(prompt, yes):
+        sys.stderr.write(
+            "Quit and reopen Claude Desktop when you're ready for the "
+            "profile change to take effect.\n"
+        )
+        return
+
+    _quit()
+    # Monotonic clock so wall-clock adjustments cannot distort the timeout.
+    deadline = time.monotonic() + _QUIT_TIMEOUT_SECONDS
+    while _is_running():  # polled again after every sleep, incl. the last
+        if time.monotonic() >= deadline:
+            raise RuntimeError(
+                "Claude Desktop did not quit; quit it manually and re-run the command"
+            )
+        _sleep(0.2)
+    _open()
diff --git a/cli/planoai/launch_cmd.py b/cli/planoai/launch_cmd.py
new file mode 100644
index 000000000..6fc242502
--- /dev/null
+++ b/cli/planoai/launch_cmd.py
@@ -0,0 +1,331 @@
+"""``planoai launch`` command group.
+
+Launches CLI agents (Claude Code, Codex) or the Claude Desktop app against the
+local Plano gateway. This replaces the old ``planoai cli-agent`` command.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+from typing import Optional
+
+import rich_click as click
+import yaml
+
+from planoai import claude_desktop as _cd
+from planoai.consts import NATIVE_PID_FILE, PLANO_DOCKER_NAME
+from planoai.core import _resolve_cli_agent_endpoint, start_cli_agent
+from planoai.docker_cli import docker_container_status
+from planoai.defaults import DEFAULT_LLM_LISTENER_PORT
+from planoai.utils import find_config_file, getLogger
+
+log = getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _is_native_plano_running() -> bool:
+    """Return True if both PIDs recorded in ``NATIVE_PID_FILE`` are alive."""
+    try:
+        with open(NATIVE_PID_FILE, "r") as f:
+            pids = json.load(f)
+    except (OSError, ValueError):  # missing/unreadable file or bad JSON
+        return False
+    if not isinstance(pids, dict):
+        return False
+    for key in ("envoy_pid", "brightstaff_pid"):
+        pid = pids.get(key)
+        # bool is an int subclass, and os.kill treats pid <= 0 as a process
+        # group, so neither can identify a single recorded process.
+        if isinstance(pid, bool) or not isinstance(pid, int) or pid <= 0:
+            return False
+        try:
+            os.kill(pid, 0)  # POSIX: signal 0 probes existence only
+        except PermissionError:
+            continue  # process exists but belongs to another user
+        except (OSError, OverflowError):
+            return False
+    return True
+
+
+def _is_plano_running() -> bool:
+    if _is_native_plano_running():
+        return True
+    return docker_container_status(PLANO_DOCKER_NAME) == "running"
+
+
+def _require_plano_running(console) -> None:
+    if _is_plano_running():
+        return
+    console.print("[red]✗[/red] Plano is not running.")
+    console.print(
+        "[dim]Start Plano first using 'planoai up <config.yaml>' "
+        "(native or --docker mode).[/dim]"
+    )
+    sys.exit(1)
+
+
+def _start_plano_with_config(config_path: str, console) -> None:
+    """Invoke `planoai up` against the given config and wait for it to be healthy.
+
+    Reuses the click ``up`` command's callback so we get the same validation,
+    env loading, and native runner behavior as a top-level invocation. ``up``
+    runs in detached/background mode by default and only returns once Plano is
+    healthy, so we can safely continue with the Claude Desktop config flow
+    after it returns.
+    """
+    # Lazy import: ``planoai.main`` pulls in heavy modules (rich, native runner,
+    # etc.) and would create a circular import at module-load time.
+    from planoai.main import up
+
+    abs_path = os.path.abspath(config_path)
+    if not os.path.exists(abs_path):
+        console.print(f"[red]✗[/red] Config file not found: {abs_path}")
+        sys.exit(1)
+
+    console.print(
+        f"[dim]Starting Plano with config " f"[cyan]{abs_path}[/cyan]...[/dim]"
+    )
+    up.callback(
+        file=abs_path,
+        path=".",
+        foreground=False,
+        with_tracing=False,
+        tracing_port=4317,
+        docker=False,
+        verbose=False,
+        listener_port=DEFAULT_LLM_LISTENER_PORT,
+    )
+
+
+def _base_url_from_config_file(config_path: str) -> Optional[str]:
+    try:
+        with open(config_path, "r") as f:
+            cfg = yaml.safe_load(f) or {}
+    except (OSError, yaml.YAMLError):
+        return None
+    _host, port = _resolve_cli_agent_endpoint(cfg)
+    return f"http://localhost:{port}"
+
+
+def _resolve_plano_config(file: Optional[str], path: str, console) -> str:
+    plano_config_file = find_config_file(path, file)
+    if not os.path.exists(plano_config_file):
+        console.print(f"[red]✗[/red] Config file not found: {plano_config_file}")
+        sys.exit(1)
+    return plano_config_file
+
+
+def _run_cli_agent(agent_type: str, file, path, settings) -> None:
+    from rich.console import Console
+
+    console = Console()
+    _require_plano_running(console)
+    plano_config_file = _resolve_plano_config(file, path, console)
+    try:
+        start_cli_agent(plano_config_file, agent_type, settings)
+    except SystemExit:
+        raise
+    except Exception as e:
+        click.echo(f"Error: {e}")
+        sys.exit(1)
+
+
+# ---------------------------------------------------------------------------
+# Group + subcommands
+# ---------------------------------------------------------------------------
+
+
+@click.group()
+def launch():
+    """Launch a CLI agent or desktop app against the local Plano gateway."""
+
+
+@launch.command("claude-cli")
+@click.argument("file", required=False)
+@click.option(
+    "--path", default=".", help="Path to the directory containing plano_config.yaml"
+)
+@click.option(
+    "--settings",
+    default="{}",
+    help="Additional settings as JSON string for the CLI agent.",
+)
+def claude_cli(file, path, settings):
+    """Launch the Claude Code CLI connected to Plano."""
+    _run_cli_agent("claude", file, path, settings)
+
+
+@launch.command("codex")
+@click.argument("file", required=False)
+@click.option(
+    "--path", default=".", help="Path to the directory containing plano_config.yaml"
+)
+@click.option(
+    "--settings",
+    default="{}",
+    help="Additional settings as JSON string for the CLI agent.",
+)
+def codex(file, path, settings):
+    """Launch the Codex CLI connected to Plano."""
+    _run_cli_agent("codex", file, path, settings)
+
+
+@launch.command("claude-desktop")
+@click.option(
+    "--config",
+    "config_path",
+    type=click.Path(dir_okay=False),
+    default=None,
+    help="Path to a Plano config; if Plano isn't already running, "
+    "`planoai up <config>` is invoked first so the gateway is ready before "
+    "Claude Desktop is configured.",
+)
+@click.option(
+    "--no-launch",
+    "no_launch",
+    is_flag=True,
+    default=False,
+    help="Configure Claude Desktop but do not (re)open the app afterwards.",
+)
+@click.option(
+    "--restore",
+    "restore_flag",
+    is_flag=True,
+    default=False,
+    help="Switch Claude Desktop back to its usual Anthropic Claude profile.",
+)
+@click.option(
+    "--yes",
+    "-y",
+    "yes_flag",
+    is_flag=True,
+    default=False,
+    help="Auto-approve restart prompts.",
+)
+@click.option(
+    "--base-url",
+    default=None,
+    help="Plano LLM listener URL (default: derived from --config or running Plano, falling back to http://localhost:12000).",
+)
+def claude_desktop_cmd(config_path, no_launch, restore_flag, yes_flag, base_url):
+    """Configure Claude Desktop to use the local Plano gateway.
+
+    Mirrors `ollama launch claude-desktop`: rewrites Claude Desktop's profile
+    JSONs (with `.bak` backups) to switch into third-party gateway mode pointed
+    at Plano, then optionally restarts Claude Desktop so the change takes
+    effect. When `--config <path>` is supplied and Plano is not already
+    running, this command also starts Plano with that config first, so the
+    end-to-end flow is a single command.
+    """
+    from rich.console import Console
+
+    console = Console()
+
+    err = _cd.supported()
+    if err is not None:
+        console.print(f"[red]✗[/red] {err}")
+        sys.exit(1)
+
+    if restore_flag:
+        if config_path is not None:
+            console.print(
+                "[yellow]⚠[/yellow] --config is ignored when --restore is set."
+            )
+        try:
+            _cd.restore()
+        except Exception as e:
+            console.print(f"[red]✗[/red] Failed to restore Claude Desktop: {e}")
+            sys.exit(1)
+        console.print(f"[green]✓[/green] {_cd.RESTORED_MESSAGE}")
+        if no_launch:
+            return
+        try:
+            _cd.launch_or_restart(
+                "Restart Claude Desktop to use the usual Claude profile?",
+                yes_flag,
+            )
+        except Exception as e:
+            console.print(f"[yellow]⚠[/yellow] Could not restart Claude Desktop: {e}")
+        return
+
+    # Auto-start Plano if --config was provided and nothing is running yet.
+    if config_path is not None:
+        abs_config = os.path.abspath(config_path)
+        if not os.path.exists(abs_config):
+            console.print(f"[red]✗[/red] Config file not found: {abs_config}")
+            sys.exit(1)
+        if _is_plano_running():
+            console.print(
+                "[dim]Plano already running; skipping startup. Using listener "
+                "from [cyan]"
+                f"{abs_config}[/cyan] for the gateway URL.[/dim]"
+            )
+        else:
+            _start_plano_with_config(abs_config, console)
+
+    # Resolve base URL precedence: --base-url > --config file > running Plano > default.
+    resolved_url = (
+        base_url
+        or (
+            _base_url_from_config_file(os.path.abspath(config_path))
+            if config_path is not None
+            else None
+        )
+        or _resolve_base_url_from_running_plano()
+        or _cd.DEFAULT_BASE_URL
+    )
+
+    if not _is_plano_running():
+        console.print(
+            "[yellow]⚠[/yellow] Plano does not appear to be running. "
+            "Start it with [cyan]planoai up[/cyan] (or pass [cyan]--config "
+            "<path>[/cyan]) before using Claude Desktop."
+        )
+
+    console.print(
+        f"[dim]Configuring Claude Desktop to use Plano at "
+        f"[cyan]{resolved_url}[/cyan][/dim]"
+    )
+    try:
+        _cd.configure(resolved_url)
+    except Exception as e:
+        console.print(f"[red]✗[/red] Failed to configure Claude Desktop: {e}")
+        sys.exit(1)
+
+    console.print(f"[green]✓[/green] {_cd.SUCCESS_MESSAGE}")
+    console.print(f"[dim]{_cd.RESTORE_HINT}[/dim]")
+
+    if no_launch:
+        return
+
+    try:
+        _cd.launch_or_restart("Restart Claude Desktop to use Plano?", yes_flag)
+    except Exception as e:
+        console.print(f"[yellow]⚠[/yellow] Could not restart Claude Desktop: {e}")
+
+
+def _resolve_base_url_from_running_plano() -> Optional[str]:
+    """Return ``http://localhost:<port>`` from the config in the current directory.
+
+    Best-effort: the running Plano process is not queried. The config that
+    ``find_config_file(".", None)`` locates is read instead, on the assumption
+    that Plano was started from it. Returns ``None`` when no readable config
+    is found so the caller falls back to ``DEFAULT_BASE_URL``.
+    """
+    try:
+        plano_config_file = find_config_file(".", None)
+    except Exception:
+        # Lookup failures are not fatal here; the caller has a default.
+        return None
+    if not plano_config_file or not os.path.exists(plano_config_file):
+        return None
+
+    # Share the YAML parsing and port resolution with the --config path so
+    # both sources derive the URL identically.
+    return _base_url_from_config_file(plano_config_file)
diff --git a/cli/planoai/main.py b/cli/planoai/main.py
index ea43a1a8a..e1cad8c7d 100644
--- a/cli/planoai/main.py
+++ b/cli/planoai/main.py
@@ -1,4 +1,3 @@
-import json
 import os
 import multiprocessing
 import subprocess
@@ -19,7 +18,6 @@
 from planoai.docker_cli import (
     docker_validate_plano_schema,
     stream_gateway_logs,
-    docker_container_status,
 )
 from planoai.utils import (
     getLogger,
@@ -33,19 +31,17 @@
 from planoai.core import (
     start_plano,
     stop_docker_container,
-    start_cli_agent,
 )
 from planoai.init_cmd import init as init_cmd
+from planoai.launch_cmd import launch as launch_cmd
 from planoai.trace_cmd import trace as trace_cmd, start_trace_listener_background
 from planoai.chatgpt_cmd import chatgpt as chatgpt_cmd
 from planoai.obs_cmd import obs as obs_cmd
 from planoai.consts import (
     DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
     DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT,
-    NATIVE_PID_FILE,
     PLANO_RUN_DIR,
     PLANO_DOCKER_IMAGE,
-    PLANO_DOCKER_NAME,
 )
 from planoai.rich_click_config import configure_rich_click
 from planoai.versioning import check_version_status, get_latest_version, get_version
@@ -53,30 +49,6 @@
 log = getLogger(__name__)
 
 
-def _is_native_plano_running() -> bool:
-    if not os.path.exists(NATIVE_PID_FILE):
-        return False
-    try:
-        with open(NATIVE_PID_FILE, "r") as f:
-            pids = json.load(f)
-    except (OSError, json.JSONDecodeError):
-        return False
-
-    envoy_pid = pids.get("envoy_pid")
-    brightstaff_pid = pids.get("brightstaff_pid")
-    if not isinstance(envoy_pid, int) or not isinstance(brightstaff_pid, int):
-        return False
-
-    for pid in (envoy_pid, brightstaff_pid):
-        try:
-            os.kill(pid, 0)
-        except ProcessLookupError:
-            return False
-        except PermissionError:
-            continue
-    return True
-
-
 def _is_port_in_use(port: int) -> bool:
     """Check if a TCP port is already bound on localhost."""
     import socket
@@ -690,57 +662,12 @@ def logs(debug, follow, docker):
             plano_process.terminate()
 
 
-@click.command()
-@click.argument("type", type=click.Choice(["claude", "codex"]), required=True)
-@click.argument("file", required=False)  # Optional file argument
-@click.option(
-    "--path", default=".", help="Path to the directory containing plano_config.yaml"
-)
-@click.option(
-    "--settings",
-    default="{}",
-    help="Additional settings as JSON string for the CLI agent.",
-)
-def cli_agent(type, file, path, settings):
-    """Start a CLI agent connected to Plano.
-
-    CLI_AGENT: The type of CLI agent to start ('claude' or 'codex')
-    """
-
-    native_running = _is_native_plano_running()
-    docker_running = False
-    if not native_running:
-        docker_running = docker_container_status(PLANO_DOCKER_NAME) == "running"
-
-    if not (native_running or docker_running):
-        log.error("Plano is not running.")
-        log.error(
-            "Start Plano first using 'planoai up <config.yaml>' (native or --docker mode)."
-        )
-        sys.exit(1)
-
-    # Determine plano_config.yaml path
-    plano_config_file = find_config_file(path, file)
-    if not os.path.exists(plano_config_file):
-        log.error(f"Config file not found: {plano_config_file}")
-        sys.exit(1)
-
-    try:
-        start_cli_agent(plano_config_file, type, settings)
-    except SystemExit:
-        # Re-raise SystemExit to preserve exit codes
-        raise
-    except Exception as e:
-        click.echo(f"Error: {e}")
-        sys.exit(1)
-
-
 # add commands to the main group
 main.add_command(up)
 main.add_command(down)
 main.add_command(build)
 main.add_command(logs)
-main.add_command(cli_agent)
+main.add_command(launch_cmd, name="launch")
 main.add_command(generate_prompt_targets)
 main.add_command(init_cmd, name="init")
 main.add_command(trace_cmd, name="trace")
diff --git a/cli/planoai/rich_click_config.py b/cli/planoai/rich_click_config.py
index fe90dcf13..5c0e8e076 100644
--- a/cli/planoai/rich_click_config.py
+++ b/cli/planoai/rich_click_config.py
@@ -46,6 +46,20 @@ def configure_rich_click(plano_color: str) -> None:
                 "options": ["--debug", "--follow"],
             },
         ],
+        "planoai launch claude-desktop": [
+            {
+                "name": "Plano gateway",
+                "options": ["--config", "--base-url"],
+            },
+            {
+                "name": "Mode",
+                "options": ["--no-launch", "--restore"],
+            },
+            {
+                "name": "Confirmation",
+                "options": ["--yes"],
+            },
+        ],
     }
 
     # Command groups for main help.
@@ -57,7 +71,7 @@ def configure_rich_click(plano_color: str) -> None:
             },
             {
                 "name": "Agent Commands",
-                "commands": ["cli-agent"],
+                "commands": ["launch"],
             },
             {
                 "name": "Observability",
@@ -68,4 +82,14 @@ def configure_rich_click(plano_color: str) -> None:
                 "commands": ["generate-prompt-targets"],
             },
         ],
+        "planoai launch": [
+            {
+                "name": "CLI Agents",
+                "commands": ["claude-cli", "codex"],
+            },
+            {
+                "name": "Desktop Apps",
+                "commands": ["claude-desktop"],
+            },
+        ],
     }
diff --git a/cli/test/test_claude_desktop.py b/cli/test/test_claude_desktop.py
new file mode 100644
index 000000000..b5b07f8cd
--- /dev/null
+++ b/cli/test/test_claude_desktop.py
@@ -0,0 +1,366 @@
+"""Tests for `planoai launch claude-desktop` configuration logic."""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+
+import pytest
+
+from planoai import claude_desktop as cd
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def fake_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
+    """Pretend we're on macOS with a fresh home directory.
+
+    Plano's local gateway has no API key concept, so by default we ensure
+    ``$PLANO_API_KEY`` is unset; tests that exercise the env-override path
+    re-set it explicitly.
+    """
+    monkeypatch.setattr(cd, "_GOOS", "darwin")
+    monkeypatch.setattr(cd, "_user_home", lambda _: str(tmp_path))
+    monkeypatch.delenv("PLANO_API_KEY", raising=False)
+    return tmp_path
+
+
+def _normal_config_path(home: Path) -> Path:
+    return (
+        home
+        / "Library"
+        / "Application Support"
+        / "Claude"
+        / "claude_desktop_config.json"
+    )
+
+
+def _third_party_root(home: Path) -> Path:
+    return home / "Library" / "Application Support" / "Claude-3p"
+
+
+def _third_party_config_path(home: Path) -> Path:
+    return _third_party_root(home) / "claude_desktop_config.json"
+
+
+def _meta_path(home: Path) -> Path:
+    return _third_party_root(home) / "configLibrary" / "_meta.json"
+
+
+def _profile_path(home: Path) -> Path:
+    return _third_party_root(home) / "configLibrary" / f"{cd.PROFILE_ID}.json"
+
+
+# ---------------------------------------------------------------------------
+# configure() / restore()
+# ---------------------------------------------------------------------------
+
+
+def test_configure_writes_all_four_files_with_default_api_key(fake_home):
+    cd.configure("http://localhost:12000")
+
+    normal_cfg = json.loads(_normal_config_path(fake_home).read_text())
+    assert normal_cfg["deploymentMode"] == "3p"
+
+    third_cfg = json.loads(_third_party_config_path(fake_home).read_text())
+    assert third_cfg["deploymentMode"] == "3p"
+
+    meta = json.loads(_meta_path(fake_home).read_text())
+    assert meta["appliedId"] == cd.PROFILE_ID
+    assert any(
+        isinstance(e, dict) and e.get("id") == cd.PROFILE_ID for e in meta["entries"]
+    )
+
+    profile = json.loads(_profile_path(fake_home).read_text())
+    assert profile["inferenceProvider"] == "gateway"
+    assert profile["inferenceGatewayBaseUrl"] == "http://localhost:12000"
+    # No env override and no pre-existing profile -> placeholder is written.
+    assert profile["inferenceGatewayApiKey"] == cd.DEFAULT_API_KEY
+    assert profile["inferenceGatewayAuthScheme"] == "bearer"
+    assert profile["disableDeploymentModeChooser"] is True
+    assert "inferenceModels" not in profile
+
+
+def test_configure_uses_env_override_when_set(fake_home, monkeypatch):
+    monkeypatch.setenv("PLANO_API_KEY", "from-env")
+    cd.configure("http://localhost:12000")
+
+    profile = json.loads(_profile_path(fake_home).read_text())
+    assert profile["inferenceGatewayApiKey"] == "from-env"
+
+
+def test_configure_preserves_existing_profile_api_key(fake_home):
+    profile = _profile_path(fake_home)
+    profile.parent.mkdir(parents=True, exist_ok=True)
+    profile.write_text(json.dumps({"inferenceGatewayApiKey": "from-profile"}))
+
+    cd.configure("http://localhost:12000")
+
+    written = json.loads(profile.read_text())
+    assert written["inferenceGatewayApiKey"] == "from-profile"
+
+
+def test_configure_does_not_call_network(fake_home, monkeypatch):
+    """Plano's local gateway is not validated at configure time. We must not
+    attempt any HTTP request — a 503 from the gateway must not block setup.
+    """
+
+    def boom(*_args, **_kwargs):
+        raise AssertionError("configure() must not perform network calls")
+
+    monkeypatch.setattr("urllib.request.urlopen", boom)
+    cd.configure("http://localhost:12000")
+
+    profile = json.loads(_profile_path(fake_home).read_text())
+    assert profile["inferenceProvider"] == "gateway"
+
+
+def test_configure_preserves_existing_unrelated_keys(fake_home):
+    normal_path = _normal_config_path(fake_home)
+    normal_path.parent.mkdir(parents=True, exist_ok=True)
+    normal_path.write_text(
+        json.dumps({"someOtherSetting": 123, "deploymentMode": "1p"})
+    )
+
+    cd.configure("http://localhost:12000")
+
+    cfg = json.loads(normal_path.read_text())
+    assert cfg["someOtherSetting"] == 123
+    assert cfg["deploymentMode"] == "3p"
+
+
+def test_configure_writes_backup_of_existing_files(fake_home):
+    normal_path = _normal_config_path(fake_home)
+    normal_path.parent.mkdir(parents=True, exist_ok=True)
+    normal_path.write_text('{"deploymentMode":"1p"}')
+
+    cd.configure("http://localhost:12000")
+
+    backup = normal_path.with_suffix(normal_path.suffix + ".bak")
+    assert backup.exists()
+    assert json.loads(backup.read_text())["deploymentMode"] == "1p"
+
+
+def test_restore_reverts_deployment_mode_and_strips_gateway_keys(fake_home):
+    cd.configure("http://localhost:12000")
+    cd.restore()
+
+    assert (
+        json.loads(_normal_config_path(fake_home).read_text())["deploymentMode"] == "1p"
+    )
+    third_cfg = json.loads(_third_party_config_path(fake_home).read_text())
+    assert third_cfg["deploymentMode"] == "1p"
+
+    meta = json.loads(_meta_path(fake_home).read_text())
+    assert meta.get("appliedId") != cd.PROFILE_ID
+    assert all(
+        not (isinstance(e, dict) and e.get("id") == cd.PROFILE_ID)
+        for e in meta.get("entries", [])
+    )
+
+    profile = json.loads(_profile_path(fake_home).read_text())
+    assert profile["disableDeploymentModeChooser"] is False
+    for stripped in (
+        "inferenceProvider",
+        "inferenceGatewayBaseUrl",
+        "inferenceGatewayAuthScheme",
+        "inferenceModels",
+    ):
+        assert stripped not in profile
+
+
+def test_restore_meta_keeps_unrelated_entries(fake_home):
+    meta_path = _meta_path(fake_home)
+    meta_path.parent.mkdir(parents=True, exist_ok=True)
+    meta_path.write_text(
+        json.dumps(
+            {
+                "appliedId": cd.PROFILE_ID,
+                "entries": [
+                    {"id": cd.PROFILE_ID, "name": "Plano"},
+                    {"id": "00000000-0000-0000-0000-000000000001", "name": "Other"},
+                ],
+            }
+        )
+    )
+
+    cd._restore_meta(str(meta_path))
+
+    meta = json.loads(meta_path.read_text())
+    assert meta.get("appliedId") in (None, "")
+    ids = [e["id"] for e in meta["entries"] if isinstance(e, dict)]
+    assert ids == ["00000000-0000-0000-0000-000000000001"]
+
+
+# ---------------------------------------------------------------------------
+# is_configured()
+# ---------------------------------------------------------------------------
+
+
+def test_is_configured_false_on_fresh_home(fake_home):
+    assert cd.is_configured() is False
+
+
+def test_is_configured_true_after_configure(fake_home):
+    cd.configure("http://localhost:12000")
+    assert cd.is_configured() is True
+
+
+def test_is_configured_false_when_only_normal_config_set(fake_home):
+    cd.configure("http://localhost:12000")
+
+    third_cfg = _third_party_config_path(fake_home)
+    data = json.loads(third_cfg.read_text())
+    data["deploymentMode"] = "1p"
+    third_cfg.write_text(json.dumps(data))
+
+    assert cd.is_configured() is False
+
+
+# ---------------------------------------------------------------------------
+# API key resolution (placeholder by default; env override; profile preserve)
+# ---------------------------------------------------------------------------
+
+
+def test_resolve_api_key_returns_placeholder_when_no_inputs(fake_home):
+    assert cd._resolve_api_key([]) == cd.DEFAULT_API_KEY
+
+
+def test_resolve_api_key_uses_env_when_set(fake_home, monkeypatch):
+    monkeypatch.setenv("PLANO_API_KEY", "from-env")
+    profile = _profile_path(fake_home)
+    profile.parent.mkdir(parents=True, exist_ok=True)
+    profile.write_text(json.dumps({"inferenceGatewayApiKey": "from-profile"}))
+
+    # Env wins over profile.
+    assert cd._resolve_api_key([str(profile)]) == "from-env"
+
+
+def test_resolve_api_key_falls_back_to_existing_profile(fake_home):
+    profile = _profile_path(fake_home)
+    profile.parent.mkdir(parents=True, exist_ok=True)
+    profile.write_text(json.dumps({"inferenceGatewayApiKey": "from-profile"}))
+
+    assert cd._resolve_api_key([str(profile)]) == "from-profile"
+
+
+def test_resolve_api_key_skips_blank_env(fake_home, monkeypatch):
+    monkeypatch.setenv("PLANO_API_KEY", "   ")
+    assert cd._resolve_api_key([]) == cd.DEFAULT_API_KEY
+
+
+# ---------------------------------------------------------------------------
+# Atomic write
+# ---------------------------------------------------------------------------
+
+
+def test_atomic_write_creates_backup_of_existing_file(tmp_path):
+    target = tmp_path / "deep" / "nested" / "file.json"
+    target.parent.mkdir(parents=True, exist_ok=True)
+    target.write_text("ORIGINAL")
+
+    cd._atomic_write_with_backup(str(target), b"NEW")
+
+    assert target.read_text() == "NEW"
+    assert (tmp_path / "deep" / "nested" / "file.json.bak").read_text() == "ORIGINAL"
+
+
+def test_atomic_write_skips_backup_when_no_existing_file(tmp_path):
+    target = tmp_path / "fresh.json"
+    cd._atomic_write_with_backup(str(target), b"DATA")
+
+    assert target.read_text() == "DATA"
+    assert not (tmp_path / "fresh.json.bak").exists()
+
+
+def test_atomic_write_does_not_truncate_on_failure(tmp_path, monkeypatch):
+    target = tmp_path / "file.json"
+    target.write_text("ORIGINAL")
+
+    real_replace = os.replace
+
+    def boom(_src, _dst):
+        raise OSError("disk full")
+
+    monkeypatch.setattr(os, "replace", boom)
+    with pytest.raises(OSError):
+        cd._atomic_write_with_backup(str(target), b"NEW")
+    monkeypatch.setattr(os, "replace", real_replace)
+
+    assert target.read_text() == "ORIGINAL"
+    leftover = list(tmp_path.glob(".plano_*.tmp"))
+    assert leftover == []
+
+
+# ---------------------------------------------------------------------------
+# Platform support
+# ---------------------------------------------------------------------------
+
+
+def test_supported_returns_error_on_linux(monkeypatch):
+    monkeypatch.setattr(cd, "_GOOS", "linux")
+    msg = cd.supported()
+    assert msg is not None
+    assert "macOS" in msg and "Windows" in msg
+
+
+def test_supported_returns_none_on_darwin(monkeypatch):
+    monkeypatch.setattr(cd, "_GOOS", "darwin")
+    assert cd.supported() is None
+
+
+def test_configure_raises_on_unsupported_platform(monkeypatch):
+    monkeypatch.setattr(cd, "_GOOS", "linux")
+    with pytest.raises(RuntimeError, match="macOS"):
+        cd.configure()
+
+
+def test_restore_raises_on_unsupported_platform(monkeypatch):
+    monkeypatch.setattr(cd, "_GOOS", "linux")
+    with pytest.raises(RuntimeError, match="macOS"):
+        cd.restore()
+
+
+# ---------------------------------------------------------------------------
+# launch_or_restart()
+# ---------------------------------------------------------------------------
+
+
+def test_launch_or_restart_opens_when_not_running(monkeypatch):
+    monkeypatch.setattr(cd, "_GOOS", "darwin")
+    monkeypatch.setattr(cd, "_is_running", lambda: False)
+    opened = []
+    monkeypatch.setattr(cd, "_open", lambda: opened.append(True))
+    monkeypatch.setattr(
+        cd, "_quit", lambda: pytest.fail("should not quit when not running")
+    )
+
+    cd.launch_or_restart("prompt", yes=True)
+    assert opened == [True]
+
+
+def test_launch_or_restart_with_yes_quits_then_opens(monkeypatch):
+    monkeypatch.setattr(cd, "_GOOS", "darwin")
+    running = [True]
+    monkeypatch.setattr(cd, "_is_running", lambda: running[0])
+
+    def quit_app():
+        running[0] = False
+
+    quit_calls = []
+    open_calls = []
+    monkeypatch.setattr(
+        cd,
+        "_quit",
+        lambda: (quit_calls.append(True), quit_app()),
+    )
+    monkeypatch.setattr(cd, "_open", lambda: open_calls.append(True))
+    monkeypatch.setattr(cd, "_sleep", lambda _: None)
+
+    cd.launch_or_restart("Restart?", yes=True)
+    assert quit_calls == [True]
+    assert open_calls == [True]
diff --git a/cli/test/test_launch_cmd.py b/cli/test/test_launch_cmd.py
new file mode 100644
index 000000000..5e304bd10
--- /dev/null
+++ b/cli/test/test_launch_cmd.py
@@ -0,0 +1,231 @@
+"""Tests for the `planoai launch claude-desktop` click command.
+
+Focused on the wiring between the CLI flags and the underlying
+`claude_desktop` module / `up` invocation. The actual JSON-rewriting and key
+validation are covered in `test_claude_desktop.py`.
+"""
+
+from __future__ import annotations
+
+from click.testing import CliRunner
+
+from planoai import claude_desktop as cd
+from planoai import launch_cmd as lc
+
+
+def _stub_cd(monkeypatch):
+    """Replace ``claude_desktop`` side-effects with no-ops + call recorders."""
+    calls: dict[str, list] = {
+        "configure": [],
+        "restore": [],
+        "launch_or_restart": [],
+    }
+    monkeypatch.setattr(cd, "supported", lambda: None)
+    monkeypatch.setattr(
+        cd,
+        "configure",
+        lambda base_url, **_kw: calls["configure"].append(base_url),
+    )
+    monkeypatch.setattr(cd, "restore", lambda: calls["restore"].append(True))
+    monkeypatch.setattr(
+        cd,
+        "launch_or_restart",
+        lambda prompt, yes: calls["launch_or_restart"].append((prompt, yes)),
+    )
+    return calls
+
+
+def test_config_path_starts_plano_when_not_running(tmp_path, monkeypatch):
+    config = tmp_path / "plano_config.yaml"
+    config.write_text(
+        "version: v0.4.0\n"
+        "listeners:\n"
+        "  - name: llm\n"
+        "    type: model\n"
+        "    port: 12345\n"
+        "    address: 0.0.0.0\n"
+        "model_providers: []\n"
+    )
+
+    cd_calls = _stub_cd(monkeypatch)
+    monkeypatch.setattr(lc, "_is_plano_running", lambda: False)
+
+    up_calls = []
+
+    def fake_up(
+        file,
+        path,
+        foreground,
+        with_tracing,
+        tracing_port,
+        docker,
+        verbose,
+        listener_port,
+    ):
+        up_calls.append(
+            {
+                "file": file,
+                "foreground": foreground,
+                "docker": docker,
+                "listener_port": listener_port,
+            }
+        )
+
+    from planoai.main import up as up_cmd
+
+    monkeypatch.setattr(up_cmd, "callback", fake_up)
+
+    runner = CliRunner()
+    result = runner.invoke(
+        lc.launch,
+        ["claude-desktop", "--config", str(config), "--yes"],
+    )
+
+    assert result.exit_code == 0, result.output
+    assert len(up_calls) == 1
+    assert up_calls[0]["file"] == str(config)
+    assert up_calls[0]["foreground"] is False
+    assert cd_calls["configure"] == ["http://localhost:12345"]
+    # --yes implies we restart Claude Desktop after configuring.
+    assert cd_calls["launch_or_restart"]
+    assert cd_calls["launch_or_restart"][0][1] is True
+
+
+def test_config_path_skips_up_when_plano_already_running(tmp_path, monkeypatch):
+    config = tmp_path / "plano_config.yaml"
+    config.write_text(
+        "version: v0.4.0\n"
+        "listeners:\n"
+        "  - name: llm\n"
+        "    type: model\n"
+        "    port: 12500\n"
+        "model_providers: []\n"
+    )
+
+    cd_calls = _stub_cd(monkeypatch)
+    monkeypatch.setattr(lc, "_is_plano_running", lambda: True)
+
+    sentinel = []
+
+    def boom(*args, **kwargs):
+        sentinel.append("called")
+
+    from planoai.main import up as up_cmd
+
+    monkeypatch.setattr(up_cmd, "callback", boom)
+
+    runner = CliRunner()
+    result = runner.invoke(
+        lc.launch,
+        ["claude-desktop", "--config", str(config), "--no-launch"],
+    )
+
+    assert result.exit_code == 0, result.output
+    assert sentinel == [], "should not invoke up.callback when Plano is already running"
+    assert cd_calls["configure"] == ["http://localhost:12500"]
+    # --no-launch skips the restart step.
+    assert cd_calls["launch_or_restart"] == []
+
+
+def test_config_path_must_exist(tmp_path, monkeypatch):
+    cd_calls = _stub_cd(monkeypatch)
+    monkeypatch.setattr(lc, "_is_plano_running", lambda: False)
+
+    runner = CliRunner()
+    result = runner.invoke(
+        lc.launch,
+        ["claude-desktop", "--config", str(tmp_path / "nope.yaml")],
+    )
+
+    assert result.exit_code != 0
+    assert "not found" in result.output.lower()
+    assert cd_calls["configure"] == []
+
+
+def test_no_launch_skips_open(monkeypatch):
+    cd_calls = _stub_cd(monkeypatch)
+    monkeypatch.setattr(lc, "_is_plano_running", lambda: True)
+
+    runner = CliRunner()
+    result = runner.invoke(
+        lc.launch,
+        ["claude-desktop", "--no-launch", "--base-url", "http://localhost:9999"],
+    )
+
+    assert result.exit_code == 0, result.output
+    assert cd_calls["configure"] == ["http://localhost:9999"]
+    assert cd_calls["launch_or_restart"] == []
+
+
+def test_restore_ignores_config_path(tmp_path, monkeypatch):
+    config = tmp_path / "plano_config.yaml"
+    config.write_text("version: v0.4.0\nmodel_providers: []\n")
+
+    cd_calls = _stub_cd(monkeypatch)
+    monkeypatch.setattr(lc, "_is_plano_running", lambda: True)
+
+    runner = CliRunner()
+    result = runner.invoke(
+        lc.launch,
+        ["claude-desktop", "--restore", "--config", str(config), "--yes"],
+    )
+
+    assert result.exit_code == 0, result.output
+    assert cd_calls["restore"] == [True]
+    assert cd_calls["configure"] == []
+    assert "ignored" in result.output.lower()
+
+
+def test_base_url_overrides_config_file(tmp_path, monkeypatch):
+    config = tmp_path / "plano_config.yaml"
+    config.write_text(
+        "version: v0.4.0\n"
+        "listeners:\n"
+        "  - name: llm\n"
+        "    type: model\n"
+        "    port: 12345\n"
+        "model_providers: []\n"
+    )
+
+    cd_calls = _stub_cd(monkeypatch)
+    monkeypatch.setattr(lc, "_is_plano_running", lambda: True)
+
+    runner = CliRunner()
+    result = runner.invoke(
+        lc.launch,
+        [
+            "claude-desktop",
+            "--config",
+            str(config),
+            "--base-url",
+            "http://10.0.0.5:8080",
+            "--no-launch",
+        ],
+    )
+
+    assert result.exit_code == 0, result.output
+    assert cd_calls["configure"] == ["http://10.0.0.5:8080"]
+
+
+def test_unsupported_platform_errors(monkeypatch):
+    monkeypatch.setattr(
+        cd,
+        "supported",
+        lambda: "Claude Desktop launch is only supported on macOS and Windows",
+    )
+
+    runner = CliRunner()
+    result = runner.invoke(lc.launch, ["claude-desktop"])
+
+    assert result.exit_code != 0
+    assert "macOS" in result.output
+
+
+def test_help_lists_new_flags(monkeypatch):
+    runner = CliRunner()
+    result = runner.invoke(lc.launch, ["claude-desktop", "--help"])
+
+    assert result.exit_code == 0, result.output
+    assert "--config" in result.output
+    assert "--no-launch" in result.output
+    assert "--restore" in result.output
diff --git a/demos/llm_routing/frontier_model_routing/README.md b/demos/llm_routing/frontier_model_routing/README.md
new file mode 100644
index 000000000..c12419a53
--- /dev/null
+++ b/demos/llm_routing/frontier_model_routing/README.md
@@ -0,0 +1,423 @@
+# Frontier Model Routing: Sonnet 4.6 + GPT 5.5 + Opus 4.7
+
+A worked example of using Plano to route across the three current frontier
+LLMs from three different providers — without your application caring which
+model handled any given request, and with **per-route fallbacks** so a
+provider outage never takes the demo down.
+
+| Tier             | Primary model                          | Provider           | What it's great at                                       |
+| ---------------- | -------------------------------------- | ------------------ | -------------------------------------------------------- |
+| `frontier.fast`  | `anthropic-claude-sonnet-4-6`          | DigitalOcean       | Daily driver — chat, summaries, drafts, light reasoning  |
+| `frontier.smart` | `gpt-5.5`                              | OpenAI             | Multi-step reasoning, math, tool/function calling        |
+| `frontier.max`   | `claude-opus-4-7`                      | Anthropic          | Code, deep analysis, long-context evaluation, refactors  |
+
+Each prompt is routed to the right model automatically — Plano's
+preference-aligned router (Plano-Orchestrator) reads the user's intent and
+dispatches to the route whose `routing_preferences` description best matches.
+Each route is backed by an **ordered candidate pool**, so when the primary
+provider returns a `429`/`5xx` the next entry in the pool serves the request.
+
+```
+                        ┌────────────────────────────────────┐
+client ──── /v1 ───▶    │  Plano gateway (port 12000)        │
+(OpenAI / Anthropic /   │   ├── Plano-Orchestrator (router)  │
+ Claude Desktop / SDK)  │   └── Envoy + brightstaff          │
+                        └────────────────────────────────────┘
+                              │              │             │
+                  ┌───────────┘              │             └────────────┐
+                  ▼                          ▼                          ▼
+       DigitalOcean Gradient AI       OpenAI                    Anthropic
+   anthropic-claude-sonnet-4-6      gpt-5.5                  claude-opus-4-7
+   (daily conversation route)   (complex reasoning)      (code + deep analysis)
+```
+
+## Why this layout
+
+- **Cost-quality fit per request.** Casual prompts go to Sonnet 4.6 on
+  DigitalOcean (cheaper inference, still excellent quality); complex
+  reasoning goes to GPT 5.5; code and deep analysis go to Opus 4.7.
+- **Provider diversity = resilience.** Every route lists a fallback model
+  from a different provider — if Anthropic rate-limits Opus, Plano hands
+  the next request in that route to GPT 5.5 with no client changes.
+- **Zero client changes.** The OpenAI SDK, Anthropic SDK, Claude Desktop,
+  Codex CLI, and curl all hit the same `:12000` endpoint and use the same
+  alias names. Switching `frontier.max` from Opus to whatever ships next
+  is a one-line config change.
+
+## The new routing-preferences architecture (v0.4.0)
+
+This demo uses Plano's **top-level `routing_preferences`** block — the
+canonical shape since `v0.4.0`. The older inline form (preferences nested
+under each `model_provider`) is auto-migrated by the Plano CLI but emits a
+deprecation warning. The top-level shape gives each route an ordered
+candidate pool, which is what makes per-route fallbacks possible.
+
+```yaml
+routing_preferences:
+  - name: code generation
+    description: writing new functions, classes, scripts, or boilerplate; implementing APIs; producing unit tests
+    models:
+      - digitalocean/anthropic-claude-opus-4.7   # primary
+      - digitalocean/openai-gpt-5.5              # fallback on 429/5xx
+```
+
+What changes vs. the v0.3.0 inline style:
+
+| Capability                                | v0.3.0 inline | v0.4.0 top-level |
+| ----------------------------------------- | :-----------: | :--------------: |
+| Multiple models can serve the same route  |       no      |        yes       |
+| Explicit primary + ranked fallback chain  |       no      |        yes       |
+| Per-request override via request body     |       no      |        yes       |
+| Decision-only endpoint (`/routing/v1/...`)|       no      |        yes       |
+| `X-Model-Affinity` header for agent loops |       no      |        yes       |
+
+## Prerequisites
+
+- **Plano CLI** — `uv tool install planoai` or `pip install planoai`
+- API keys for all three providers:
+
+  | Env var             | Where to get it                                                          |
+  | ------------------- | ------------------------------------------------------------------------ |
+  | `DO_API_KEY`        | <https://cloud.digitalocean.com/account/api/tokens> (Gradient AI access) |
+  | `OPENAI_API_KEY`    | <https://platform.openai.com/api-keys>                                   |
+  | `ANTHROPIC_API_KEY` | <https://console.anthropic.com/>                                         |
+
+## Quick start
+
+```bash
+export DO_API_KEY=...
+export OPENAI_API_KEY=...
+export ANTHROPIC_API_KEY=...
+
+cd demos/llm_routing/frontier_model_routing
+./run_demo.sh
+```
+
+`run_demo.sh` writes a local `.env`, then runs `planoai up config.yaml`.
+Plano daemonizes and is ready when the script returns.
+
+To shut down:
+
+```bash
+./run_demo.sh down
+```
+
+## Try it
+
+### Let Plano pick the right tier
+
+```bash
+./test.sh
+```
+
+The script does two things for each prompt:
+
+1. Calls `POST /routing/v1/chat/completions` — Plano's **decision-only**
+   endpoint — to print the matched route name and the ranked candidate
+   pool for that prompt.
+2. Calls `POST /v1/chat/completions` to actually run the request and
+   prints the model that handled it.
+
+A healthy run resolves like this:
+
+```
+[daily conversation -> expects DigitalOcean Sonnet 4.6]
+  matched route:  daily conversation
+  ranked models:  ["digitalocean/anthropic-claude-4.6-sonnet","digitalocean/openai-gpt-5.5"]
+  routed_to:      digitalocean/anthropic-claude-4.6-sonnet
+
+[complex reasoning -> expects OpenAI GPT 5.5]
+  matched route:  complex reasoning
+  ranked models:  ["digitalocean/openai-gpt-5.5","digitalocean/anthropic-claude-opus-4.7"]
+  routed_to:      digitalocean/openai-gpt-5.5
+
+[code generation -> expects Anthropic Opus 4.7]
+  matched route:  code generation
+  ranked models:  ["digitalocean/anthropic-claude-opus-4.7","digitalocean/openai-gpt-5.5"]
+  routed_to:      digitalocean/anthropic-claude-opus-4.7
+```
+
+The trick: every request is sent with `model: frontier.fast`, but Plano runs
+the orchestrator on every chat completion when `routing_preferences` are
+configured and overrides the `model` when a preference matches. The
+`frontier.fast` value is the explicit fallback used when no preference
+matches — so casual prompts stay on the cheap tier and only "real" reasoning
+or code work escalates to GPT 5.5 or Opus 4.7.
+
+Want to watch the router decide live? In a second terminal:
+
+```bash
+planoai trace
+```
+
+You'll see the orchestrator's route selection for each request, including
+the matched preference, ranked models, and response time.
+
+### Inspect the routing decision without burning a token
+
+The `/routing/v1/...` endpoint returns the routing decision **without
+calling the upstream model**. Useful for previewing classification, building
+a UI, or wiring fallback logic into a custom client.
+
+```bash
+curl -sS -X POST http://localhost:12000/routing/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "frontier.fast",
+    "messages": [{"role":"user","content":"refactor this function to remove the global"}]
+  }' | jq .
+```
+
+```json
+{
+  "models": ["digitalocean/anthropic-claude-opus-4.7", "digitalocean/openai-gpt-5.5"],
+  "route": "code generation",
+  "trace_id": "4bf92f3577b34da6a3ce929d0e0e4736",
+  "pinned": false
+}
+```
+
+Use `models[0]` as the primary; retry with `models[1]` on `429` / `5xx`.
+
+### Pin a route across an agent loop with `X-Model-Affinity`
+
+In a tool-using agent loop a single user task may produce a dozen LLM
+calls. Their topics drift (tool selection looks like code, summarising
+results looks like analysis), and the router would otherwise route each
+turn independently — bouncing between providers and invalidating their
+KV caches. Pin the decision once with an arbitrary session id:
+
+```bash
+SID=$(uuidgen)
+
+curl -sS -X POST http://localhost:12000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "X-Model-Affinity: $SID" \
+  -d '{"model":"frontier.fast","messages":[{"role":"user","content":"start a refactor of the auth module"}]}'
+
+# every subsequent call with the same SID skips routing and reuses the
+# cached model decision until the session TTL (10 min by default) expires.
+curl -sS -X POST http://localhost:12000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "X-Model-Affinity: $SID" \
+  -d '{"model":"frontier.fast","messages":[{"role":"user","content":"now write the unit tests"}]}'
+```
+
+TTL and cache size are configurable under `routing:` in `config.yaml`.
+
+### Override the routing policy per-request
+
+Sometimes one caller needs a different policy without redeploying the
+gateway. Send `routing_preferences` inline in the request body — it is
+stripped before forwarding upstream:
+
+```bash
+curl -sS -X POST http://localhost:12000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "frontier.fast",
+    "messages": [{"role":"user","content":"draft me a haiku about Postgres"}],
+    "routing_preferences": [
+      {
+        "name": "creative writing",
+        "description": "poetry, fiction, lyrical or playful prose",
+        "models": ["digitalocean/anthropic-claude-opus-4.7", "digitalocean/openai-gpt-5.5"]
+      }
+    ]
+  }' | jq .
+```
+
+### Request a specific tier by alias
+
+The alias in `model` is what serves the request when the prompt matches no
+preference description (a matching preference overrides it — see "Let Plano
+pick the right tier" above). Send a tier's alias directly:
+
+```bash
+# DigitalOcean Sonnet 4.6 — fast and cheap
+curl -sS -X POST http://localhost:12000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model":"frontier.fast","messages":[{"role":"user","content":"hello"}]}' | jq .
+
+# OpenAI GPT 5.5
+curl -sS -X POST http://localhost:12000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model":"frontier.smart","messages":[{"role":"user","content":"hello"}]}' | jq .
+
+# Anthropic Opus 4.7
+curl -sS -X POST http://localhost:12000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model":"frontier.max","messages":[{"role":"user","content":"hello"}]}' | jq .
+```
+
+### From a Claude-native client (Anthropic Messages API)
+
+Plano translates between OpenAI and Anthropic shapes, so the same gateway
+serves both client SDKs:
+
+```bash
+curl -sS -X POST http://localhost:12000/v1/messages \
+  -H "Content-Type: application/json" \
+  -H "anthropic-version: 2023-06-01" \
+  -H "x-api-key: test-key" \
+  -d '{
+    "model": "frontier.max",
+    "max_tokens": 512,
+    "messages": [{"role":"user","content":"explain CAP theorem like I have a CS undergrad background"}]
+  }' | jq .
+```
+
+### From Claude Desktop
+
+Once Plano is up, point Claude Desktop at it with one command:
+
+```bash
+planoai launch claude-desktop --config config.yaml
+```
+
+Claude Desktop will switch into third-party gateway mode pointed at
+`http://localhost:12000`, auto-discover the three model aliases via
+`/v1/models`, and let you pick `frontier.fast` / `.smart` / `.max` from the
+in-app model selector. To revert: `planoai launch claude-desktop --restore`.
+
+### From Codex CLI
+
+```bash
+planoai launch codex
+codex --model frontier.smart   # or frontier.fast / frontier.max
+```
+
+### From the Claude Code CLI
+
+```bash
+planoai launch claude-cli
+```
+
+The CLI will use Plano as its Anthropic endpoint; ask it for code-heavy work
+and it'll resolve to Opus 4.7 automatically.
+
+## Config walkthrough
+
+[`config.yaml`](config.yaml) declares each provider once, then declares
+**top-level routing preferences** that reference those providers by their
+full `<provider>/<model>` name. Each route owns an ordered `models` pool —
+primary first, fallbacks next.
+
+```yaml
+model_providers:
+  - model: digitalocean/anthropic-claude-4.6-sonnet
+    access_key: $DO_API_KEY
+    default: true                         # used when no preference matches
+  - model: digitalocean/openai-gpt-5.5
+    access_key: $DO_API_KEY
+  - model: digitalocean/anthropic-claude-opus-4.7
+    access_key: $DO_API_KEY
+
+routing_preferences:
+  - name: code generation
+    description: writing new functions, classes, scripts, or boilerplate; implementing APIs; producing unit tests; refactoring code
+    models:
+      - digitalocean/anthropic-claude-opus-4.7   # primary
+      - digitalocean/openai-gpt-5.5              # fallback on 429 / 5xx
+
+  - name: deep analysis
+    description: long-form analysis, architecture review, security review, evaluating tradeoffs, structured critique
+    models:
+      - digitalocean/anthropic-claude-opus-4.7
+      - digitalocean/openai-gpt-5.5
+
+  - name: complex reasoning
+    description: multi-step reasoning, mathematical problem solving, structured planning, tool and function calling, data extraction
+    models:
+      - digitalocean/openai-gpt-5.5
+      - digitalocean/anthropic-claude-opus-4.7
+
+  - name: daily conversation
+    description: general chat, casual Q&A, summaries, drafting messages, quick rewrites, day-to-day requests where speed and cost matter
+    models:
+      - digitalocean/anthropic-claude-4.6-sonnet
+      - digitalocean/openai-gpt-5.5
+
+model_aliases:
+  frontier.fast:  { target: digitalocean/anthropic-claude-4.6-sonnet }
+  frontier.smart: { target: digitalocean/openai-gpt-5.5 }
+  frontier.max:   { target: digitalocean/anthropic-claude-opus-4.7 }
+```
+
+A few things to call out:
+
+1. **Preference *descriptions* drive routing accuracy.** They're embedded
+   into the orchestrator's prompt; vague descriptions = vague routing.
+   Following the [LLM Routing best practices](../../../docs/source/guides/llm_router.rst):
+   - keep names specific and non-overlapping,
+   - prefer noun-centric descriptors over imperative phrasing,
+   - always include a generic "domain"-style route — here that's
+     `daily conversation` pinned to the cheapest tier — so unmatched
+     prompts still land somewhere deliberate.
+2. **Ordered `models`** is a candidate pool. `models[0]` is the primary;
+   anything after it is a fallback that the client (or Plano's retry
+   logic) tries on `429`/`5xx`. Mix providers across the pool so a single
+   provider outage doesn't break the route.
+3. **The `default: true` provider** is the safety net for prompts the
+   orchestrator can't confidently classify (e.g. one-word "thanks!").
+4. **Aliases** decouple your callers from provider/model strings. When the
+   next Sonnet ships, change the alias target — every caller picks it up
+   instantly.
+
+## Tracing
+
+`tracing.random_sampling: 100` in the config enables full OTLP tracing. Open
+a second terminal and run:
+
+```bash
+planoai trace
+```
+
+Each routed call shows up with the matched preference, ranked candidate
+pool, selected model, end-to-end latency, and per-stage spans (router
+decision, provider call, streaming chunks).
+
+## Cost framing
+
+A rough mix of 60% conversation, 30% reasoning, 10% deep code work — say
+1,000 prompts/day at 1k input + 500 output tokens each — illustrates why
+this layout pays off. Exact numbers depend on per-provider pricing the day
+you read this; the point is that calling Opus 4.7 for casual chat is wasted
+spend, and falling back to a small model on complex code is wasted output.
+Plano's job is to let each provider do what it's best at, and to fail over
+to the next entry in `models` when the primary throttles.
+
+## Customizing
+
+- **Swap a provider:** change the model string and `access_key`. e.g.
+  point `frontier.smart` at `azure_openai/gpt-5.5` by replacing the OpenAI
+  block with an Azure block, then update the matching entries inside
+  `routing_preferences[].models`.
+- **Add fallbacks:** append more entries to any route's `models` list.
+  The orchestrator returns the full ranked pool, and Plano (or your
+  client) walks it on `429`/`5xx`.
+- **Add a new route:** add another entry under `routing_preferences` with
+  a noun-centric description and its own `models` pool. No code change,
+  no client change — every existing caller benefits immediately.
+- **Per-call policy override:** ship a `routing_preferences` field in the
+  request body to override the config for that one call (see the curl
+  example above).
+- **Self-host the orchestrator:** see
+  [`../preference_based_routing/plano_config_local.yaml`](../preference_based_routing/plano_config_local.yaml)
+  for an Ollama-backed orchestrator. Drop the `overrides.llm_routing_model`
+  block into this config and you're off the hosted Plano-Orchestrator.
+
+## Files
+
+| File                                          | Purpose                                                                |
+| --------------------------------------------- | ---------------------------------------------------------------------- |
+| [`config.yaml`](config.yaml)                  | Plano configuration (top-level routing_preferences + aliases)          |
+| [`run_demo.sh`](run_demo.sh)                  | Bring the demo up/down (`./run_demo.sh [down]`)                        |
+| [`test.sh`](test.sh)                          | Per-prompt routing decision + chat completion across all three routes  |
+| [`test.rest`](test.rest)                      | REST Client snippets for VS Code / IntelliJ                            |
+
+## Stopping
+
+```bash
+./run_demo.sh down   # or: planoai down
+```
diff --git a/demos/llm_routing/frontier_model_routing/config.yaml b/demos/llm_routing/frontier_model_routing/config.yaml
new file mode 100644
index 000000000..249be1ae4
--- /dev/null
+++ b/demos/llm_routing/frontier_model_routing/config.yaml
@@ -0,0 +1,103 @@
+version: v0.4.0
+
+# Frontier-tier model routing across three models, all declared below with
+# the `digitalocean/` prefix and a single $DO_API_KEY.
+# - Anthropic Sonnet 4.6  -> daily driver: balanced quality + cost
+# - OpenAI GPT 5.5        -> multi-step reasoning, tool use, math
+# - Anthropic Opus 4.7    -> top-tier reasoning, long-form analysis, code
+#
+# Plano's preference-aligned router (Plano-Orchestrator) inspects each prompt
+# and dispatches to the model whose top-level `routing_preferences` entry best
+# matches the user's intent. Each route owns an ordered `models` list:
+# `models[0]` is the primary; subsequent entries are fallbacks the client
+# (or Plano's retry logic) can try on `429`/`5xx` errors.
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+# ---------------------------------------------------------------------------
+# Model providers (declared once, referenced by every route below)
+# ---------------------------------------------------------------------------
+# Every model here uses the `digitalocean/` prefix, which Plano recognizes
+# natively — no `base_url` or provider interface override needed.
+model_providers:
+  - model: digitalocean/anthropic-claude-4.6-sonnet
+    access_key: $DO_API_KEY
+    default: true                       # used when no routing preference matches
+
+  - model: digitalocean/openai-gpt-5.5
+    access_key: $DO_API_KEY
+
+  - model: digitalocean/anthropic-claude-opus-4.7
+    access_key: $DO_API_KEY
+
+# ---------------------------------------------------------------------------
+# Routing preferences (v0.4.0 top-level form)
+# ---------------------------------------------------------------------------
+# Best-practice notes (per the LLM Routing guide):
+#   - Names should be specific, non-overlapping, and aligned with the
+#     description so the orchestrator can disambiguate cleanly.
+#   - Descriptions are noun-centric phrases describing *the work*, not
+#     conversational instructions.
+#   - Always include a generic "domain" route so prompts that don't match a
+#     specific action still land on a deliberate model — here that's
+#     "daily conversation" pinned to the cheapest tier.
+#   - `models` is an ordered candidate pool; entry 0 is primary and entries
+#     1..n are fallbacks (clients retry on 429/5xx).
+routing_preferences:
+  - name: code generation
+    description: writing new functions, classes, scripts, or boilerplate; implementing APIs; producing unit tests; refactoring code
+    models:
+      - digitalocean/anthropic-claude-opus-4.7        # primary: top-tier code quality
+      - digitalocean/openai-gpt-5.5                   # fallback if Opus is rate-limited / down
+
+  - name: deep analysis
+    description: long-form analysis, architecture review, security review, evaluating tradeoffs, structured critique
+    models:
+      - digitalocean/anthropic-claude-opus-4.7
+      - digitalocean/openai-gpt-5.5
+
+  - name: complex reasoning
+    description: multi-step reasoning, mathematical problem solving, structured planning, tool and function calling, data extraction
+    models:
+      - digitalocean/openai-gpt-5.5                   # primary: strong reasoning + tool use
+      - digitalocean/anthropic-claude-opus-4.7
+
+  - name: daily conversation
+    description: general chat, casual Q&A, summaries, drafting messages, quick rewrites, day-to-day requests where speed and cost matter
+    models:
+      - digitalocean/anthropic-claude-4.6-sonnet
+      - digitalocean/openai-gpt-5.5
+
+# ---------------------------------------------------------------------------
+# Aliases — stable, human-friendly handles for clients
+# ---------------------------------------------------------------------------
+# Clients can pin to a tier without thinking about the underlying provider,
+# and the underlying model can change without breaking callers.
+model_aliases:
+  # Daily driver -> Claude Sonnet 4.6
+  frontier.fast:
+    target: digitalocean/anthropic-claude-4.6-sonnet
+
+  # Reasoning + tool calling -> OpenAI GPT 5.5
+  frontier.smart:
+    target: digitalocean/openai-gpt-5.5
+
+  # Code + deep analysis -> Anthropic Opus 4.7
+  frontier.max:
+    target: digitalocean/anthropic-claude-opus-4.7
+
+# ---------------------------------------------------------------------------
+# Model affinity for agentic loops
+# ---------------------------------------------------------------------------
+# In a tool-using agent loop, successive prompts can look like different
+# routes (tool selection ~ code, reasoning ~ analysis), causing the router
+# to flip between models mid-session. Clients send `X-Model-Affinity: <id>`
+# and Plano caches the routing decision for the session TTL below.
+routing:
+  session_ttl_seconds: 600
+
+tracing:
+  random_sampling: 100
diff --git a/demos/llm_routing/frontier_model_routing/run_demo.sh b/demos/llm_routing/frontier_model_routing/run_demo.sh
new file mode 100755
index 000000000..244f5fefe
--- /dev/null
+++ b/demos/llm_routing/frontier_model_routing/run_demo.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+set -e
+
+# ---------------------------------------------------------------------------
+# Frontier model routing demo: DigitalOcean Sonnet 4.6 + GPT 5.5 + Opus 4.7
+# ---------------------------------------------------------------------------
+
+start_demo() {
+  # Write .env (unless one exists) with the keys config.yaml needs, then start Plano.
+  if [ -f ".env" ]; then
+    echo ".env file already exists. Skipping creation."
+  else
+    # Require exactly the env vars config.yaml references (`$NAME` outside
+    # comments) so this check cannot drift from the config it launches.
+    required=$(sed 's/#.*//' config.yaml | grep -oE '\$[A-Z][A-Z0-9_]*' | tr -d '$' | sort -u)
+    missing=()
+    for key in $required; do
+      [ -z "${!key}" ] && missing+=("$key")   # ${!key}: value of the variable named by $key
+    done
+    if [ ${#missing[@]} -ne 0 ]; then
+      echo "Error: config.yaml needs these environment variables, which are not set:"
+      for key in "${missing[@]}"; do echo "  - $key"; done
+      echo
+      echo "Set them in your shell, then re-run this script. Example:"
+      echo "  export ${missing[0]}=...   # see README.md (Prerequisites) for where to get each key"
+      exit 1
+    fi
+
+    echo "Creating .env file..."
+    (
+      umask 077   # .env holds API keys: create it owner-only (subshell keeps the umask local)
+      for key in $required; do echo "$key=${!key}"; done > .env
+    )
+    echo ".env file created."
+  fi
+
+  echo "Starting Plano with config.yaml..."
+  planoai up config.yaml
+
+  cat <<'EOF'
+
+Plano is up. Try the demo with:
+  ./test.sh           # runs three sample prompts and shows which model handled each
+  planoai trace        # live router decisions in a separate terminal
+
+Or call any model directly using its alias:
+  curl -sS -X POST http://localhost:12000/v1/chat/completions \
+    -H "Content-Type: application/json" \
+    -d '{"model":"frontier.max","messages":[{"role":"user","content":"hello"}]}' | jq .
+
+EOF
+}
+
+stop_demo() {  # shut the Plano gateway down via the CLI
+  printf '%s\n' "Stopping Plano..."
+  planoai down
+}
+
+if [ "$1" == "down" ]; then
+  stop_demo
+else
+  start_demo
+fi
diff --git a/demos/llm_routing/frontier_model_routing/test.rest b/demos/llm_routing/frontier_model_routing/test.rest
new file mode 100644
index 000000000..5866198ae
--- /dev/null
+++ b/demos/llm_routing/frontier_model_routing/test.rest
@@ -0,0 +1,212 @@
+### Frontier model routing — REST Client / VS Code REST snippets
+###
+### Plano runs the preference-aligned orchestrator on every chat request
+### when top-level `routing_preferences` are configured. The `model` field
+### in the body is the *fallback* if no preference matches; pinning it to
+### `frontier.fast` gives a cheap default. Each route owns an ordered
+### `models` pool — primary first, fallbacks next — that the client (or
+### Plano's retry logic) walks on 429/5xx.
+
+@endpoint = http://localhost:12000
+
+### -------------------------------------------------------------------------
+### 1. Decision-only endpoint: see what the router would pick (no upstream call)
+### Returns: { "models": [...ranked pool...], "route": "...", "trace_id": "...", "pinned": <bool> }
+### -------------------------------------------------------------------------
+POST {{endpoint}}/routing/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "model": "frontier.fast",
+  "messages": [
+    {
+      "role": "user",
+      "content": "Refactor this Rust function to remove the global mutable state and add unit tests."
+    }
+  ]
+}
+
+### -------------------------------------------------------------------------
+### 2. Routed by intent: daily conversation -> DigitalOcean Sonnet 4.6
+### -------------------------------------------------------------------------
+POST {{endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "model": "frontier.fast",
+  "max_tokens": 256,
+  "messages": [
+    {
+      "role": "user",
+      "content": "Hey! Give me three fun facts about octopuses I can drop into a dinner conversation."
+    }
+  ]
+}
+
+### -------------------------------------------------------------------------
+### 3. Routed by intent: complex reasoning -> OpenAI GPT 5.5
+### -------------------------------------------------------------------------
+POST {{endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "model": "frontier.fast",
+  "max_tokens": 512,
+  "messages": [
+    {
+      "role": "user",
+      "content": "A train leaves Chicago at 9:14am traveling 72 mph. Another leaves St Louis at 10:02am traveling 65 mph toward Chicago. The cities are 297 miles apart. Walk through the math step by step and give me the time and place they meet."
+    }
+  ]
+}
+
+### -------------------------------------------------------------------------
+### 4. Routed by intent: code generation -> Anthropic Opus 4.7
+### -------------------------------------------------------------------------
+POST {{endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "model": "frontier.fast",
+  "max_tokens": 800,
+  "messages": [
+    {
+      "role": "user",
+      "content": "Write a Rust function that takes a Vec<u8> of UTF-8 bytes and returns a HashMap<char, usize> with grapheme cluster counts. Include unit tests and handle invalid UTF-8 gracefully."
+    }
+  ]
+}
+
+### -------------------------------------------------------------------------
+### 5. Routed by intent: deep analysis -> Anthropic Opus 4.7
+### -------------------------------------------------------------------------
+POST {{endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "model": "frontier.fast",
+  "max_tokens": 600,
+  "messages": [
+    {
+      "role": "user",
+      "content": "Review this Postgres schema for normalization issues:\nCREATE TABLE orders (id SERIAL PRIMARY KEY, customer_email TEXT, customer_name TEXT, items_json JSONB);"
+    }
+  ]
+}
+
+### -------------------------------------------------------------------------
+### 6. Per-request routing override (config-defined preferences are bypassed
+###    for this single call). The `routing_preferences` field is stripped
+###    before the upstream provider sees the body.
+### -------------------------------------------------------------------------
+POST {{endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "model": "frontier.fast",
+  "max_tokens": 256,
+  "messages": [
+    { "role": "user", "content": "Draft me a haiku about Postgres replication slots." }
+  ],
+  "routing_preferences": [
+    {
+      "name": "creative writing",
+      "description": "poetry, fiction, lyrical or playful prose",
+      "models": ["digitalocean/anthropic-claude-opus-4.7", "digitalocean/openai-gpt-5.5"]
+    }
+  ]
+}
+
+### -------------------------------------------------------------------------
+### 7. Pin a routing decision across an agentic loop with X-Model-Affinity.
+###    The first call routes normally and caches the decision. Subsequent
+###    calls with the same id reuse the cached model until the session TTL
+###    (default 10 min) expires.
+### -------------------------------------------------------------------------
+POST {{endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+X-Model-Affinity: agent-session-7f3e
+
+{
+  "model": "frontier.fast",
+  "max_tokens": 256,
+  "messages": [
+    { "role": "user", "content": "Plan a small refactor of an auth module — order of operations?" }
+  ]
+}
+
+### Same affinity id — reuses cached routing decision (no re-classification)
+POST {{endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+X-Model-Affinity: agent-session-7f3e
+
+{
+  "model": "frontier.fast",
+  "max_tokens": 256,
+  "messages": [
+    { "role": "user", "content": "Now write the unit tests for step one." }
+  ]
+}
+
+### -------------------------------------------------------------------------
+### 8. Request DigitalOcean Sonnet 4.6 via alias (used when no preference matches)
+### -------------------------------------------------------------------------
+POST {{endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "model": "frontier.fast",
+  "max_tokens": 128,
+  "messages": [
+    { "role": "user", "content": "One sentence: who painted the ceiling of the Sistine Chapel?" }
+  ]
+}
+
+### -------------------------------------------------------------------------
+### 9. Pin to OpenAI GPT 5.5 via alias
+### -------------------------------------------------------------------------
+POST {{endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "model": "frontier.smart",
+  "max_tokens": 256,
+  "messages": [
+    { "role": "user", "content": "Outline a 30/60/90 day plan for a new platform engineering hire." }
+  ]
+}
+
+### -------------------------------------------------------------------------
+### 10. Pin to Anthropic Opus 4.7 via alias
+### -------------------------------------------------------------------------
+POST {{endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "model": "frontier.max",
+  "max_tokens": 600,
+  "messages": [
+    { "role": "user", "content": "Review this Postgres schema for normalization issues:\nCREATE TABLE orders (id SERIAL PRIMARY KEY, customer_email TEXT, customer_name TEXT, items_json JSONB);" }
+  ]
+}
+
+### -------------------------------------------------------------------------
+### 11. Anthropic Messages API (Claude-native client) -> routed by intent
+### -------------------------------------------------------------------------
+POST {{endpoint}}/v1/messages HTTP/1.1
+Content-Type: application/json
+anthropic-version: 2023-06-01
+x-api-key: test-key
+
+{
+  "model": "frontier.fast",
+  "max_tokens": 256,
+  "messages": [
+    { "role": "user", "content": "Recommend a senior engineering reading list with three picks and one sentence each." }
+  ]
+}
+
+### -------------------------------------------------------------------------
+### 12. Inspect available models (auto-discovered for Claude Desktop / clients)
+### -------------------------------------------------------------------------
+GET {{endpoint}}/v1/models HTTP/1.1
diff --git a/demos/llm_routing/frontier_model_routing/test.sh b/demos/llm_routing/frontier_model_routing/test.sh
new file mode 100755
index 000000000..9bcbe8511
--- /dev/null
+++ b/demos/llm_routing/frontier_model_routing/test.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# ---------------------------------------------------------------------------
+# Frontier Model Routing demo — driver script
+#
+# For each of four intent-biased prompts we:
+#   1. Hit POST /routing/v1/chat/completions (Plano's decision-only endpoint)
+#      to print the matched route name and the ranked candidate pool.
+#   2. Hit POST /v1/chat/completions to actually run the request and print
+#      the model that handled it.
+#
+# Plano runs the orchestrator on every chat completion when top-level
+# `routing_preferences` are configured. The `model` field in the request is
+# the *fallback* used when no preference matches — we pin it to
+# `frontier.fast` so unmatched prompts land on the cheapest tier.
+# ---------------------------------------------------------------------------
+
+set -eo pipefail  # pipefail: a failed curl/jq aborts the run instead of hiding behind sed
+
+GATEWAY=${GATEWAY:-http://localhost:12000}  # set GATEWAY in the environment to override
+DECISION_ENDPOINT="$GATEWAY/routing/v1/chat/completions"
+CHAT_ENDPOINT="$GATEWAY/v1/chat/completions"
+
+ask() {
+  # Usage: ask <label> <prompt>
+  # Prints the routing decision for <prompt>, then the real chat completion,
+  # each as indented JSON under its own heading.
+  local title="$1" question="$2" payload
+
+  # Let jq assemble the body so quotes and newlines in the prompt are escaped.
+  payload="$(jq -n --arg p "$question" '{
+    model: "frontier.fast",
+    max_tokens: 256,
+    messages: [{role: "user", content: $p}]
+  }')"
+
+  printf '\n%s\n' "=========================================================="
+  printf '%s\n' "[$title]" "prompt: $question"
+  printf '%s\n' "----------------------------------------------------------"
+
+  # Decision-only endpoint: which route and models would the router pick?
+  printf '%s\n' "  routing decision:"
+  curl --silent --show-error --request POST "$DECISION_ENDPOINT" \
+    --header "Content-Type: application/json" \
+    --data "$payload" \
+    | jq '{
+        matched_route: .route,
+        ranked_models: .models,
+        pinned: .pinned
+      }' \
+    | sed 's/^/    /'
+
+  # Chat endpoint: send the same body for real and report who answered.
+  printf '%s\n' "  chat completion:"
+  curl --silent --show-error --request POST "$CHAT_ENDPOINT" \
+    --header "Content-Type: application/json" \
+    --data "$payload" \
+    | jq '{
+        routed_to: .model,
+        reply: .choices[0].message.content
+      }' \
+    | sed 's/^/    /'
+}
+# One prompt per intent; each label names the model that prompt is expected to reach.
+ask "daily conversation -> expects DigitalOcean Sonnet 4.6" \
+  "Hey! Give me three fun facts about octopuses I can drop into a dinner conversation."
+
+ask "complex reasoning -> expects OpenAI GPT 5.5" \
+  "A train leaves Chicago at 9:14am traveling 72 mph. Another leaves St Louis at 10:02am traveling 65 mph toward Chicago. The cities are 297 miles apart. Walk through the math step by step and give me the time and place they meet."
+
+ask "code generation -> expects Anthropic Opus 4.7" \
+  "Write a Rust function that takes a Vec<u8> of UTF-8 bytes and returns a HashMap<char, usize> with grapheme cluster counts. Include unit tests and handle invalid UTF-8 gracefully."
+
+ask "deep analysis -> expects Anthropic Opus 4.7" \
+  "Review this Postgres schema for normalization, indexing, and migration risk. Give me a prioritized list of issues:
+CREATE TABLE orders (
+  id SERIAL PRIMARY KEY,
+  customer_email TEXT,
+  customer_name TEXT,
+  items_json JSONB,
+  total NUMERIC,
+  created_at TIMESTAMPTZ DEFAULT now()
+);"
+
+# ---------------------------------------------------------------------------
+# Bonus: pin a routing decision across an agentic loop with X-Model-Affinity.
+# Both calls hit the same gateway with the same affinity id, so the second
+# call reuses the first call's routing decision instead of reclassifying.
+# ---------------------------------------------------------------------------
+printf '\n%s\n' "=========================================================="
+printf '%s\n' "[bonus: model affinity across two turns of an agent loop]"
+printf '%s\n' "----------------------------------------------------------"
+
+# Epoch seconds plus $RANDOM: a fresh affinity id for every run of the script.
+SID="demo-$(date +%s)-${RANDOM}"
+printf '%s\n' "  X-Model-Affinity: $SID"
+
+turn() {
+  # Usage: turn <label> <prompt>
+  # Sends one chat request tagged with the shared $SID affinity header and
+  # prints only the model that served it.
+  local heading="$1" question="$2"
+  printf '%s\n' "  $heading:"
+  curl --silent --show-error --request POST "$CHAT_ENDPOINT" \
+    --header "Content-Type: application/json" \
+    --header "X-Model-Affinity: $SID" \
+    --data "$(jq -n --arg p "$question" '{
+      model: "frontier.fast", max_tokens: 128,
+      messages: [{role: "user", content: $p}]
+    }')" \
+    | jq '{routed_to: .model}' | sed 's/^/    /'
+}
+
+# Both turns send the same $SID, so turn 2 is expected to report turn 1's model.
+turn "turn 1 (sets affinity)" "Plan a small refactor of an auth module — what's the order of operations?"
+turn "turn 2 (reuses decision)" "Now write the unit tests for step one."
+
+printf '\n%s\n' "=========================================================="
+printf '%s\n' "Done. Want to inspect routing decisions live? Run:  planoai trace"
+printf '%s\n' "=========================================================="