From 151d3a83c5f101003fab1c9fe12c3c659c126297 Mon Sep 17 00:00:00 2001 From: Spherrrical Date: Wed, 6 May 2026 10:58:41 -0700 Subject: [PATCH] feat(cli): add planoai launch group + claude-desktop integration --- cli/planoai/claude_desktop.py | 625 ++++++++++++++++++ cli/planoai/launch_cmd.py | 331 ++++++++++ cli/planoai/main.py | 77 +-- cli/planoai/rich_click_config.py | 26 +- cli/test/test_claude_desktop.py | 366 ++++++++++ cli/test/test_launch_cmd.py | 231 +++++++ .../frontier_model_routing/README.md | 423 ++++++++++++ .../frontier_model_routing/config.yaml | 103 +++ .../frontier_model_routing/run_demo.sh | 63 ++ .../frontier_model_routing/test.rest | 212 ++++++ .../frontier_model_routing/test.sh | 119 ++++ 11 files changed, 2500 insertions(+), 76 deletions(-) create mode 100644 cli/planoai/claude_desktop.py create mode 100644 cli/planoai/launch_cmd.py create mode 100644 cli/test/test_claude_desktop.py create mode 100644 cli/test/test_launch_cmd.py create mode 100644 demos/llm_routing/frontier_model_routing/README.md create mode 100644 demos/llm_routing/frontier_model_routing/config.yaml create mode 100755 demos/llm_routing/frontier_model_routing/run_demo.sh create mode 100644 demos/llm_routing/frontier_model_routing/test.rest create mode 100755 demos/llm_routing/frontier_model_routing/test.sh diff --git a/cli/planoai/claude_desktop.py b/cli/planoai/claude_desktop.py new file mode 100644 index 000000000..29783f3db --- /dev/null +++ b/cli/planoai/claude_desktop.py @@ -0,0 +1,625 @@ +"""Configure Claude Desktop to use the local Plano gateway. + +Python port of Ollama's `cmd/launch/claude_desktop.go` tailored for Plano. The +flow is intentionally simpler than Ollama's: + +1. Detect Claude Desktop on macOS / Windows. +2. Pick a string to put in Claude's ``inferenceGatewayApiKey`` slot (Claude + Desktop requires the field; Plano's local gateway does not enforce bearer + auth, so a placeholder is fine — see ``_resolve_api_key`` for precedence). +3. 
Rewrite Claude Desktop config JSON files with ``.bak`` backups to switch + Claude into 3rd-party gateway mode pointed at Plano. +4. Optionally restart Claude Desktop so the changes take effect immediately. + +Restoring flips ``deploymentMode`` back to ``1p`` and removes the Plano gateway +profile + meta entry. + +The Claude Desktop ``deploymentMode = "3p"`` profile structure used here is +defined by Anthropic / observed via the Ollama integration; we do not control +it. We re-use the same JSON shape so Claude Desktop happily accepts the Plano +profile alongside any other third-party profile the user may have. +""" + +from __future__ import annotations + +import glob as _glob +import json +import os +import shutil +import subprocess +import sys +import tempfile +import time +from dataclasses import dataclass, field +from typing import Callable, Optional + +from planoai.utils import getLogger + +log = getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +INTEGRATION_NAME = "claude-desktop" +PROFILE_NAME = "Plano" +# Deterministic UUID-v4 distinct from Ollama's `…0114`. The trailing bytes +# spell "PlanO" in ASCII to make it easy to identify the profile in +# `_meta.json`. +PROFILE_ID = "00000000-0000-4000-8000-0000506C616E" +DEFAULT_BASE_URL = "http://localhost:12000" +SUCCESS_MESSAGE = "Claude Desktop profile changed to Plano." +RESTORE_HINT = ( + "To restore the usual Claude profile, run: " + "planoai launch claude-desktop --restore" +) +RESTORED_MESSAGE = "Claude Desktop restored to the usual Claude profile." + +# Placeholder Claude Desktop writes into the gateway profile when the user +# hasn't overridden it. Plano's local gateway does not enforce a bearer +# token; this string only exists so Claude Desktop has a non-empty value to +# attach to outbound requests. 
def _is_running() -> bool:
    """Return True if Claude Desktop is currently running.

    This is a best-effort probe:

    * macOS: ``pgrep -f`` for the app bundle's main binary.
    * Windows: PowerShell looks for a ``claude`` process that owns a main
      window.
    * Any other platform: always ``False``.

    The probe reports ``True`` only when the helper exits with status 0 and
    printed something on stdout. If the helper cannot be started at all the
    answer is ``False`` rather than an exception.
    """
    if _GOOS == "darwin":
        probe = ["pgrep", "-f", "Claude.app/Contents/MacOS/Claude"]
    elif _GOOS == "windows":
        script = (
            "(Get-Process claude -ErrorAction SilentlyContinue "
            "| Where-Object { $_.MainWindowHandle -ne 0 } "
            "| Select-Object -First 1).Id"
        )
        probe = ["powershell.exe", "-NoProfile", "-Command", script]
    else:
        return False

    try:
        out = subprocess.run(
            probe,
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError:
        # Covers the missing-executable case (FileNotFoundError) as well as
        # e.g. PermissionError: either way we cannot tell, so a status probe
        # should answer "not running" instead of aborting the command.
        return False
    return out.returncode == 0 and out.stdout.strip() != ""
["powershell.exe", "-NoProfile", "-Command", script], + check=False, + ) + + +def _open() -> None: + """Launch Claude Desktop.""" + if _GOOS == "darwin": + subprocess.run(["open", "-a", "Claude"], check=False) + return + if _GOOS == "windows": + path = _claude_app_path() + if not path: + raise RuntimeError( + "Claude Desktop executable was not found; open Claude Desktop " + "manually once and re-run 'planoai launch claude-desktop'" + ) + ps_path = "'" + path.replace("'", "''") + "'" + subprocess.run( + [ + "powershell.exe", + "-NoProfile", + "-Command", + f"Start-Process -FilePath {ps_path}", + ], + check=False, + ) + + +def _sleep(seconds: float) -> None: + time.sleep(seconds) + + +# --------------------------------------------------------------------------- +# Path discovery +# --------------------------------------------------------------------------- + + +@dataclass +class _ThirdPartyPaths: + desktop_config: str + meta: str + profile: str + + +@dataclass +class _Targets: + normal_configs: list[str] = field(default_factory=list) + third_party_profiles: list[_ThirdPartyPaths] = field(default_factory=list) + + +def supported() -> Optional[str]: + """Return ``None`` if the platform is supported, else an error message.""" + if _GOOS in ("darwin", "windows"): + return None + return "Claude Desktop launch is only supported on macOS and Windows" + + +def _home() -> str: + home = _user_home("~") + if home == "~" or not home: + raise RuntimeError("could not resolve user home directory") + return home + + +def _local_app_data() -> str: + val = (os.environ.get("LOCALAPPDATA") or "").strip() + if val: + return val + user = (os.environ.get("USERPROFILE") or "").strip() + if user: + return os.path.join(user, "AppData", "Local") + return os.path.join(_home(), "AppData", "Local") + + +def _darwin_profile_roots() -> tuple[list[str], list[str]]: + base = os.path.join(_home(), "Library", "Application Support") + return ([os.path.join(base, "Claude")], [os.path.join(base, 
"Claude-3p")]) + + +def _windows_profile_roots() -> tuple[list[str], list[str]]: + local = _local_app_data() + normal = [ + os.path.join(local, "Claude"), + os.path.join(local, "Claude Nest"), + ] + third_party = [ + os.path.join(local, "Claude-3p"), + os.path.join(local, "Claude Nest-3p"), + ] + return normal, third_party + + +def _dedupe_paths(paths: list[str]) -> list[str]: + out: list[str] = [] + seen: set[str] = set() + for path in paths: + if not path or not path.strip(): + continue + key = path.lower() + if key in seen: + continue + seen.add(key) + out.append(path) + return out + + +def _target_paths() -> _Targets: + err = supported() + if err is not None: + raise RuntimeError(err) + + if _GOOS == "darwin": + normal, third = _darwin_profile_roots() + else: + normal, third = _windows_profile_roots() + + targets = _Targets() + for root in _dedupe_paths(normal): + targets.normal_configs.append(os.path.join(root, "claude_desktop_config.json")) + for root in _dedupe_paths(third): + targets.third_party_profiles.append( + _ThirdPartyPaths( + desktop_config=os.path.join(root, "claude_desktop_config.json"), + meta=os.path.join(root, "configLibrary", "_meta.json"), + profile=os.path.join(root, "configLibrary", f"{PROFILE_ID}.json"), + ) + ) + return targets + + +def _claude_app_path() -> str: + """Return path to the Claude Desktop executable, or ``""`` if unknown.""" + if _GOOS == "darwin": + candidates = ["/Applications/Claude.app"] + candidates.append(os.path.join(_home(), "Applications", "Claude.app")) + for path in candidates: + if os.path.exists(path): + return path + return "" + if _GOOS == "windows": + local = _local_app_data() + candidates = [ + os.path.join(local, "Programs", "Claude", "Claude.exe"), + os.path.join(local, "Programs", "Claude Desktop", "Claude.exe"), + os.path.join(local, "Claude", "Claude.exe"), + os.path.join(local, "Claude Nest", "Claude.exe"), + os.path.join(local, "Claude Desktop", "Claude.exe"), + os.path.join(local, "AnthropicClaude", 
"Claude.exe"), + ] + for pattern in ( + os.path.join(local, "AnthropicClaude", "app-*", "Claude.exe"), + os.path.join(local, "Programs", "Claude", "app-*", "Claude.exe"), + os.path.join(local, "Programs", "Claude Desktop", "app-*", "Claude.exe"), + ): + candidates.extend(_glob.glob(pattern)) + for path in _dedupe_paths(candidates): + if os.path.exists(path): + return path + return "" + return "" + + +def is_installed() -> bool: + """Best-effort check: app binary or any profile dir is present.""" + if _claude_app_path(): + return True + if _GOOS == "windows" and _is_running(): + return True + if _GOOS == "darwin": + normal, third = _darwin_profile_roots() + elif _GOOS == "windows": + normal, third = _windows_profile_roots() + else: + return False + for path in normal + third: + if os.path.isdir(path): + return True + return False + + +# --------------------------------------------------------------------------- +# JSON IO with atomic write + .bak backup +# --------------------------------------------------------------------------- + + +def _read_json(path: str) -> dict: + with open(path, "r", encoding="utf-8") as f: + data = f.read() + if not data.strip(): + return {} + parsed = json.loads(data) + return parsed if isinstance(parsed, dict) else {} + + +def _read_json_allow_missing(path: str) -> dict: + try: + return _read_json(path) + except FileNotFoundError: + return {} + + +def _atomic_write_with_backup(path: str, payload: bytes) -> None: + """Write ``payload`` to ``path`` atomically, keeping a ``.bak`` copy.""" + parent = os.path.dirname(path) + if parent: + os.makedirs(parent, exist_ok=True) + if os.path.exists(path): + try: + shutil.copy2(path, path + ".bak") + except OSError as e: + log.debug("could not write backup for %s: %s", path, e) + + fd, tmp_path = tempfile.mkstemp(prefix=".plano_", suffix=".tmp", dir=parent or None) + try: + with os.fdopen(fd, "wb") as f: + f.write(payload) + os.replace(tmp_path, path) + except Exception: + try: + os.unlink(tmp_path) 
def _restore_meta(path: str) -> None:
    """Remove every trace of the Plano profile from a ``_meta.json`` file.

    Clears ``appliedId`` when it points at ``PROFILE_ID`` and drops any
    ``entries`` item whose ``id`` is ``PROFILE_ID``. A missing or empty meta
    file is left alone, and the file is rewritten only when something was
    actually removed.
    """
    meta = _read_json_allow_missing(path)
    if not meta:
        return

    def _is_plano(entry) -> bool:
        return isinstance(entry, dict) and entry.get("id") == PROFILE_ID

    dirty = meta.get("appliedId") == PROFILE_ID
    if dirty:
        meta.pop("appliedId", None)

    entries = meta.get("entries")
    if isinstance(entries, list):
        kept = [entry for entry in entries if not _is_plano(entry)]
        # A shorter list means at least one Plano entry was filtered out.
        dirty = dirty or len(kept) != len(entries)
        meta["entries"] = kept

    if dirty:
        _write_json(path, meta)
def _third_party_profile_ok(t: _ThirdPartyPaths) -> bool:
    """Return True when ``t`` describes a fully applied Plano gateway profile.

    That requires all of the following:

    * the meta file's ``appliedId`` equals ``PROFILE_ID``;
    * the profile file exists and parses as JSON;
    * ``inferenceProvider`` is ``"gateway"``;
    * ``inferenceGatewayBaseUrl`` and ``inferenceGatewayApiKey`` are both
      non-blank strings.
    """
    if _read_applied_id(t.meta) != PROFILE_ID:
        return False

    try:
        profile = _read_json(t.profile)
    except (FileNotFoundError, json.JSONDecodeError):
        return False

    def _non_blank(key: str) -> bool:
        value = profile.get(key)
        return isinstance(value, str) and bool(value.strip())

    return (
        profile.get("inferenceProvider") == "gateway"
        and _non_blank("inferenceGatewayBaseUrl")
        and _non_blank("inferenceGatewayApiKey")
    )
return True + + +# --------------------------------------------------------------------------- +# API key resolution +# --------------------------------------------------------------------------- +# +# Plano's local gateway does not enforce bearer auth — there's no such thing +# as a "Plano API key". Claude Desktop's third-party profile schema, however, +# requires ``inferenceGatewayApiKey`` to be a non-empty string before it will +# treat the profile as configured. We therefore pick *some* string to write +# into that slot, with the following precedence so users running Plano behind +# their own auth proxy can opt-in: +# +# 1. ``$PLANO_API_KEY`` — explicit override (e.g. an internal auth token). +# 2. The existing ``inferenceGatewayApiKey`` already in Claude's 3p profile, +# so re-running ``planoai launch claude-desktop`` does not clobber a +# value the user manually set. +# 3. The fixed placeholder ``DEFAULT_API_KEY`` ("plano"). +# +# We do not validate this string against the gateway. The gateway's +# reachability is already surfaced by ``launch_cmd._is_plano_running()`` +# before this module is invoked. 
+ + +def _resolve_api_key(profile_paths: list[str]) -> str: + env_key = (os.environ.get("PLANO_API_KEY") or "").strip() + if env_key: + return env_key + + for path in profile_paths: + existing = _read_gateway_api_key(path) + if existing: + return existing + + return DEFAULT_API_KEY + + +# --------------------------------------------------------------------------- +# Public configure / restore / launch +# --------------------------------------------------------------------------- + + +def configure(base_url: str = DEFAULT_BASE_URL, *, force_chooser: bool = True) -> None: + """Switch Claude Desktop into 3p mode pointed at the local Plano gateway.""" + err = supported() + if err is not None: + raise RuntimeError(err) + + targets = _target_paths() + profile_paths = [t.profile for t in targets.third_party_profiles] + api_key = _resolve_api_key(profile_paths) + + for path in targets.normal_configs: + _write_deployment_mode(path, "3p") + for t in targets.third_party_profiles: + _write_deployment_mode(t.desktop_config, "3p") + _write_meta(t.meta, PROFILE_ID, PROFILE_NAME) + _write_gateway_profile(t.profile, api_key, base_url, force_chooser) + + +def restore() -> None: + """Flip Claude Desktop back to the default Anthropic profile.""" + err = supported() + if err is not None: + raise RuntimeError(err) + + targets = _target_paths() + for path in targets.normal_configs: + _write_deployment_mode(path, "1p") + for t in targets.third_party_profiles: + _write_deployment_mode(t.desktop_config, "1p") + _restore_meta(t.meta) + _restore_profile(t.profile) + + +def _can_prompt() -> bool: + return sys.stdin.isatty() and sys.stderr.isatty() + + +def _confirm(prompt: str, yes: bool) -> bool: + if yes: + return True + if not _can_prompt(): + return False + try: + answer = input(f"{prompt} [Y/n] ").strip().lower() + except (EOFError, KeyboardInterrupt): + sys.stderr.write("\n") + return False + return answer in ("", "y", "yes") + + +def launch_or_restart(prompt: str, yes: bool) -> None: + 
def _is_native_plano_running() -> bool:
    """Return True when the native pid file names two live processes.

    Reads ``NATIVE_PID_FILE`` (JSON) and expects integer ``envoy_pid`` and
    ``brightstaff_pid`` entries. Both processes are probed with signal 0;
    a ``PermissionError`` from the probe is treated as "alive". A missing,
    unreadable, malformed, or implausible pid file yields ``False``.
    """
    if not os.path.exists(NATIVE_PID_FILE):
        return False
    try:
        with open(NATIVE_PID_FILE, "r") as f:
            pids = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and undecodable bytes.
        return False

    # Valid JSON is not necessarily an object (could be a list, number, ...).
    if not isinstance(pids, dict):
        return False

    envoy_pid = pids.get("envoy_pid")
    brightstaff_pid = pids.get("brightstaff_pid")
    for pid in (envoy_pid, brightstaff_pid):
        # bool is a subclass of int, so JSON ``true`` would otherwise be
        # probed as pid 1. Zero and negative values address process groups
        # in os.kill(), which would report success for the wrong target.
        if isinstance(pid, bool) or not isinstance(pid, int) or pid <= 0:
            return False

    # NOTE(review): signal 0 is an existence probe on POSIX. On Windows
    # os.kill() treats 0 as CTRL_C_EVENT -- confirm native mode never
    # leaves a pid file there.
    for pid in (envoy_pid, brightstaff_pid):
        try:
            os.kill(pid, 0)
        except (ProcessLookupError, OverflowError):
            # No such process, or a value too large to be a pid at all.
            return False
        except PermissionError:
            # The process exists but belongs to another user.
            continue
    return True
def _base_url_from_config_file(config_path: str) -> Optional[str]:
    """Derive the gateway URL from a Plano config file.

    Returns ``http://localhost:<port>`` where ``<port>`` comes from
    ``_resolve_cli_agent_endpoint``; the host that helper returns is
    ignored. Returns ``None`` when the file cannot be read, is not valid
    YAML, or does not contain a mapping at the top level.
    """
    try:
        # Explicit UTF-8: the platform default encoding on Windows is not
        # UTF-8, and this command is supported there.
        with open(config_path, "r", encoding="utf-8") as f:
            cfg = yaml.safe_load(f) or {}
    except (OSError, UnicodeDecodeError, yaml.YAMLError):
        return None
    if not isinstance(cfg, dict):
        # e.g. a YAML list or bare scalar -- nothing to resolve a port from.
        return None
    _host, port = _resolve_cli_agent_endpoint(cfg)
    return f"http://localhost:{port}"
string for the CLI agent.", +) +def claude_cli(file, path, settings): + """Launch the Claude Code CLI connected to Plano.""" + _run_cli_agent("claude", file, path, settings) + + +@launch.command("codex") +@click.argument("file", required=False) +@click.option( + "--path", default=".", help="Path to the directory containing plano_config.yaml" +) +@click.option( + "--settings", + default="{}", + help="Additional settings as JSON string for the CLI agent.", +) +def codex(file, path, settings): + """Launch the Codex CLI connected to Plano.""" + _run_cli_agent("codex", file, path, settings) + + +@launch.command("claude-desktop") +@click.option( + "--config", + "config_path", + type=click.Path(dir_okay=False), + default=None, + help="Path to a Plano config; if Plano isn't already running, " + "`planoai up ` is invoked first so the gateway is ready before " + "Claude Desktop is configured.", +) +@click.option( + "--no-launch", + "no_launch", + is_flag=True, + default=False, + help="Configure Claude Desktop but do not (re)open the app afterwards.", +) +@click.option( + "--restore", + "restore_flag", + is_flag=True, + default=False, + help="Switch Claude Desktop back to its usual Anthropic Claude profile.", +) +@click.option( + "--yes", + "-y", + "yes_flag", + is_flag=True, + default=False, + help="Auto-approve restart prompts.", +) +@click.option( + "--base-url", + default=None, + help="Plano LLM listener URL (default: derived from --config or running Plano, falling back to http://localhost:12000).", +) +def claude_desktop_cmd(config_path, no_launch, restore_flag, yes_flag, base_url): + """Configure Claude Desktop to use the local Plano gateway. + + Mirrors `ollama launch claude-desktop`: rewrites Claude Desktop's profile + JSONs (with `.bak` backups) to switch into third-party gateway mode pointed + at Plano, then optionally restarts Claude Desktop so the change takes + effect. 
When `--config ` is supplied and Plano is not already + running, this command also starts Plano with that config first, so the + end-to-end flow is a single command. + """ + from rich.console import Console + + console = Console() + + err = _cd.supported() + if err is not None: + console.print(f"[red]✗[/red] {err}") + sys.exit(1) + + if restore_flag: + if config_path is not None: + console.print( + "[yellow]⚠[/yellow] --config is ignored when --restore is set." + ) + try: + _cd.restore() + except Exception as e: + console.print(f"[red]✗[/red] Failed to restore Claude Desktop: {e}") + sys.exit(1) + console.print(f"[green]✓[/green] {_cd.RESTORED_MESSAGE}") + if no_launch: + return + try: + _cd.launch_or_restart( + "Restart Claude Desktop to use the usual Claude profile?", + yes_flag, + ) + except Exception as e: + console.print(f"[yellow]⚠[/yellow] Could not restart Claude Desktop: {e}") + return + + # Auto-start Plano if --config was provided and nothing is running yet. + if config_path is not None: + abs_config = os.path.abspath(config_path) + if not os.path.exists(abs_config): + console.print(f"[red]✗[/red] Config file not found: {abs_config}") + sys.exit(1) + if _is_plano_running(): + console.print( + "[dim]Plano already running; skipping startup. Using listener " + "from [cyan]" + f"{abs_config}[/cyan] for the gateway URL.[/dim]" + ) + else: + _start_plano_with_config(abs_config, console) + + # Resolve base URL precedence: --base-url > --config file > running Plano > default. + resolved_url = ( + base_url + or ( + _base_url_from_config_file(os.path.abspath(config_path)) + if config_path is not None + else None + ) + or _resolve_base_url_from_running_plano() + or _cd.DEFAULT_BASE_URL + ) + + if not _is_plano_running(): + console.print( + "[yellow]⚠[/yellow] Plano does not appear to be running. " + "Start it with [cyan]planoai up[/cyan] (or pass [cyan]--config " + "[/cyan]) before using Claude Desktop." 
def _resolve_base_url_from_running_plano() -> Optional[str]:
    """Return ``http://localhost:<port>`` for the Plano config found in cwd.

    Despite the name, no running process is inspected: the config file is
    located with ``find_config_file(".", None)`` and the port is read from
    it. Best-effort -- if no config can be located or parsed, ``None`` is
    returned so the caller can fall back to its default URL.
    """
    try:
        plano_config_file = find_config_file(".", None)
    except Exception:
        return None
    if not plano_config_file or not os.path.exists(plano_config_file):
        return None
    # Same parsing rules as an explicit --config, so share the helper
    # instead of keeping a second copy of the YAML handling.
    return _base_url_from_config_file(plano_config_file)
as obs_cmd from planoai.consts import ( DEFAULT_OTEL_TRACING_GRPC_ENDPOINT, DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT, - NATIVE_PID_FILE, PLANO_RUN_DIR, PLANO_DOCKER_IMAGE, - PLANO_DOCKER_NAME, ) from planoai.rich_click_config import configure_rich_click from planoai.versioning import check_version_status, get_latest_version, get_version @@ -53,30 +49,6 @@ log = getLogger(__name__) -def _is_native_plano_running() -> bool: - if not os.path.exists(NATIVE_PID_FILE): - return False - try: - with open(NATIVE_PID_FILE, "r") as f: - pids = json.load(f) - except (OSError, json.JSONDecodeError): - return False - - envoy_pid = pids.get("envoy_pid") - brightstaff_pid = pids.get("brightstaff_pid") - if not isinstance(envoy_pid, int) or not isinstance(brightstaff_pid, int): - return False - - for pid in (envoy_pid, brightstaff_pid): - try: - os.kill(pid, 0) - except ProcessLookupError: - return False - except PermissionError: - continue - return True - - def _is_port_in_use(port: int) -> bool: """Check if a TCP port is already bound on localhost.""" import socket @@ -690,57 +662,12 @@ def logs(debug, follow, docker): plano_process.terminate() -@click.command() -@click.argument("type", type=click.Choice(["claude", "codex"]), required=True) -@click.argument("file", required=False) # Optional file argument -@click.option( - "--path", default=".", help="Path to the directory containing plano_config.yaml" -) -@click.option( - "--settings", - default="{}", - help="Additional settings as JSON string for the CLI agent.", -) -def cli_agent(type, file, path, settings): - """Start a CLI agent connected to Plano. 
- - CLI_AGENT: The type of CLI agent to start ('claude' or 'codex') - """ - - native_running = _is_native_plano_running() - docker_running = False - if not native_running: - docker_running = docker_container_status(PLANO_DOCKER_NAME) == "running" - - if not (native_running or docker_running): - log.error("Plano is not running.") - log.error( - "Start Plano first using 'planoai up ' (native or --docker mode)." - ) - sys.exit(1) - - # Determine plano_config.yaml path - plano_config_file = find_config_file(path, file) - if not os.path.exists(plano_config_file): - log.error(f"Config file not found: {plano_config_file}") - sys.exit(1) - - try: - start_cli_agent(plano_config_file, type, settings) - except SystemExit: - # Re-raise SystemExit to preserve exit codes - raise - except Exception as e: - click.echo(f"Error: {e}") - sys.exit(1) - - # add commands to the main group main.add_command(up) main.add_command(down) main.add_command(build) main.add_command(logs) -main.add_command(cli_agent) +main.add_command(launch_cmd, name="launch") main.add_command(generate_prompt_targets) main.add_command(init_cmd, name="init") main.add_command(trace_cmd, name="trace") diff --git a/cli/planoai/rich_click_config.py b/cli/planoai/rich_click_config.py index fe90dcf13..5c0e8e076 100644 --- a/cli/planoai/rich_click_config.py +++ b/cli/planoai/rich_click_config.py @@ -46,6 +46,20 @@ def configure_rich_click(plano_color: str) -> None: "options": ["--debug", "--follow"], }, ], + "planoai launch claude-desktop": [ + { + "name": "Plano gateway", + "options": ["--config", "--base-url"], + }, + { + "name": "Mode", + "options": ["--no-launch", "--restore"], + }, + { + "name": "Confirmation", + "options": ["--yes"], + }, + ], } # Command groups for main help. 
@@ -57,7 +71,7 @@ def configure_rich_click(plano_color: str) -> None: }, { "name": "Agent Commands", - "commands": ["cli-agent"], + "commands": ["launch"], }, { "name": "Observability", @@ -68,4 +82,14 @@ def configure_rich_click(plano_color: str) -> None: "commands": ["generate-prompt-targets"], }, ], + "planoai launch": [ + { + "name": "CLI Agents", + "commands": ["claude-cli", "codex"], + }, + { + "name": "Desktop Apps", + "commands": ["claude-desktop"], + }, + ], } diff --git a/cli/test/test_claude_desktop.py b/cli/test/test_claude_desktop.py new file mode 100644 index 000000000..b5b07f8cd --- /dev/null +++ b/cli/test/test_claude_desktop.py @@ -0,0 +1,366 @@ +"""Tests for `planoai launch claude-desktop` configuration logic.""" + +from __future__ import annotations + +import json +import os +from pathlib import Path + +import pytest + +from planoai import claude_desktop as cd + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def fake_home(tmp_path, monkeypatch): + """Pretend we're on macOS with a fresh home directory. + + Plano's local gateway has no API key concept, so by default we ensure + ``$PLANO_API_KEY`` is unset; tests that exercise the env-override path + re-set it explicitly. 
+ """ + monkeypatch.setattr(cd, "_GOOS", "darwin") + monkeypatch.setattr(cd, "_user_home", lambda _: str(tmp_path)) + monkeypatch.delenv("PLANO_API_KEY", raising=False) + return tmp_path + + +def _normal_config_path(home: Path) -> Path: + return ( + home + / "Library" + / "Application Support" + / "Claude" + / "claude_desktop_config.json" + ) + + +def _third_party_root(home: Path) -> Path: + return home / "Library" / "Application Support" / "Claude-3p" + + +def _third_party_config_path(home: Path) -> Path: + return _third_party_root(home) / "claude_desktop_config.json" + + +def _meta_path(home: Path) -> Path: + return _third_party_root(home) / "configLibrary" / "_meta.json" + + +def _profile_path(home: Path) -> Path: + return _third_party_root(home) / "configLibrary" / f"{cd.PROFILE_ID}.json" + + +# --------------------------------------------------------------------------- +# configure() / restore() +# --------------------------------------------------------------------------- + + +def test_configure_writes_all_four_files_with_default_api_key(fake_home): + cd.configure("http://localhost:12000") + + normal_cfg = json.loads(_normal_config_path(fake_home).read_text()) + assert normal_cfg["deploymentMode"] == "3p" + + third_cfg = json.loads(_third_party_config_path(fake_home).read_text()) + assert third_cfg["deploymentMode"] == "3p" + + meta = json.loads(_meta_path(fake_home).read_text()) + assert meta["appliedId"] == cd.PROFILE_ID + assert any( + isinstance(e, dict) and e.get("id") == cd.PROFILE_ID for e in meta["entries"] + ) + + profile = json.loads(_profile_path(fake_home).read_text()) + assert profile["inferenceProvider"] == "gateway" + assert profile["inferenceGatewayBaseUrl"] == "http://localhost:12000" + # No env override and no pre-existing profile -> placeholder is written. 
+ assert profile["inferenceGatewayApiKey"] == cd.DEFAULT_API_KEY + assert profile["inferenceGatewayAuthScheme"] == "bearer" + assert profile["disableDeploymentModeChooser"] is True + assert "inferenceModels" not in profile + + +def test_configure_uses_env_override_when_set(fake_home, monkeypatch): + monkeypatch.setenv("PLANO_API_KEY", "from-env") + cd.configure("http://localhost:12000") + + profile = json.loads(_profile_path(fake_home).read_text()) + assert profile["inferenceGatewayApiKey"] == "from-env" + + +def test_configure_preserves_existing_profile_api_key(fake_home): + profile = _profile_path(fake_home) + profile.parent.mkdir(parents=True, exist_ok=True) + profile.write_text(json.dumps({"inferenceGatewayApiKey": "from-profile"})) + + cd.configure("http://localhost:12000") + + written = json.loads(profile.read_text()) + assert written["inferenceGatewayApiKey"] == "from-profile" + + +def test_configure_does_not_call_network(fake_home, monkeypatch): + """Plano's local gateway is not validated at configure time. We must not + attempt any HTTP request — a 503 from the gateway must not block setup. 
+ """ + + def boom(*_args, **_kwargs): + raise AssertionError("configure() must not perform network calls") + + monkeypatch.setattr("urllib.request.urlopen", boom) + cd.configure("http://localhost:12000") + + profile = json.loads(_profile_path(fake_home).read_text()) + assert profile["inferenceProvider"] == "gateway" + + +def test_configure_preserves_existing_unrelated_keys(fake_home): + normal_path = _normal_config_path(fake_home) + normal_path.parent.mkdir(parents=True, exist_ok=True) + normal_path.write_text( + json.dumps({"someOtherSetting": 123, "deploymentMode": "1p"}) + ) + + cd.configure("http://localhost:12000") + + cfg = json.loads(normal_path.read_text()) + assert cfg["someOtherSetting"] == 123 + assert cfg["deploymentMode"] == "3p" + + +def test_configure_writes_backup_of_existing_files(fake_home): + normal_path = _normal_config_path(fake_home) + normal_path.parent.mkdir(parents=True, exist_ok=True) + normal_path.write_text('{"deploymentMode":"1p"}') + + cd.configure("http://localhost:12000") + + backup = normal_path.with_suffix(normal_path.suffix + ".bak") + assert backup.exists() + assert json.loads(backup.read_text())["deploymentMode"] == "1p" + + +def test_restore_reverts_deployment_mode_and_strips_gateway_keys(fake_home): + cd.configure("http://localhost:12000") + cd.restore() + + assert ( + json.loads(_normal_config_path(fake_home).read_text())["deploymentMode"] == "1p" + ) + third_cfg = json.loads(_third_party_config_path(fake_home).read_text()) + assert third_cfg["deploymentMode"] == "1p" + + meta = json.loads(_meta_path(fake_home).read_text()) + assert meta.get("appliedId") != cd.PROFILE_ID + assert all( + not (isinstance(e, dict) and e.get("id") == cd.PROFILE_ID) + for e in meta.get("entries", []) + ) + + profile = json.loads(_profile_path(fake_home).read_text()) + assert profile["disableDeploymentModeChooser"] is False + for stripped in ( + "inferenceProvider", + "inferenceGatewayBaseUrl", + "inferenceGatewayAuthScheme", + "inferenceModels", 
+ ): + assert stripped not in profile + + +def test_restore_meta_keeps_unrelated_entries(fake_home): + meta_path = _meta_path(fake_home) + meta_path.parent.mkdir(parents=True, exist_ok=True) + meta_path.write_text( + json.dumps( + { + "appliedId": cd.PROFILE_ID, + "entries": [ + {"id": cd.PROFILE_ID, "name": "Plano"}, + {"id": "00000000-0000-0000-0000-000000000001", "name": "Other"}, + ], + } + ) + ) + + cd._restore_meta(str(meta_path)) + + meta = json.loads(meta_path.read_text()) + assert meta.get("appliedId") in (None, "") + ids = [e["id"] for e in meta["entries"] if isinstance(e, dict)] + assert ids == ["00000000-0000-0000-0000-000000000001"] + + +# --------------------------------------------------------------------------- +# is_configured() +# --------------------------------------------------------------------------- + + +def test_is_configured_false_on_fresh_home(fake_home): + assert cd.is_configured() is False + + +def test_is_configured_true_after_configure(fake_home): + cd.configure("http://localhost:12000") + assert cd.is_configured() is True + + +def test_is_configured_false_when_only_normal_config_set(fake_home): + cd.configure("http://localhost:12000") + + third_cfg = _third_party_config_path(fake_home) + data = json.loads(third_cfg.read_text()) + data["deploymentMode"] = "1p" + third_cfg.write_text(json.dumps(data)) + + assert cd.is_configured() is False + + +# --------------------------------------------------------------------------- +# API key resolution (placeholder by default; env override; profile preserve) +# --------------------------------------------------------------------------- + + +def test_resolve_api_key_returns_placeholder_when_no_inputs(fake_home): + assert cd._resolve_api_key([]) == cd.DEFAULT_API_KEY + + +def test_resolve_api_key_uses_env_when_set(fake_home, monkeypatch): + monkeypatch.setenv("PLANO_API_KEY", "from-env") + profile = _profile_path(fake_home) + profile.parent.mkdir(parents=True, exist_ok=True) + 
profile.write_text(json.dumps({"inferenceGatewayApiKey": "from-profile"})) + + # Env wins over profile. + assert cd._resolve_api_key([str(profile)]) == "from-env" + + +def test_resolve_api_key_falls_back_to_existing_profile(fake_home): + profile = _profile_path(fake_home) + profile.parent.mkdir(parents=True, exist_ok=True) + profile.write_text(json.dumps({"inferenceGatewayApiKey": "from-profile"})) + + assert cd._resolve_api_key([str(profile)]) == "from-profile" + + +def test_resolve_api_key_skips_blank_env(fake_home, monkeypatch): + monkeypatch.setenv("PLANO_API_KEY", " ") + assert cd._resolve_api_key([]) == cd.DEFAULT_API_KEY + + +# --------------------------------------------------------------------------- +# Atomic write +# --------------------------------------------------------------------------- + + +def test_atomic_write_creates_backup_of_existing_file(tmp_path): + target = tmp_path / "deep" / "nested" / "file.json" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text("ORIGINAL") + + cd._atomic_write_with_backup(str(target), b"NEW") + + assert target.read_text() == "NEW" + assert (tmp_path / "deep" / "nested" / "file.json.bak").read_text() == "ORIGINAL" + + +def test_atomic_write_skips_backup_when_no_existing_file(tmp_path): + target = tmp_path / "fresh.json" + cd._atomic_write_with_backup(str(target), b"DATA") + + assert target.read_text() == "DATA" + assert not (tmp_path / "fresh.json.bak").exists() + + +def test_atomic_write_does_not_truncate_on_failure(tmp_path, monkeypatch): + target = tmp_path / "file.json" + target.write_text("ORIGINAL") + + real_replace = os.replace + + def boom(_src, _dst): + raise OSError("disk full") + + monkeypatch.setattr(os, "replace", boom) + with pytest.raises(OSError): + cd._atomic_write_with_backup(str(target), b"NEW") + monkeypatch.setattr(os, "replace", real_replace) + + assert target.read_text() == "ORIGINAL" + leftover = list(tmp_path.glob(".plano_*.tmp")) + assert leftover == [] + + +# 
--------------------------------------------------------------------------- +# Platform support +# --------------------------------------------------------------------------- + + +def test_supported_returns_error_on_linux(monkeypatch): + monkeypatch.setattr(cd, "_GOOS", "linux") + msg = cd.supported() + assert msg is not None + assert "macOS" in msg and "Windows" in msg + + +def test_supported_returns_none_on_darwin(monkeypatch): + monkeypatch.setattr(cd, "_GOOS", "darwin") + assert cd.supported() is None + + +def test_configure_raises_on_unsupported_platform(monkeypatch): + monkeypatch.setattr(cd, "_GOOS", "linux") + with pytest.raises(RuntimeError, match="macOS"): + cd.configure() + + +def test_restore_raises_on_unsupported_platform(monkeypatch): + monkeypatch.setattr(cd, "_GOOS", "linux") + with pytest.raises(RuntimeError, match="macOS"): + cd.restore() + + +# --------------------------------------------------------------------------- +# launch_or_restart() +# --------------------------------------------------------------------------- + + +def test_launch_or_restart_opens_when_not_running(monkeypatch): + monkeypatch.setattr(cd, "_GOOS", "darwin") + monkeypatch.setattr(cd, "_is_running", lambda: False) + opened = [] + monkeypatch.setattr(cd, "_open", lambda: opened.append(True)) + monkeypatch.setattr( + cd, "_quit", lambda: pytest.fail("should not quit when not running") + ) + + cd.launch_or_restart("prompt", yes=True) + assert opened == [True] + + +def test_launch_or_restart_with_yes_quits_then_opens(monkeypatch): + monkeypatch.setattr(cd, "_GOOS", "darwin") + running = [True] + monkeypatch.setattr(cd, "_is_running", lambda: running[0]) + + def quit_app(): + running[0] = False + + quit_calls = [] + open_calls = [] + monkeypatch.setattr( + cd, + "_quit", + lambda: (quit_calls.append(True), quit_app()), + ) + monkeypatch.setattr(cd, "_open", lambda: open_calls.append(True)) + monkeypatch.setattr(cd, "_sleep", lambda _: None) + + cd.launch_or_restart("Restart?", 
yes=True) + assert quit_calls == [True] + assert open_calls == [True] diff --git a/cli/test/test_launch_cmd.py b/cli/test/test_launch_cmd.py new file mode 100644 index 000000000..5e304bd10 --- /dev/null +++ b/cli/test/test_launch_cmd.py @@ -0,0 +1,231 @@ +"""Tests for the `planoai launch claude-desktop` click command. + +Focused on the wiring between the CLI flags and the underlying +`claude_desktop` module / `up` invocation. The actual JSON-rewriting and key +validation are covered in `test_claude_desktop.py`. +""" + +from __future__ import annotations + +from click.testing import CliRunner + +from planoai import claude_desktop as cd +from planoai import launch_cmd as lc + + +def _stub_cd(monkeypatch): + """Replace ``claude_desktop`` side-effects with no-ops + call recorders.""" + calls: dict[str, list] = { + "configure": [], + "restore": [], + "launch_or_restart": [], + } + monkeypatch.setattr(cd, "supported", lambda: None) + monkeypatch.setattr( + cd, + "configure", + lambda base_url, **_kw: calls["configure"].append(base_url), + ) + monkeypatch.setattr(cd, "restore", lambda: calls["restore"].append(True)) + monkeypatch.setattr( + cd, + "launch_or_restart", + lambda prompt, yes: calls["launch_or_restart"].append((prompt, yes)), + ) + return calls + + +def test_config_path_starts_plano_when_not_running(tmp_path, monkeypatch): + config = tmp_path / "plano_config.yaml" + config.write_text( + "version: v0.4.0\n" + "listeners:\n" + " - name: llm\n" + " type: model\n" + " port: 12345\n" + " address: 0.0.0.0\n" + "model_providers: []\n" + ) + + cd_calls = _stub_cd(monkeypatch) + monkeypatch.setattr(lc, "_is_plano_running", lambda: False) + + up_calls = [] + + def fake_up( + file, + path, + foreground, + with_tracing, + tracing_port, + docker, + verbose, + listener_port, + ): + up_calls.append( + { + "file": file, + "foreground": foreground, + "docker": docker, + "listener_port": listener_port, + } + ) + + from planoai.main import up as up_cmd + + 
monkeypatch.setattr(up_cmd, "callback", fake_up) + + runner = CliRunner() + result = runner.invoke( + lc.launch, + ["claude-desktop", "--config", str(config), "--yes"], + ) + + assert result.exit_code == 0, result.output + assert len(up_calls) == 1 + assert up_calls[0]["file"] == str(config) + assert up_calls[0]["foreground"] is False + assert cd_calls["configure"] == ["http://localhost:12345"] + # --yes implies we restart Claude Desktop after configuring. + assert cd_calls["launch_or_restart"] + assert cd_calls["launch_or_restart"][0][1] is True + + +def test_config_path_skips_up_when_plano_already_running(tmp_path, monkeypatch): + config = tmp_path / "plano_config.yaml" + config.write_text( + "version: v0.4.0\n" + "listeners:\n" + " - name: llm\n" + " type: model\n" + " port: 12500\n" + "model_providers: []\n" + ) + + cd_calls = _stub_cd(monkeypatch) + monkeypatch.setattr(lc, "_is_plano_running", lambda: True) + + sentinel = [] + + def boom(*args, **kwargs): + sentinel.append("called") + + from planoai.main import up as up_cmd + + monkeypatch.setattr(up_cmd, "callback", boom) + + runner = CliRunner() + result = runner.invoke( + lc.launch, + ["claude-desktop", "--config", str(config), "--no-launch"], + ) + + assert result.exit_code == 0, result.output + assert sentinel == [], "should not invoke up.callback when Plano is already running" + assert cd_calls["configure"] == ["http://localhost:12500"] + # --no-launch skips the restart step. 
+ assert cd_calls["launch_or_restart"] == [] + + +def test_config_path_must_exist(tmp_path, monkeypatch): + cd_calls = _stub_cd(monkeypatch) + monkeypatch.setattr(lc, "_is_plano_running", lambda: False) + + runner = CliRunner() + result = runner.invoke( + lc.launch, + ["claude-desktop", "--config", str(tmp_path / "nope.yaml")], + ) + + assert result.exit_code != 0 + assert "not found" in result.output.lower() + assert cd_calls["configure"] == [] + + +def test_no_launch_skips_open(monkeypatch): + cd_calls = _stub_cd(monkeypatch) + monkeypatch.setattr(lc, "_is_plano_running", lambda: True) + + runner = CliRunner() + result = runner.invoke( + lc.launch, + ["claude-desktop", "--no-launch", "--base-url", "http://localhost:9999"], + ) + + assert result.exit_code == 0, result.output + assert cd_calls["configure"] == ["http://localhost:9999"] + assert cd_calls["launch_or_restart"] == [] + + +def test_restore_ignores_config_path(tmp_path, monkeypatch): + config = tmp_path / "plano_config.yaml" + config.write_text("version: v0.4.0\nmodel_providers: []\n") + + cd_calls = _stub_cd(monkeypatch) + monkeypatch.setattr(lc, "_is_plano_running", lambda: True) + + runner = CliRunner() + result = runner.invoke( + lc.launch, + ["claude-desktop", "--restore", "--config", str(config), "--yes"], + ) + + assert result.exit_code == 0, result.output + assert cd_calls["restore"] == [True] + assert cd_calls["configure"] == [] + assert "ignored" in result.output.lower() + + +def test_base_url_overrides_config_file(tmp_path, monkeypatch): + config = tmp_path / "plano_config.yaml" + config.write_text( + "version: v0.4.0\n" + "listeners:\n" + " - name: llm\n" + " type: model\n" + " port: 12345\n" + "model_providers: []\n" + ) + + cd_calls = _stub_cd(monkeypatch) + monkeypatch.setattr(lc, "_is_plano_running", lambda: True) + + runner = CliRunner() + result = runner.invoke( + lc.launch, + [ + "claude-desktop", + "--config", + str(config), + "--base-url", + "http://10.0.0.5:8080", + "--no-launch", + 
], + ) + + assert result.exit_code == 0, result.output + assert cd_calls["configure"] == ["http://10.0.0.5:8080"] + + +def test_unsupported_platform_errors(monkeypatch): + monkeypatch.setattr( + cd, + "supported", + lambda: "Claude Desktop launch is only supported on macOS and Windows", + ) + + runner = CliRunner() + result = runner.invoke(lc.launch, ["claude-desktop"]) + + assert result.exit_code != 0 + assert "macOS" in result.output + + +def test_help_lists_new_flags(monkeypatch): + runner = CliRunner() + result = runner.invoke(lc.launch, ["claude-desktop", "--help"]) + + assert result.exit_code == 0, result.output + assert "--config" in result.output + assert "--no-launch" in result.output + assert "--restore" in result.output diff --git a/demos/llm_routing/frontier_model_routing/README.md b/demos/llm_routing/frontier_model_routing/README.md new file mode 100644 index 000000000..c12419a53 --- /dev/null +++ b/demos/llm_routing/frontier_model_routing/README.md @@ -0,0 +1,423 @@ +# Frontier Model Routing: Sonnet 4.6 + GPT 5.5 + Opus 4.7 + +A worked example of using Plano to route across the three current frontier +LLMs from three different providers — without your application caring which +model handled any given request, and with **per-route fallbacks** so a +provider outage never takes the demo down. 
+ +| Tier | Primary model | Provider | What it's great at | +| ---------------- | -------------------------------------- | ------------------ | -------------------------------------------------------- | +| `frontier.fast` | `anthropic-claude-sonnet-4-6` | DigitalOcean | Daily driver — chat, summaries, drafts, light reasoning | +| `frontier.smart` | `gpt-5.5` | OpenAI | Multi-step reasoning, math, tool/function calling | +| `frontier.max` | `claude-opus-4-7` | Anthropic | Code, deep analysis, long-context evaluation, refactors | + +The same prompt picks the right model automatically — Plano's preference +aligned router (Plano-Orchestrator) reads the user's intent and dispatches to +the route whose `routing_preferences` description best matches. Each route +is backed by an **ordered candidate pool**, so when the primary provider +returns a `429`/`5xx` the next entry in the pool serves the request. + +``` + ┌────────────────────────────────────┐ +client ──── /v1 ───▶ │ Plano gateway (port 12000) │ +(OpenAI / Anthropic / │ ├── Plano-Orchestrator (router) │ + Claude Desktop / SDK) │ └── Envoy + brightstaff │ + └────────────────────────────────────┘ + │ │ │ + ┌───────────┘ │ └────────────┐ + ▼ ▼ ▼ + DigitalOcean Gradient AI OpenAI Anthropic + anthropic-claude-sonnet-4-6 gpt-5.5 claude-opus-4-7 + (daily conversation route) (complex reasoning) (code + deep analysis) +``` + +## Why this layout + +- **Cost-quality fit per request.** Casual prompts go to Sonnet 4.6 on + DigitalOcean (cheaper inference, still excellent quality); complex + reasoning goes to GPT 5.5; code and deep analysis go to Opus 4.7. +- **Provider diversity = resilience.** Every route lists a fallback model + from a different provider — if Anthropic rate-limits Opus, Plano hands + the next request in that route to GPT 5.5 with no client changes. +- **Zero client changes.** The OpenAI SDK, Anthropic SDK, Claude Desktop, + Codex CLI, and curl all hit the same `:12000` endpoint and use the same + alias names. 
Switching `frontier.max` from Opus to whatever ships next + is a one-line config change. + +## The new routing-preferences architecture (v0.4.0) + +This demo uses Plano's **top-level `routing_preferences`** block — the +canonical shape since `v0.4.0`. The older inline form (preferences nested +under each `model_provider`) is auto-migrated by the Plano CLI but emits a +deprecation warning. The top-level shape gives each route an ordered +candidate pool, which is what makes per-route fallbacks possible. + +```yaml +routing_preferences: + - name: code generation + description: writing new functions, classes, scripts, or boilerplate; implementing APIs; producing unit tests + models: + - anthropic/claude-opus-4-7 # primary + - openai/gpt-5.5 # fallback on 429/5xx +``` + +What changes vs. the v0.3.0 inline style: + +| Capability | v0.3.0 inline | v0.4.0 top-level | +| ----------------------------------------- | :-----------: | :--------------: | +| Multiple models can serve the same route | no | yes | +| Explicit primary + ranked fallback chain | no | yes | +| Per-request override via request body | no | yes | +| Decision-only endpoint (`/routing/v1/...`)| no | yes | +| `X-Model-Affinity` header for agent loops | no | yes | + +## Prerequisites + +- **Plano CLI** — `uv tool install planoai` or `pip install planoai` +- API keys for all three providers: + + | Env var | Where to get it | + | ------------------- | ------------------------------------------------------------------------ | + | `DO_API_KEY` | DigitalOcean control panel (Gradient AI access) | + | `OPENAI_API_KEY` | OpenAI platform dashboard (API keys) | + | `ANTHROPIC_API_KEY` | Anthropic Console (API keys) | + +## Quick start + +```bash +export DO_API_KEY=... +export OPENAI_API_KEY=... +export ANTHROPIC_API_KEY=... + +cd demos/llm_routing/frontier_model_routing +./run_demo.sh +``` + +`run_demo.sh` writes a local `.env`, then runs `planoai up config.yaml`. +Plano daemonizes and is ready when the script returns. 
+ +To shut down: + +```bash +./run_demo.sh down +``` + +## Try it + +### Let Plano pick the right tier + +```bash +./test.sh +``` + +The script does two things for each prompt: + +1. Calls `POST /routing/v1/chat/completions` — Plano's **decision-only** + endpoint — to print the matched route name and the ranked candidate + pool for that prompt. +2. Calls `POST /v1/chat/completions` to actually run the request and + prints the model that handled it. + +A healthy run resolves like this: + +``` +[daily conversation -> expects DigitalOcean Sonnet 4.6] + matched route: daily conversation + ranked models: ["digitalocean/anthropic-claude-sonnet-4-6","openai/gpt-5.5"] + routed_to: digitalocean/anthropic-claude-sonnet-4-6 + +[complex reasoning -> expects OpenAI GPT 5.5] + matched route: complex reasoning + ranked models: ["openai/gpt-5.5","anthropic/claude-opus-4-7"] + routed_to: openai/gpt-5.5 + +[code generation -> expects Anthropic Opus 4.7] + matched route: code generation + ranked models: ["anthropic/claude-opus-4-7","openai/gpt-5.5"] + routed_to: anthropic/claude-opus-4-7 +``` + +The trick: every request is sent with `model: frontier.fast`, but Plano runs +the orchestrator on every chat completion when `routing_preferences` are +configured and overrides the `model` when a preference matches. The +`frontier.fast` value is the explicit fallback used when no preference +matches — so casual prompts stay on the cheap tier and only "real" reasoning +or code work escalates to GPT 5.5 or Opus 4.7. + +Want to watch the router decide live? In a second terminal: + +```bash +planoai trace +``` + +You'll see the orchestrator's route selection for each request, including +the matched preference, ranked models, and response time. + +### Inspect the routing decision without burning a token + +The `/routing/v1/...` endpoint returns the routing decision **without +calling the upstream model**. 
Useful for previewing classification, building +a UI, or wiring fallback logic into a custom client. + +```bash +curl -sS -X POST http://localhost:12000/routing/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "frontier.fast", + "messages": [{"role":"user","content":"refactor this function to remove the global"}] + }' | jq . +``` + +```json +{ + "models": ["anthropic/claude-opus-4-7", "openai/gpt-5.5"], + "route": "code generation", + "trace_id": "4bf92f3577b34da6a3ce929d0e0e4736", + "pinned": false +} +``` + +Use `models[0]` as the primary; retry with `models[1]` on `429` / `5xx`. + +### Pin a route across an agent loop with `X-Model-Affinity` + +In a tool-using agent loop a single user task may produce a dozen LLM +calls. Their topics drift (tool selection looks like code, summarising +results looks like analysis), and the router would otherwise route each +turn independently — bouncing between providers and invalidating their +KV caches. Pin the decision once with an arbitrary session id: + +```bash +SID=$(uuidgen) + +curl -sS -X POST http://localhost:12000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "X-Model-Affinity: $SID" \ + -d '{"model":"frontier.fast","messages":[{"role":"user","content":"start a refactor of the auth module"}]}' + +# every subsequent call with the same SID skips routing and reuses the +# cached model decision until the session TTL (10 min by default) expires. +curl -sS -X POST http://localhost:12000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "X-Model-Affinity: $SID" \ + -d '{"model":"frontier.fast","messages":[{"role":"user","content":"now write the unit tests"}]}' +``` + +TTL and cache size are configurable under `routing:` in `config.yaml`. + +### Override the routing policy per-request + +Sometimes one caller needs a different policy without redeploying the +gateway. 
Send `routing_preferences` inline in the request body — it is +stripped before forwarding upstream: + +```bash +curl -sS -X POST http://localhost:12000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "frontier.fast", + "messages": [{"role":"user","content":"draft me a haiku about Postgres"}], + "routing_preferences": [ + { + "name": "creative writing", + "description": "poetry, fiction, lyrical or playful prose", + "models": ["anthropic/claude-opus-4-7", "openai/gpt-5.5"] + } + ] + }' | jq . +``` + +### Pin a request to a specific tier (skip routing) + +For prompts that don't match any preference description, the requested +model is what serves the request. Pin to a tier by sending its alias +directly: + +```bash +# DigitalOcean Sonnet 4.6 — fast and cheap +curl -sS -X POST http://localhost:12000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{"model":"frontier.fast","messages":[{"role":"user","content":"hello"}]}' | jq . + +# OpenAI GPT 5.5 +curl -sS -X POST http://localhost:12000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{"model":"frontier.smart","messages":[{"role":"user","content":"hello"}]}' | jq . + +# Anthropic Opus 4.7 +curl -sS -X POST http://localhost:12000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{"model":"frontier.max","messages":[{"role":"user","content":"hello"}]}' | jq . +``` + +### From a Claude-native client (Anthropic Messages API) + +Plano translates between OpenAI and Anthropic shapes, so the same gateway +serves both client SDKs: + +```bash +curl -sS -X POST http://localhost:12000/v1/messages \ + -H "Content-Type: application/json" \ + -H "anthropic-version: 2023-06-01" \ + -H "x-api-key: test-key" \ + -d '{ + "model": "frontier.max", + "max_tokens": 512, + "messages": [{"role":"user","content":"explain CAP theorem like I have a CS undergrad background"}] + }' | jq . 
+``` + +### From Claude Desktop + +Once Plano is up, point Claude Desktop at it with one command: + +```bash +planoai launch claude-desktop --config config.yaml +``` + +Claude Desktop will switch into third-party gateway mode pointed at +`http://localhost:12000`, auto-discover the three model aliases via +`/v1/models`, and let you pick `frontier.fast` / `.smart` / `.max` from the +in-app model selector. To revert: `planoai launch claude-desktop --restore`. + +### From Codex CLI + +```bash +planoai launch codex +codex --model frontier.smart # or frontier.fast / frontier.max +``` + +### From the Claude Code CLI + +```bash +planoai launch claude-cli +``` + +The CLI will use Plano as its Anthropic endpoint; ask it for code-heavy work +and it'll resolve to Opus 4.7 automatically. + +## Config walkthrough + +[`config.yaml`](config.yaml) declares each provider once, then declares +**top-level routing preferences** that reference those providers by their +full `provider/model` name. Each route owns an ordered `models` pool — +primary first, fallbacks next. 
+ +```yaml +model_providers: + - model: digitalocean/anthropic-claude-sonnet-4-6 + access_key: $DO_API_KEY + default: true # used when no preference matches + - model: openai/gpt-5.5 + access_key: $OPENAI_API_KEY + - model: anthropic/claude-opus-4-7 + access_key: $ANTHROPIC_API_KEY + +routing_preferences: + - name: code generation + description: writing new functions, classes, scripts, or boilerplate; implementing APIs; producing unit tests; refactoring code + models: + - anthropic/claude-opus-4-7 # primary + - openai/gpt-5.5 # fallback on 429 / 5xx + + - name: deep analysis + description: long-form analysis, architecture review, security review, evaluating tradeoffs, structured critique + models: + - anthropic/claude-opus-4-7 + - openai/gpt-5.5 + + - name: complex reasoning + description: multi-step reasoning, mathematical problem solving, structured planning, tool and function calling, data extraction + models: + - openai/gpt-5.5 + - anthropic/claude-opus-4-7 + + - name: daily conversation + description: general chat, casual Q&A, summaries, drafting messages, quick rewrites + models: + - digitalocean/anthropic-claude-sonnet-4-6 + - openai/gpt-5.5 + +model_aliases: + frontier.fast: { target: anthropic-claude-sonnet-4-6 } + frontier.smart: { target: gpt-5.5 } + frontier.max: { target: claude-opus-4-7 } +``` + +A few things to call out: + +1. **Preference *descriptions* drive routing accuracy.** They're embedded + into the orchestrator's prompt; vague descriptions = vague routing. + Following the [LLM Routing best practices](../../../docs/source/guides/llm_router.rst): + - keep names specific and non-overlapping, + - prefer noun-centric descriptors over imperative phrasing, + - always include a generic "domain"-style route — here that's + `daily conversation` pinned to the cheapest tier — so unmatched + prompts still land somewhere deliberate. +2. **Ordered `models`** is a candidate pool. 
`models[0]` is the primary; + anything after it is a fallback that the client (or Plano's retry + logic) tries on `429`/`5xx`. Mix providers across the pool so a single + provider outage doesn't break the route. +3. **The `default: true` provider** is the safety net for prompts the + orchestrator can't confidently classify (e.g. one-word "thanks!"). +4. **Aliases** decouple your callers from provider/model strings. When the + next Sonnet ships, change the alias target — every caller picks it up + instantly. + +## Tracing + +`tracing.random_sampling: 100` in the config enables full OTLP tracing. Open +a second terminal and run: + +```bash +planoai trace +``` + +Each routed call shows up with the matched preference, ranked candidate +pool, selected model, end-to-end latency, and per-stage spans (router +decision, provider call, streaming chunks). + +## Cost framing + +A rough mix of 60% conversation, 30% reasoning, 10% deep code work — say +1,000 prompts/day at 1k input + 500 output tokens each — illustrates why +this layout pays off. Exact numbers depend on per-provider pricing the day +you read this; the point is that calling Opus 4.7 for casual chat is wasted +spend, and falling back to a small model on complex code is wasted output. +Plano's job is to let each provider do what it's best at, and to fail over +to the next entry in `models` when the primary throttles. + +## Customizing + +- **Swap a provider:** change the model string and `access_key`. e.g. + point `frontier.smart` at `azure_openai/gpt-5.5` by replacing the OpenAI + block with an Azure block, then update the matching entries inside + `routing_preferences[].models`. +- **Add fallbacks:** append more entries to any route's `models` list. + The orchestrator returns the full ranked pool, and Plano (or your + client) walks it on `429`/`5xx`. +- **Add a new route:** add another entry under `routing_preferences` with + a noun-centric description and its own `models` pool. 
No code change, + no client change — every existing caller benefits immediately. +- **Per-call policy override:** ship a `routing_preferences` field in the + request body to override the config for that one call (see the curl + example above). +- **Self-host the orchestrator:** see + [`../preference_based_routing/plano_config_local.yaml`](../preference_based_routing/plano_config_local.yaml) + for an Ollama-backed orchestrator. Drop the `overrides.llm_routing_model` + block into this config and you're off the hosted Plano-Orchestrator. + +## Files + +| File | Purpose | +| --------------------------------------------- | ---------------------------------------------------------------------- | +| [`config.yaml`](config.yaml) | Plano configuration (top-level routing_preferences + aliases) | +| [`run_demo.sh`](run_demo.sh) | Bring the demo up/down (`./run_demo.sh [down]`) | +| [`test.sh`](test.sh) | Per-prompt routing decision + chat completion across all four routes | +| [`test.rest`](test.rest) | REST Client snippets for VS Code / IntelliJ | + +## Stopping + +```bash +./run_demo.sh down # or: planoai down +``` diff --git a/demos/llm_routing/frontier_model_routing/config.yaml b/demos/llm_routing/frontier_model_routing/config.yaml new file mode 100644 index 000000000..249be1ae4 --- /dev/null +++ b/demos/llm_routing/frontier_model_routing/config.yaml @@ -0,0 +1,103 @@ +version: v0.4.0 + +# Frontier-tier model routing across three providers. +# +# - DigitalOcean Sonnet 4.6 -> daily driver: balanced quality + cost +# - OpenAI GPT 5.5 -> multimodal reasoning, tool use, math +# - Anthropic Opus 4.7 -> top-tier reasoning, long-form analysis, code +# +# Plano's preference-aligned router (Plano-Orchestrator) inspects each prompt +# and dispatches to the model whose top-level `routing_preferences` entry best +# matches the user's intent.
Each route owns an ordered `models` list: +# `models[0]` is the primary; subsequent entries are fallbacks the client +# (or Plano's retry logic) can try on `429`/`5xx` errors. + +listeners: + - type: model + name: model_listener + port: 12000 + +# --------------------------------------------------------------------------- +# Model providers (declared once, referenced by every route below) +# --------------------------------------------------------------------------- +# The `digitalocean/`, `openai/`, and `anthropic/` prefixes are recognized +# natively by Plano — no `base_url` or provider interface override needed. +model_providers: + - model: digitalocean/anthropic-claude-4.6-sonnet + access_key: $DO_API_KEY + default: true # used when no routing preference matches + + - model: digitalocean/openai-gpt-5.5 + access_key: $DO_API_KEY + + - model: digitalocean/anthropic-claude-opus-4.7 + access_key: $DO_API_KEY + +# --------------------------------------------------------------------------- +# Routing preferences (v0.4.0 top-level form) +# --------------------------------------------------------------------------- +# Best-practice notes (per the LLM Routing guide): +# - Names should be specific, non-overlapping, and aligned with the +# description so the orchestrator can disambiguate cleanly. +# - Descriptions are noun-centric phrases describing *the work*, not +# conversational instructions. +# - Always include a generic "domain" route so prompts that don't match a +# specific action still land on a deliberate model — here that's +# "daily conversation" pinned to the cheapest tier. +# - `models` is an ordered candidate pool; entry 0 is primary and entries +# 1..n are fallbacks (clients retry on 429/5xx). 
+routing_preferences: + - name: code generation + description: writing new functions, classes, scripts, or boilerplate; implementing APIs; producing unit tests; refactoring code + models: + - digitalocean/anthropic-claude-opus-4.7 # primary: top-tier code quality + - digitalocean/openai-gpt-5.5 # fallback if Opus is rate-limited / down + + - name: deep analysis + description: long-form analysis, architecture review, security review, evaluating tradeoffs, structured critique + models: + - digitalocean/anthropic-claude-opus-4.7 + - digitalocean/openai-gpt-5.5 + + - name: complex reasoning + description: multi-step reasoning, mathematical problem solving, structured planning, tool and function calling, data extraction + models: + - digitalocean/openai-gpt-5.5 # primary: strong reasoning + tool use + - digitalocean/anthropic-claude-opus-4.7 + + - name: daily conversation + description: general chat, casual Q&A, summaries, drafting messages, quick rewrites, day-to-day requests where speed and cost matter + models: + - digitalocean/anthropic-claude-4.6-sonnet + - digitalocean/openai-gpt-5.5 + +# --------------------------------------------------------------------------- +# Aliases — stable, human-friendly handles for clients +# --------------------------------------------------------------------------- +# Clients can pin to a tier without thinking about the underlying provider, +# and the underlying model can change without breaking callers. 
+model_aliases: + # Daily driver -> Claude Sonnet 4.6 + frontier.fast: + target: digitalocean/anthropic-claude-4.6-sonnet + + # Reasoning + tool calling -> OpenAI GPT 5.5 + frontier.smart: + target: digitalocean/openai-gpt-5.5 + + # Code + deep analysis -> Anthropic Opus 4.7 + frontier.max: + target: digitalocean/anthropic-claude-opus-4.7 + +# --------------------------------------------------------------------------- +# Model affinity for agentic loops +# --------------------------------------------------------------------------- +# In a tool-using agent loop, successive prompts can look like different +# routes (tool selection ~ code, reasoning ~ analysis), causing the router +# to flip between models mid-session. Clients send `X-Model-Affinity: <session-id>` +# and Plano caches the routing decision for the session TTL below. +routing: + session_ttl_seconds: 600 + +tracing: + random_sampling: 100 diff --git a/demos/llm_routing/frontier_model_routing/run_demo.sh b/demos/llm_routing/frontier_model_routing/run_demo.sh new file mode 100755 index 000000000..244f5fefe --- /dev/null +++ b/demos/llm_routing/frontier_model_routing/run_demo.sh @@ -0,0 +1,63 @@ +#!/bin/bash +set -e + +# --------------------------------------------------------------------------- +# Frontier model routing demo: DigitalOcean Sonnet 4.6 + GPT 5.5 + Opus 4.7 +# --------------------------------------------------------------------------- +# Write .env from the current shell (unless one already exists), then start Plano. +start_demo() { + if [ -f ".env" ]; then + echo ".env file already exists. Skipping creation." + else + missing=() + # config.yaml only references $DO_API_KEY; OPENAI_API_KEY / ANTHROPIC_API_KEY + # are optional and only matter if you add those providers to the config. + [ -z "$DO_API_KEY" ] && missing+=("DO_API_KEY") + + if [ ${#missing[@]} -ne 0 ]; then + echo "Error: the following environment variables are not set:" + for key in "${missing[@]}"; do echo " - $key"; done + echo + echo "Set them in your shell, then re-run this script. Example:" + echo " export DO_API_KEY=...
# from https://cloud.digitalocean.com/account/api/tokens" + echo " export OPENAI_API_KEY=... # optional, from https://platform.openai.com/api-keys" + echo " export ANTHROPIC_API_KEY=... # optional, from https://console.anthropic.com/" + exit 1 + fi + + echo "Creating .env file..." + { + echo "DO_API_KEY=$DO_API_KEY" + if [ -n "$OPENAI_API_KEY" ]; then echo "OPENAI_API_KEY=$OPENAI_API_KEY"; fi + if [ -n "$ANTHROPIC_API_KEY" ]; then echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY"; fi + } > .env + echo ".env file created." + fi + + echo "Starting Plano with config.yaml..." + planoai up config.yaml + + cat <<'EOF' + +Plano is up. Try the demo with: + ./test.sh # runs four sample prompts and shows which model handled each + planoai trace # live router decisions in a separate terminal + +Or call any model directly using its alias: + curl -sS -X POST http://localhost:12000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{"model":"frontier.max","messages":[{"role":"user","content":"hello"}]}' | jq . + +EOF +} +# Stop the Plano gateway started by start_demo. +stop_demo() { + echo "Stopping Plano..." + planoai down +} + +if [ "$1" == "down" ]; then + stop_demo +else + start_demo +fi diff --git a/demos/llm_routing/frontier_model_routing/test.rest b/demos/llm_routing/frontier_model_routing/test.rest new file mode 100644 index 000000000..5866198ae --- /dev/null +++ b/demos/llm_routing/frontier_model_routing/test.rest @@ -0,0 +1,212 @@ +### Frontier model routing — REST Client / VS Code REST snippets +### +### Plano runs the preference-aligned orchestrator on every chat request +### when top-level `routing_preferences` are configured. The `model` field +### in the body is the *fallback* if no preference matches; pinning it to +### `frontier.fast` gives a cheap default. Each route owns an ordered +### `models` pool — primary first, fallbacks next — that the client (or +### Plano's retry logic) walks on 429/5xx. + +@endpoint = http://localhost:12000 + +### ------------------------------------------------------------------------- +### 1.
Decision-only endpoint: see what the router would pick (no upstream call) +### Returns: { "models": [...ranked pool...], "route": "...", "trace_id": "..." } +### ------------------------------------------------------------------------- +POST {{endpoint}}/routing/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "model": "frontier.fast", + "messages": [ + { + "role": "user", + "content": "Refactor this Rust function to remove the global mutable state and add unit tests." + } + ] +} + +### ------------------------------------------------------------------------- +### 2. Routed by intent: daily conversation -> DigitalOcean Sonnet 4.6 +### ------------------------------------------------------------------------- +POST {{endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "model": "frontier.fast", + "max_tokens": 256, + "messages": [ + { + "role": "user", + "content": "Hey! Give me three fun facts about octopuses I can drop into a dinner conversation." + } + ] +} + +### ------------------------------------------------------------------------- +### 3. Routed by intent: complex reasoning -> OpenAI GPT 5.5 +### ------------------------------------------------------------------------- +POST {{endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "model": "frontier.fast", + "max_tokens": 512, + "messages": [ + { + "role": "user", + "content": "A train leaves Chicago at 9:14am traveling 72 mph. Another leaves St Louis at 10:02am traveling 65 mph toward Chicago. The cities are 297 miles apart. Walk through the math step by step and give me the time and place they meet." + } + ] +} + +### ------------------------------------------------------------------------- +### 4. 
Routed by intent: code generation -> Anthropic Opus 4.7 +### ------------------------------------------------------------------------- +POST {{endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "model": "frontier.fast", + "max_tokens": 800, + "messages": [ + { + "role": "user", + "content": "Write a Rust function that takes a Vec of UTF-8 bytes and returns a HashMap with grapheme cluster counts. Include unit tests and handle invalid UTF-8 gracefully." + } + ] +} + +### ------------------------------------------------------------------------- +### 5. Routed by intent: deep analysis -> Anthropic Opus 4.7 +### ------------------------------------------------------------------------- +POST {{endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "model": "frontier.fast", + "max_tokens": 600, + "messages": [ + { + "role": "user", + "content": "Review this Postgres schema for normalization issues:\nCREATE TABLE orders (id SERIAL PRIMARY KEY, customer_email TEXT, customer_name TEXT, items_json JSONB);" + } + ] +} + +### ------------------------------------------------------------------------- +### 6. Per-request routing override (config-defined preferences are bypassed +### for this single call). The `routing_preferences` field is stripped +### before the upstream provider sees the body. +### ------------------------------------------------------------------------- +POST {{endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "model": "frontier.fast", + "max_tokens": 256, + "messages": [ + { "role": "user", "content": "Draft me a haiku about Postgres replication slots." } + ], + "routing_preferences": [ + { + "name": "creative writing", + "description": "poetry, fiction, lyrical or playful prose", + "models": ["digitalocean/anthropic-claude-opus-4.7", "digitalocean/openai-gpt-5.5"] + } + ] +} + +### ------------------------------------------------------------------------- +### 7.
Pin a routing decision across an agentic loop with X-Model-Affinity. +### The first call routes normally and caches the decision. Subsequent +### calls with the same id reuse the cached model until the session TTL +### (default 10 min) expires. +### ------------------------------------------------------------------------- +POST {{endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json +X-Model-Affinity: agent-session-7f3e + +{ + "model": "frontier.fast", + "max_tokens": 256, + "messages": [ + { "role": "user", "content": "Plan a small refactor of an auth module — order of operations?" } + ] +} + +### Same affinity id — reuses cached routing decision (no re-classification) +POST {{endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json +X-Model-Affinity: agent-session-7f3e + +{ + "model": "frontier.fast", + "max_tokens": 256, + "messages": [ + { "role": "user", "content": "Now write the unit tests for step one." } + ] +} + +### ------------------------------------------------------------------------- +### 8. Pin to DigitalOcean Sonnet 4.6 via alias (skip routing entirely) +### ------------------------------------------------------------------------- +POST {{endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "model": "frontier.fast", + "max_tokens": 128, + "messages": [ + { "role": "user", "content": "One sentence: who painted the ceiling of the Sistine Chapel?" } + ] +} + +### ------------------------------------------------------------------------- +### 9. Pin to OpenAI GPT 5.5 via alias +### ------------------------------------------------------------------------- +POST {{endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "model": "frontier.smart", + "max_tokens": 256, + "messages": [ + { "role": "user", "content": "Outline a 30/60/90 day plan for a new platform engineering hire." } + ] +} + +### ------------------------------------------------------------------------- +### 10. 
Pin to Anthropic Opus 4.7 via alias +### ------------------------------------------------------------------------- +POST {{endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "model": "frontier.max", + "max_tokens": 600, + "messages": [ + { "role": "user", "content": "Review this Postgres schema for normalization issues:\nCREATE TABLE orders (id SERIAL PRIMARY KEY, customer_email TEXT, customer_name TEXT, items_json JSONB);" } + ] +} + +### ------------------------------------------------------------------------- +### 11. Anthropic Messages API (Claude-native client) -> routed by intent +### ------------------------------------------------------------------------- +POST {{endpoint}}/v1/messages HTTP/1.1 +Content-Type: application/json +anthropic-version: 2023-06-01 +x-api-key: test-key + +{ + "model": "frontier.fast", + "max_tokens": 256, + "messages": [ + { "role": "user", "content": "Recommend a senior engineering reading list with three picks and one sentence each." } + ] +} + +### ------------------------------------------------------------------------- +### 12. Inspect available models (auto-discovered for Claude Desktop / clients) +### ------------------------------------------------------------------------- +GET {{endpoint}}/v1/models HTTP/1.1 diff --git a/demos/llm_routing/frontier_model_routing/test.sh b/demos/llm_routing/frontier_model_routing/test.sh new file mode 100755 index 000000000..9bcbe8511 --- /dev/null +++ b/demos/llm_routing/frontier_model_routing/test.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# --------------------------------------------------------------------------- +# Frontier Model Routing demo — driver script +# +# For each of four intent-biased prompts we: +# 1. Hit POST /routing/v1/chat/completions (Plano's decision-only endpoint) +# to print the matched route name and the ranked candidate pool. +# 2. Hit POST /v1/chat/completions to actually run the request and print +# the model that handled it.
+# +# Plano runs the orchestrator on every chat completion when top-level +# `routing_preferences` are configured. The `model` field in the request is +# the *fallback* used when no preference matches — we pin it to +# `frontier.fast` so unmatched prompts land on the cheapest tier. +# --------------------------------------------------------------------------- + +set -e + +GATEWAY=${GATEWAY:-http://localhost:12000} +DECISION_ENDPOINT="$GATEWAY/routing/v1/chat/completions" +CHAT_ENDPOINT="$GATEWAY/v1/chat/completions" + +ask() { + local label="$1" + local prompt="$2" + + local body + body="$(jq -n --arg p "$prompt" '{ + "model": "frontier.fast", + "max_tokens": 256, + "messages": [{"role":"user","content":$p}] + }')" + + echo + echo "==========================================================" + echo "[$label]" + echo "prompt: $prompt" + echo "----------------------------------------------------------" + + # Step 1: decision-only — what would the router pick? + echo " routing decision:" + curl -sS -X POST "$DECISION_ENDPOINT" \ + -H "Content-Type: application/json" \ + -d "$body" \ + | jq '{ + matched_route: .route, + ranked_models: .models, + pinned: .pinned + }' \ + | sed 's/^/ /' + + # Step 2: actually run the request through the chosen model. + echo " chat completion:" + curl -sS -X POST "$CHAT_ENDPOINT" \ + -H "Content-Type: application/json" \ + -d "$body" \ + | jq '{ + routed_to: .model, + reply: .choices[0].message.content + }' \ + | sed 's/^/ /' +} + +ask "daily conversation -> expects DigitalOcean Sonnet 4.6" \ + "Hey! Give me three fun facts about octopuses I can drop into a dinner conversation." + +ask "complex reasoning -> expects OpenAI GPT 5.5" \ + "A train leaves Chicago at 9:14am traveling 72 mph. Another leaves St Louis at 10:02am traveling 65 mph toward Chicago. The cities are 297 miles apart. Walk through the math step by step and give me the time and place they meet." 
+ +ask "code generation -> expects Anthropic Opus 4.7" \ + "Write a Rust function that takes a Vec of UTF-8 bytes and returns a HashMap with grapheme cluster counts. Include unit tests and handle invalid UTF-8 gracefully." + +ask "deep analysis -> expects Anthropic Opus 4.7" \ + "Review this Postgres schema for normalization, indexing, and migration risk. Give me a prioritized list of issues: +CREATE TABLE orders ( + id SERIAL PRIMARY KEY, + customer_email TEXT, + customer_name TEXT, + items_json JSONB, + total NUMERIC, + created_at TIMESTAMPTZ DEFAULT now() +);" + +# --------------------------------------------------------------------------- +# Bonus: pin a routing decision across an agentic loop with X-Model-Affinity. +# Both calls hit the same gateway with the same affinity id, so the second +# call reuses the first call's routing decision instead of reclassifying. +# --------------------------------------------------------------------------- +echo +echo "==========================================================" +echo "[bonus: model affinity across two turns of an agent loop]" +echo "----------------------------------------------------------" + +SID="demo-$(date +%s)-$RANDOM" +echo " X-Model-Affinity: $SID" +# turn LABEL PROMPT: POST one chat completion carrying the shared X-Model-Affinity id ($SID) and print the model that served it. +turn() { + local turn_label="$1" + local prompt="$2" + echo " $turn_label:" + curl -sS -X POST "$CHAT_ENDPOINT" \ + -H "Content-Type: application/json" \ + -H "X-Model-Affinity: $SID" \ + -d "$(jq -n --arg p "$prompt" '{ + "model": "frontier.fast", + "max_tokens": 128, + "messages": [{"role":"user","content":$p}] + }')" \ + | jq '{ routed_to: .model }' \ + | sed 's/^/ /' +} + +turn "turn 1 (sets affinity)" "Plan a small refactor of an auth module — what's the order of operations?" +turn "turn 2 (reuses decision)" "Now write the unit tests for step one." + +echo +echo "==========================================================" +echo "Done. Want to inspect routing decisions live?
Run: planoai trace" +echo "=========================================================="