diff --git a/CHANGELOG.md b/CHANGELOG.md index bdc8fab..aa0a0fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,29 @@ patch bumps fix bugs or polish without behaviour change. --- +## Unreleased + +### Prestart port guard (LaunchAgent) + +`quenchforge install` now also writes a prestart guard to +`~/.config/quenchforge/prestart-guard.sh` and points the generated plist's +`ProgramArguments[0]` at it. Before exec'ing `quenchforge serve` the guard +boots out Ollama's launchd job and evicts any non-quenchforge listener on +port 11434, so quenchforge authoritatively reclaims the canonical +Ollama-API port on every (re)start and at login. + +Fixes the recurring contention where Ollama.app's auto-launched +`ollama serve` grabbed 11434 during a quenchforge restart window: because +the pre-bind check yields (exits 0) on a held port and +`KeepAlive.SuccessfulExit=false` then leaves the job dead, the squatter +would win and quenchforge stayed down until hand-evicted. The guard +removes the manual step. It only kills the actual squatter (never a +running quenchforge / `llama-server`) and is a no-op when Ollama isn't +present. Source: `cmd/quenchforge/prestart-guard.sh`; covered by +`TestInstall_WritesPlistAndPrestartGuard`. + +--- + ## v0.8.0 — AMD-discrete GPU mode + VRAM-tier-adaptive sizing (2026-05-31) Promotes the `v0.8.0-rc2` AMD-discrete GPU-mode revival (below) to a diff --git a/cmd/quenchforge/install.go b/cmd/quenchforge/install.go index c5cdb75..f42060c 100644 --- a/cmd/quenchforge/install.go +++ b/cmd/quenchforge/install.go @@ -22,8 +22,16 @@ import ( //go:embed plist_template.plist var plistTemplate []byte +//go:embed prestart-guard.sh +var prestartGuard []byte + const plistFilename = "com.cerid.quenchforge.plist" +// prestartGuardRelPath is where the guard is written under the operator's +// HOME. The generated plist's ProgramArguments[0] points here (via the +// REPLACE_ME → $USER substitution), so the two must stay in sync. 
+var prestartGuardRelPath = filepath.Join(".config", "quenchforge", "prestart-guard.sh") + func cmdInstall(args []string, stdout, stderr io.Writer) error { fs := flag.NewFlagSet("install", flag.ContinueOnError) fs.SetOutput(stderr) @@ -91,7 +99,20 @@ func cmdInstall(args []string, stdout, stderr io.Writer) error { return fmt.Errorf("install: write %s: %w", targetPath, err) } + // Write the prestart guard the plist's ProgramArguments[0] points at. + // It reclaims port 11434 from a squatter (e.g. Ollama) before exec'ing + // `quenchforge serve`. Executable; written under HOME, while the plist + // references /Users/$USER/... (REPLACE_ME → $USER), so HOME must match. + guardPath := filepath.Join(home, prestartGuardRelPath) + if err := os.MkdirAll(filepath.Dir(guardPath), 0o755); err != nil { + return fmt.Errorf("install: mkdir %s: %w", filepath.Dir(guardPath), err) + } + if err := os.WriteFile(guardPath, prestartGuard, 0o755); err != nil { + return fmt.Errorf("install: write prestart guard %s: %w", guardPath, err) + } + fmt.Fprintf(stdout, "Installed LaunchAgent at %s (%d bytes)\n", targetPath, len(data)) + fmt.Fprintf(stdout, "Installed prestart port guard at %s\n", guardPath) if !*skipUserSub { fmt.Fprintf(stdout, " Substituted REPLACE_ME → %s\n", os.Getenv("USER")) } diff --git a/cmd/quenchforge/install_test.go b/cmd/quenchforge/install_test.go new file mode 100644 index 0000000..aadf37d --- /dev/null +++ b/cmd/quenchforge/install_test.go @@ -0,0 +1,63 @@ +// Copyright (c) 2026 Cerid AI and the Quenchforge Contributors. 
+// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "bytes" + "os" + "path/filepath" + "runtime" + "strings" + "testing" +) + +func TestInstall_WritesPlistAndPrestartGuard(t *testing.T) { + if runtime.GOOS != "darwin" { + t.Skip("install is macOS-only") + } + home := t.TempDir() + t.Setenv("HOME", home) + t.Setenv("USER", "tester") + + var out, errb bytes.Buffer + if err := cmdInstall(nil, &out, &errb); err != nil { + t.Fatalf("cmdInstall: %v (stderr=%s)", err, errb.String()) + } + + // Plist written, REPLACE_ME substituted, ProgramArguments points at the + // guard under the operator's home (the /Users/$USER convention the + // template uses for all its paths). + plist, err := os.ReadFile(filepath.Join(home, "Library", "LaunchAgents", plistFilename)) + if err != nil { + t.Fatalf("read plist: %v", err) + } + ps := string(plist) + if strings.Contains(ps, "REPLACE_ME") { + t.Errorf("plist still contains a REPLACE_ME placeholder") + } + wantRef := "/Users/tester/" + filepath.ToSlash(prestartGuardRelPath) + if !strings.Contains(ps, wantRef) { + t.Errorf("plist ProgramArguments should reference guard %q\n%s", wantRef, ps) + } + + // Guard written to the operator's HOME, executable, with the eviction + // logic intact. 
+ guardAbs := filepath.Join(home, prestartGuardRelPath) + info, err := os.Stat(guardAbs) + if err != nil { + t.Fatalf("stat guard: %v", err) + } + if info.Mode().Perm()&0o100 == 0 { + t.Errorf("guard is not executable: mode %v", info.Mode()) + } + guard, err := os.ReadFile(guardAbs) + if err != nil { + t.Fatalf("read guard: %v", err) + } + for _, want := range []string{"com.ollama.ollama", "lsof", "exec "} { + if !strings.Contains(string(guard), want) { + t.Errorf("guard script missing expected content %q", want) + } + } +} diff --git a/cmd/quenchforge/plist_template.plist b/cmd/quenchforge/plist_template.plist index da36a58..16e7084 100644 --- a/cmd/quenchforge/plist_template.plist +++ b/cmd/quenchforge/plist_template.plist @@ -36,9 +36,19 @@ Label com.cerid.quenchforge + ProgramArguments - /usr/local/bin/quenchforge + /Users/REPLACE_ME/.config/quenchforge/prestart-guard.sh serve @@ -75,7 +85,10 @@ message when the port is held by Ollama or a stale quenchforge — KeepAlive= would respawn-loop on that, defeating the operator-friendly error. SuccessfulExit=false keeps the supervisor - restart-on-crash semantics while letting clean exits stick. + restart-on-crash semantics while letting clean exits stick. With the + prestart guard reclaiming the port first, that yield path now only + triggers when a squatter cannot be evicted (e.g. another user's + process), which is the correct time to surface the error and stop. --> KeepAlive diff --git a/cmd/quenchforge/prestart-guard.sh b/cmd/quenchforge/prestart-guard.sh new file mode 100755 index 0000000..60d630f --- /dev/null +++ b/cmd/quenchforge/prestart-guard.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# quenchforge prestart guard +# ------------------------------------------------------------------------- +# Reclaims the gateway port (default 11434, the canonical Ollama-API port) +# from a squatter — in practice Ollama.app's auto-launched `ollama serve` +# child — BEFORE handing off to `quenchforge serve`. 
+# +# Why this exists: quenchforge's own pre-bind check (v0.7.2+) deliberately +# exits 0 when the port is already held, and the LaunchAgent's +# KeepAlive.SuccessfulExit=false then leaves it dead. That makes quenchforge +# yield to a squatter. Wiring this guard as the LaunchAgent's +# ProgramArguments[0] means the port is reclaimed on every (re)start and at +# login, so quenchforge stays authoritative on the canonical port without +# ceding it — and without the operator hand-evicting Ollama during restart +# windows. +# +# Install: see packaging/macos/README.md. Idempotent and safe to run when +# no squatter is present. +set -u + +# launchd hands jobs a minimal PATH; lsof lives in /usr/sbin, launchctl in +# /bin. Use an explicit PATH so the guard works regardless of the plist's. +export PATH="/usr/sbin:/usr/bin:/bin:/usr/local/bin" + +PORT="${QUENCHFORGE_GUARD_PORT:-11434}" +QF_BIN="${QUENCHFORGE_BIN:-/usr/local/bin/quenchforge}" +UID_NUM="$(id -u)" + +log() { printf '[prestart-guard] %s\n' "$*" >&2; } + +# 1. Boot out Ollama's launchd job so it cannot immediately respawn the +# serve child we are about to evict. Best-effort: not-loaded is fine. +if launchctl print "gui/${UID_NUM}/com.ollama.ollama" >/dev/null 2>&1; then + log "booting out com.ollama.ollama" + launchctl bootout "gui/${UID_NUM}/com.ollama.ollama" 2>/dev/null || true +fi + +# 2. Evict any NON-quenchforge listener still holding the port. We never +# kill our own quenchforge / llama-server processes (a concurrent +# instance or our own slots), only a foreign squatter. +for pid in $(lsof -ti "tcp:${PORT}" -sTCP:LISTEN 2>/dev/null); do + cmd="$(ps -p "$pid" -o comm= 2>/dev/null)" + case "$cmd" in + *quenchforge* | *llama-server*) + : # ours — leave it + ;; + *) + log "evicting squatter on :${PORT} — pid=${pid} (${cmd:-unknown})" + kill "$pid" 2>/dev/null || true + ;; + esac +done + +# 3. Brief settle so the kernel releases the port before quenchforge's +# own pre-bind check runs. +sleep 1 + +# 4. 
Hand off. exec so launchd supervises quenchforge directly (PID, +# signals, KeepAlive, ProcessType all apply to the server, not this +# wrapper). Args after the guard in ProgramArguments flow through, so +# the plist provides `serve`. +log "starting: ${QF_BIN} $*" +exec "${QF_BIN}" "$@" diff --git a/packaging/macos/README.md b/packaging/macos/README.md index 98bdd7f..b132c7e 100644 --- a/packaging/macos/README.md +++ b/packaging/macos/README.md @@ -27,6 +27,32 @@ The canonical source for the embedded template is [`cmd/quenchforge/plist_template.plist`](../../cmd/quenchforge/plist_template.plist) — inspect it before installing if you want to see what will land. +### Prestart port guard + +`quenchforge install` also writes a small **prestart guard** to +`~/.config/quenchforge/prestart-guard.sh`, and the generated plist's +`ProgramArguments[0]` points at it (rather than the bare binary). On every +(re)start and at login the guard: + +1. boots out Ollama's launchd job (`com.ollama.ollama`) if present, so it + can't immediately respawn its `ollama serve` child; +2. evicts any **non-quenchforge** listener still holding port `11434`; +3. `exec`s `quenchforge serve`. + +Why: quenchforge's pre-bind check deliberately exits 0 (yields) when the +port is already held, and `KeepAlive.SuccessfulExit=false` then leaves it +dead — so without the guard, anything that grabs `11434` during a restart +window (classically Ollama.app's auto-launched server) wins and quenchforge +stays down. The guard makes quenchforge authoritatively reclaim the +canonical Ollama-API port without the operator hand-evicting Ollama. + +The guard only kills the actual port squatter (never a running quenchforge +or `llama-server`) and only boots out Ollama if its job exists, so it +kills nothing when no foreign process is listening on the port. Source: +[`cmd/quenchforge/prestart-guard.sh`](../../cmd/quenchforge/prestart-guard.sh). 
+Operators who intentionally run Ollama alongside quenchforge can edit the +plist's `ProgramArguments` back to `/usr/local/bin/quenchforge` + `serve`. + To uninstall: ```bash