From 5db7d977ce0ec056614624c0538b45d8898fbd82 Mon Sep 17 00:00:00 2001 From: Aleks Petrov Date: Wed, 29 Apr 2026 22:25:51 +0200 Subject: [PATCH] =?UTF-8?q?feat(executor):=20Opus=20plans,=20Sonnet=20exec?= =?UTF-8?q?utes=20=E2=80=94=20cut=20Pilot=20token=20spend=20(GH-2432)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Routes Opus to where reasoning matters (epic planning) and Sonnet to verbose execution, addressing the 4× token spike in Pilot subprocess sessions. - ClaudeCodeConfig gains AllowedTools + MCPConfigPath; ExecuteOptions plumbs them through and the Claude Code args builder emits --allowedTools and --mcp-config when set. The default execution toolbox (Read, Write, Edit, Bash, Grep, Glob, Task) excludes MCP-provided tools to drop per-turn context bloat. - New PlanningConfig (default model claude-opus-4-7). PlanEpic now passes --model + --allowedTools=Read,Grep,Glob and exports ANTHROPIC_MODEL so Pilot's global Sonnet env var doesn't override it via Node's last-write env lookup. - model_routing.complex default flipped from claude-opus-4-6 to claude-sonnet-4-6 — Opus is reserved for planning only. - Retry counter persisted via labels (pilot-retry-1, pilot-retry-2, pilot-retry-exhausted). Survives `pilot start` restarts; the previous in-memory retryReadyCount silently reset on restart and let pathological issues consume Opus indefinitely. The auto-merger strips retry labels on successful merge so a future regression starts fresh. - HooksConfig.RunTestsOnStop default flipped to false — Stop-hook tests forced long unproductive turns into the Claude session. --- internal/adapters/github/gh2432_test.go | 174 ++++++++++++++++++++++++ internal/adapters/github/poller.go | 78 +++++++++-- internal/adapters/github/poller_test.go | 10 +- internal/adapters/github/types.go | 10 ++ internal/autopilot/auto_merger.go | 12 ++ internal/executor/backend.go | 57 +++++++- internal/executor/backend_claudecode.go | 10 ++ internal/executor/epic.go | 20 ++- internal/executor/gh2432_test.go | 128 +++++++++++++++++ internal/executor/hooks.go | 6 +- internal/executor/hooks_test.go | 5 +- internal/executor/runner.go | 48 +++++-- 12 files changed, 524 insertions(+), 34 deletions(-) create mode 100644 internal/adapters/github/gh2432_test.go create mode 100644 internal/executor/gh2432_test.go diff --git a/internal/adapters/github/gh2432_test.go b/internal/adapters/github/gh2432_test.go new file mode 100644 index 00000000..d09d0520 --- /dev/null +++ b/internal/adapters/github/gh2432_test.go @@ -0,0 +1,174 @@ +package github + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/qf-studio/pilot/internal/testutil" +) + +// GH-2432: Retry counter is persisted via labels (pilot-retry-1, pilot-retry-2, +// pilot-retry-exhausted) so the budget survives `pilot start` restarts. + +// First retry on a fresh pilot-retry-ready issue should add pilot-retry-1.
+func TestPoller_RetryLabel_FirstAttemptAddsRetry1(t *testing.T) { + now := time.Now() + issues := []*Issue{ + {Number: 42, State: "open", Title: "Retry-ready", Labels: []Label{{Name: "pilot"}, {Name: LabelRetryReady}}, CreatedAt: now.Add(-1 * time.Hour)}, + } + + var addedLabels atomic.Value + addedLabels.Store([]string(nil)) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + switch { + case r.Method == http.MethodPost && strings.HasSuffix(r.URL.Path, "/issues/42/labels"): + var body struct { + Labels []string `json:"labels"` + } + _ = json.NewDecoder(r.Body).Decode(&body) + prev, _ := addedLabels.Load().([]string) + addedLabels.Store(append(prev, body.Labels...)) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`[]`)) + return + case r.Method == http.MethodDelete: + w.WriteHeader(http.StatusOK) + return + case r.URL.Path == "/search/issues": + _, _ = w.Write([]byte(`{"total_count": 0}`)) + return + } + _ = json.NewEncoder(w).Encode(issues) + })) + defer server.Close() + + client := NewClientWithBaseURL(testutil.FakeGitHubToken, server.URL) + poller, _ := NewPoller(client, "owner/repo", "pilot", 30*time.Second, WithRetryGracePeriod(0)) + + issue, err := poller.findOldestUnprocessedIssue(context.Background()) + if err != nil { + t.Fatalf("findOldestUnprocessedIssue() error = %v", err) + } + if issue == nil || issue.Number != 42 { + t.Fatalf("expected #42 to be dispatched, got %v", issue) + } + + added, _ := addedLabels.Load().([]string) + foundRetry1 := false + for _, l := range added { + if l == LabelRetry1 { + foundRetry1 = true + } + } + if !foundRetry1 { + t.Errorf("expected pilot-retry-1 to be added, got labels=%v", added) + } +} + +// pilot-retry-2 + pilot-retry-ready → escalate to pilot-retry-exhausted, no dispatch. 
+func TestPoller_RetryLabel_Retry2EscalatesToExhausted(t *testing.T) { + now := time.Now() + issues := []*Issue{ + {Number: 42, State: "open", Title: "Retry-ready", Labels: []Label{{Name: "pilot"}, {Name: LabelRetryReady}, {Name: LabelRetry2}}, CreatedAt: now.Add(-1 * time.Hour)}, + {Number: 43, State: "open", Title: "Available", Labels: []Label{{Name: "pilot"}}, CreatedAt: now}, + } + + var addedLabels atomic.Value + addedLabels.Store([]string(nil)) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + switch { + case r.Method == http.MethodPost && strings.HasSuffix(r.URL.Path, "/issues/42/labels"): + var body struct { + Labels []string `json:"labels"` + } + _ = json.NewDecoder(r.Body).Decode(&body) + prev, _ := addedLabels.Load().([]string) + addedLabels.Store(append(prev, body.Labels...)) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`[]`)) + return + case r.Method == http.MethodDelete: + w.WriteHeader(http.StatusOK) + return + case r.URL.Path == "/search/issues": + _, _ = w.Write([]byte(`{"total_count": 0}`)) + return + } + _ = json.NewEncoder(w).Encode(issues) + })) + defer server.Close() + + client := NewClientWithBaseURL(testutil.FakeGitHubToken, server.URL) + poller, _ := NewPoller(client, "owner/repo", "pilot", 30*time.Second, WithRetryGracePeriod(0)) + + issue, err := poller.findOldestUnprocessedIssue(context.Background()) + if err != nil { + t.Fatalf("findOldestUnprocessedIssue() error = %v", err) + } + if issue == nil { + t.Fatal("expected #43 to be dispatched (skipping exhausted #42)") + } + if issue.Number != 43 { + t.Errorf("got issue #%d, want #43", issue.Number) + } + + added, _ := addedLabels.Load().([]string) + foundExhausted := false + for _, l := range added { + if l == LabelRetryExhausted { + foundExhausted = true + } + } + if !foundExhausted { + t.Errorf("expected pilot-retry-exhausted to be stamped on #42, got %v", added) + } +} + +// pilot-retry-exhausted is terminal — never dispatched. +func TestPoller_RetryLabel_ExhaustedIsTerminal(t *testing.T) { + now := time.Now() + issues := []*Issue{ + {Number: 42, State: "open", Title: "Exhausted", Labels: []Label{{Name: "pilot"}, {Name: LabelRetryReady}, {Name: LabelRetryExhausted}}, CreatedAt: now.Add(-1 * time.Hour)}, + {Number: 43, State: "open", Title: "Available", Labels: []Label{{Name: "pilot"}}, CreatedAt: now}, + } + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(issues) + })) + defer server.Close() + + client := NewClientWithBaseURL(testutil.FakeGitHubToken, server.URL) + poller, _ := NewPoller(client, "owner/repo", "pilot", 30*time.Second, WithRetryGracePeriod(0)) + + issue, err := poller.findOldestUnprocessedIssue(context.Background()) + if err != nil { + t.Fatalf("findOldestUnprocessedIssue() error = %v", err) + } + if issue == nil || issue.Number != 43 { + t.Errorf("expected #43, got %v", issue) + } +} + +// RetryStateLabels exposes the canonical list for cleanup on merge. 
+func TestRetryStateLabels_OrderingAndCompleteness(t *testing.T) { + want := []string{LabelRetry1, LabelRetry2, LabelRetryExhausted} + if len(RetryStateLabels) != len(want) { + t.Fatalf("RetryStateLabels len = %d, want %d", len(RetryStateLabels), len(want)) + } + for i, l := range want { + if RetryStateLabels[i] != l { + t.Errorf("RetryStateLabels[%d] = %q, want %q", i, RetryStateLabels[i], l) + } + } +} diff --git a/internal/adapters/github/poller.go b/internal/adapters/github/poller.go index 6b6fc5d0..76da979c 100644 --- a/internal/adapters/github/poller.go +++ b/internal/adapters/github/poller.go @@ -1324,6 +1324,11 @@ func (p *Poller) shouldRetryFailedIssue(ctx context.Context, issue *Issue) bool // shouldRetryRetryReadyIssue checks if a pilot-retry-ready issue should be auto-retried. // Returns true if the issue should be retried (label removed), false if it should be skipped. // GH-2276: Issues with pilot-retry-ready (PR closed without merge) get retried up to maxRetryReadyRetries times. +// +// GH-2432: Retry counter is persisted via GitHub labels (pilot-retry-1, -2, +// -exhausted) so the count survives `pilot start` restarts. The previous +// in-memory map silently reset on restart, allowing pathological issues to +// consume Opus indefinitely. func (p *Poller) shouldRetryRetryReadyIssue(ctx context.Context, issue *Issue) bool { // Don't retry closed issues if issue.State != "open" { @@ -1339,15 +1344,46 @@ func (p *Poller) shouldRetryRetryReadyIssue(ctx context.Context, issue *Issue) b return false } - p.mu.RLock() - retries := p.retryReadyCount[issue.Number] - p.mu.RUnlock() + // GH-2432: terminal state — exhausted retries never retry again. + if HasLabel(issue, LabelRetryExhausted) { + p.logger.Warn("Issue is pilot-retry-exhausted, skipping", + slog.Int("number", issue.Number), + ) + return false + } - if retries >= p.maxRetryReadyRetries { - p.logger.Warn("Issue has reached max retry-ready retries, skipping", + // Determine the next retry-counter label based on current state. + var currentRetryLabel, nextRetryLabel string + switch { + case HasLabel(issue, LabelRetry2): + currentRetryLabel = LabelRetry2 + nextRetryLabel = LabelRetryExhausted + case HasLabel(issue, LabelRetry1): + currentRetryLabel = LabelRetry1 + nextRetryLabel = LabelRetry2 + default: + nextRetryLabel = LabelRetry1 + } + + // If escalating to exhausted, mark the label and skip dispatch. + if nextRetryLabel == LabelRetryExhausted { + if err := p.client.RemoveLabel(ctx, p.owner, p.repo, issue.Number, currentRetryLabel); err != nil { + p.logger.Warn("Failed to remove prior retry label", + slog.Int("number", issue.Number), + slog.String("label", currentRetryLabel), + slog.Any("error", err), + ) + } + if err := p.client.AddLabels(ctx, p.owner, p.repo, issue.Number, []string{LabelRetryExhausted}); err != nil { + p.logger.Warn("Failed to add pilot-retry-exhausted label", + slog.Int("number", issue.Number), + slog.Any("error", err), + ) + } + // Also clear pilot-retry-ready so the poller doesn't keep finding it. 
+ _ = p.client.RemoveLabel(ctx, p.owner, p.repo, issue.Number, LabelRetryReady) + p.logger.Warn("Issue exhausted retry budget — escalated to pilot-retry-exhausted", slog.Int("number", issue.Number), - slog.Int("retries", retries), - slog.Int("max", p.maxRetryReadyRetries), ) return false } @@ -1357,7 +1393,26 @@ func (p *Poller) shouldRetryRetryReadyIssue(ctx context.Context, issue *Issue) b return false } - // Remove pilot-retry-ready label and increment retry count + // Swap retry-N label: remove current (if any), add next. + if currentRetryLabel != "" { + if err := p.client.RemoveLabel(ctx, p.owner, p.repo, issue.Number, currentRetryLabel); err != nil { + p.logger.Warn("Failed to remove prior retry label", + slog.Int("number", issue.Number), + slog.String("label", currentRetryLabel), + slog.Any("error", err), + ) + } + } + if err := p.client.AddLabels(ctx, p.owner, p.repo, issue.Number, []string{nextRetryLabel}); err != nil { + p.logger.Warn("Failed to add retry label", + slog.Int("number", issue.Number), + slog.String("label", nextRetryLabel), + slog.Any("error", err), + ) + // Continue anyway; we'd rather retry than block on a label flake. + } + + // Remove pilot-retry-ready label so the poller doesn't loop the same issue. if err := p.client.RemoveLabel(ctx, p.owner, p.repo, issue.Number, LabelRetryReady); err != nil { p.logger.Warn("Failed to remove pilot-retry-ready label for retry", slog.Int("number", issue.Number), @@ -1366,8 +1421,10 @@ func (p *Poller) shouldRetryRetryReadyIssue(ctx context.Context, issue *Issue) b return false } + // Keep the legacy in-memory counter in sync so existing tests/state observers + // remain consistent. GH-2432: this is now a mirror, not the source of truth. p.mu.Lock() - p.retryReadyCount[issue.Number] = retries + 1 + p.retryReadyCount[issue.Number]++ p.mu.Unlock() // Clear from processed map so the issue can be re-picked @@ -1375,8 +1432,7 @@ func (p *Poller) shouldRetryRetryReadyIssue(ctx context.Context, issue *Issue) b p.logger.Info("Auto-retrying pilot-retry-ready issue", slog.Int("number", issue.Number), - slog.Int("retry", retries+1), - slog.Int("max", p.maxRetryReadyRetries), + slog.String("retry_label", nextRetryLabel), ) return true diff --git a/internal/adapters/github/poller_test.go b/internal/adapters/github/poller_test.go index e1f98d0f..3f20b3e5 100644 --- a/internal/adapters/github/poller_test.go +++ b/internal/adapters/github/poller_test.go @@ -2337,8 +2337,11 @@ func TestPoller_AutoRetryRetryReadyIssue_FirstRetry(t *testing.T) { func TestPoller_AutoRetryRetryReadyIssue_RetryLimitReached(t *testing.T) { now := time.Now() + // GH-2432: Retry state is now persisted via labels — issue already at + // pilot-retry-2 means the next escalation goes to pilot-retry-exhausted + // (no dispatch). 
issues := []*Issue{ - {Number: 42, State: "open", Title: "Retry-ready issue", Labels: []Label{{Name: "pilot"}, {Name: LabelRetryReady}}, CreatedAt: now.Add(-1 * time.Hour)}, + {Number: 42, State: "open", Title: "Retry-ready issue", Labels: []Label{{Name: "pilot"}, {Name: LabelRetryReady}, {Name: LabelRetry2}}, CreatedAt: now.Add(-1 * time.Hour)}, {Number: 43, State: "open", Title: "Available issue", Labels: []Label{{Name: "pilot"}}, CreatedAt: now}, } @@ -2353,11 +2356,6 @@ func TestPoller_AutoRetryRetryReadyIssue_RetryLimitReached(t *testing.T) { WithMaxRetryReadyRetries(3), ) - // Simulate: already retried 3 times - poller.mu.Lock() - poller.retryReadyCount[42] = 3 - poller.mu.Unlock() - issue, err := poller.findOldestUnprocessedIssue(context.Background()) if err != nil { t.Fatalf("findOldestUnprocessedIssue() error = %v", err) diff --git a/internal/adapters/github/types.go b/internal/adapters/github/types.go index 4e368390..11acd0d8 100644 --- a/internal/adapters/github/types.go +++ b/internal/adapters/github/types.go @@ -98,8 +98,18 @@ const ( LabelTitleRejected = "pilot-title-rejected" // GH-2363: title guard escalation; blocks auto-retry until human edits title LabelSuperseded = "pilot-superseded" // GH-2402: sub-issue auto-closed because parent epic already shipped the work LabelBlocked = "pilot-blocked" // GH-2402: deterministic failure that won't change between retries (e.g. non-conventional title) + + // GH-2432: Retry-counter labels persist retry state across `pilot start` + // restarts (the in-memory retryReadyCount map was lost on restart, letting + // broken issues consume Opus indefinitely). + LabelRetry1 = "pilot-retry-1" + LabelRetry2 = "pilot-retry-2" + LabelRetryExhausted = "pilot-retry-exhausted" ) +// RetryStateLabels lists the retry-counter labels in escalation order. GH-2432. +var RetryStateLabels = []string{LabelRetry1, LabelRetry2, LabelRetryExhausted} + // Priority mapping from GitHub labels type Priority int diff --git a/internal/autopilot/auto_merger.go b/internal/autopilot/auto_merger.go index 03efb26d..af6283bf 100644 --- a/internal/autopilot/auto_merger.go +++ b/internal/autopilot/auto_merger.go @@ -102,6 +102,18 @@ func (m *AutoMerger) MergePR(ctx context.Context, prState *PRState) error { } m.log.Info("PR merged", "pr", prState.PRNumber, "method", mergeMethod) + + // GH-2432: Strip retry-counter labels off the linked issue so a future + // regression on the same issue starts the retry budget from zero again. + if prState.IssueNumber > 0 { + for _, label := range github.RetryStateLabels { + if err := m.ghClient.RemoveLabel(ctx, m.owner, m.repo, prState.IssueNumber, label); err != nil { + m.log.Debug("retry label cleanup: remove failed (likely not present)", + "issue", prState.IssueNumber, "label", label, "error", err) + } + } + } + return nil } diff --git a/internal/executor/backend.go b/internal/executor/backend.go index c74583e0..73201a3e 100644 --- a/internal/executor/backend.go +++ b/internal/executor/backend.go @@ -69,6 +69,14 @@ type ExecuteOptions struct { // The callback receives the process PID and the watchdog timeout duration. // Called BEFORE the process is killed, allowing for alert emission. WatchdogCallback func(pid int, watchdogTimeout time.Duration) + + // AllowedTools restricts the set of tools the subprocess may use. + // Maps to the Claude Code CLI --allowedTools flag. GH-2432. + AllowedTools []string + + // MCPConfigPath is passed to the subprocess via --mcp-config. 
When empty, + // no MCP servers are loaded (avoiding tool-definition token bloat). GH-2432. + MCPConfigPath string } // BackendEvent represents a streaming event from the backend. @@ -261,6 +269,10 @@ type BackendConfig struct { // Hooks contains Claude Code hooks settings for quality gates during execution Hooks *HooksConfig `yaml:"hooks,omitempty"` + // Planning controls the model used for epic planning subprocesses (GH-2432). + // Planning gets Opus while execution stays on Sonnet for cost savings. + Planning *PlanningConfig `yaml:"planning,omitempty"` + // UseWorktree enables git worktree isolation for execution. // When true, Pilot creates a temporary worktree for each task, allowing // execution even when the user has uncommitted changes in their working directory. @@ -560,6 +572,44 @@ type ClaudeCodeConfig struct { // on long runs. When true, such tasks fall back to the lean non-Navigator // prompt. Default: false (Navigator context always injected when available). DisableNavigatorForEpic bool `yaml:"disable_navigator_for_epic,omitempty"` + + // AllowedTools restricts the set of tools the Claude Code subprocess may use. + // Maps to the CLI's --allowedTools flag. Default (when unset, see + // DefaultAllowedToolsExecution) keeps the standard execution toolbox while + // excluding heavy MCP-loaded tools, cutting per-turn token cost. GH-2432. + AllowedTools []string `yaml:"allowed_tools,omitempty"` + + // MCPConfigPath points to an MCP server config file passed via --mcp-config. + // When empty, the subprocess does NOT load any MCP servers — drastically + // reducing tool-definition tokens replayed on every turn. GH-2432. + MCPConfigPath string `yaml:"mcp_config_path,omitempty"` +} + +// PlanningConfig controls the model and behavior for epic planning subprocesses. +// Planning runs benefit from stronger reasoning (Opus); execution runs stay on +// the cheaper, verbose-friendly Sonnet. GH-2432. +type PlanningConfig struct { + // Model is the Claude model name passed to the planning subprocess via + // --model and the ANTHROPIC_MODEL env var. Default: "claude-opus-4-7". + Model string `yaml:"model,omitempty"` +} + +// DefaultPlanningConfig returns the default planning config (Opus 4.7). +func DefaultPlanningConfig() *PlanningConfig { + return &PlanningConfig{Model: "claude-opus-4-7"} +} + +// DefaultAllowedToolsExecution is the default --allowedTools list for execution +// subprocesses. Excludes MCP and Web* tools to cut per-turn context bloat. +// GH-2432. +func DefaultAllowedToolsExecution() []string { + return []string{"Read", "Write", "Edit", "Bash", "Grep", "Glob", "Task"} +} + +// DefaultAllowedToolsPlanning is the default --allowedTools list for the +// planning subprocess. Planning should not write code. GH-2432. +func DefaultAllowedToolsPlanning() []string { + return []string{"Read", "Grep", "Glob"} } // QwenCodeConfig contains Qwen Code backend configuration. 
@@ -607,7 +657,8 @@ func DefaultBackendConfig() *BackendConfig { PrePushLint: &prePushLint, PlanningTimeout: 2 * time.Minute, ClaudeCode: &ClaudeCodeConfig{ - Command: "claude", + Command: "claude", + AllowedTools: DefaultAllowedToolsExecution(), }, QwenCode: &QwenCodeConfig{ Command: "qwen", @@ -627,6 +678,7 @@ func DefaultBackendConfig() *BackendConfig { IntentJudge: DefaultIntentJudgeConfig(), Navigator: DefaultNavigatorConfig(), Hooks: DefaultHooksConfig(), + Planning: DefaultPlanningConfig(), Retry: DefaultRetryConfig(), Stagnation: DefaultStagnationConfig(), Simplification: DefaultSimplifyConfig(), @@ -649,7 +701,8 @@ func DefaultModelRoutingConfig() *ModelRoutingConfig { Trivial: "claude-haiku", Simple: "claude-sonnet-4-6", Medium: "claude-sonnet-4-6", - Complex: "claude-opus-4-6", + // GH-2432: Sonnet for "complex" too — Opus is reserved for planning only. + Complex: "claude-sonnet-4-6", } } diff --git a/internal/executor/backend_claudecode.go b/internal/executor/backend_claudecode.go index f28994d9..8dd52240 100644 --- a/internal/executor/backend_claudecode.go +++ b/internal/executor/backend_claudecode.go @@ -355,6 +355,16 @@ func (b *ClaudeCodeBackend) executeWithFromPR(ctx context.Context, opts ExecuteO b.log.Info("Using routed effort", slog.String("effort", opts.Effort)) } + // GH-2432: --allowedTools restricts the subprocess toolbox; --mcp-config + // scopes which MCP servers (if any) the subprocess loads. Both cut the + // per-turn token cost considerably when MCPs are not needed. + if len(opts.AllowedTools) > 0 { + args = append(args, "--allowedTools", strings.Join(opts.AllowedTools, ",")) + } + if opts.MCPConfigPath != "" { + args = append(args, "--mcp-config", opts.MCPConfigPath) + } + args = append(args, b.config.ExtraArgs...) cmd := exec.CommandContext(ctx, b.config.Command, args...) diff --git a/internal/executor/epic.go b/internal/executor/epic.go index dd8d1ea0..266db1c4 100644 --- a/internal/executor/epic.go +++ b/internal/executor/epic.go @@ -5,6 +5,7 @@ import ( "bytes" "context" "fmt" + "os" "os/exec" "regexp" "strconv" @@ -201,10 +202,25 @@ func (r *Runner) PlanEpic(ctx context.Context, task *Task, executionPath string) claudeCmd = r.config.ClaudeCode.Command } - // Run Claude Code with --print flag for planning - args := []string{"--print", "-p", prompt} + // GH-2432: Planning gets Opus for stronger reasoning; execution stays on + // Sonnet (set via the regular runner path). The model is also exported via + // ANTHROPIC_MODEL because Pilot's global env may otherwise win on Node's + // last-write lookup inside Claude Code (see backend_claudecode.go). + planningModel := "claude-opus-4-7" + if r.config != nil && r.config.Planning != nil && r.config.Planning.Model != "" { + planningModel = r.config.Planning.Model + } + + // Run Claude Code with --print flag for planning. Restrict tools to + // read-only — planning must not write code. + args := []string{ + "--print", "-p", prompt, + "--model", planningModel, + "--allowedTools", strings.Join(DefaultAllowedToolsPlanning(), ","), + } cmd := exec.CommandContext(ctx, claudeCmd, args...) 
+ cmd.Env = append(os.Environ(), "ANTHROPIC_MODEL="+planningModel) // Set working directory - use executionPath which respects worktree isolation if executionPath != "" { diff --git a/internal/executor/gh2432_test.go b/internal/executor/gh2432_test.go new file mode 100644 index 00000000..e994cbca --- /dev/null +++ b/internal/executor/gh2432_test.go @@ -0,0 +1,128 @@ +package executor + +import ( + "reflect" + "strings" + "testing" +) + +// GH-2432: "Opus plans, Sonnet executes" — verify the Sonnet/Opus split, +// AllowedTools/MCPConfig wiring, and Stop-hook default flip. + +func TestDefaultBackendConfig_SonnetForComplex(t *testing.T) { + cfg := DefaultBackendConfig() + if got, want := cfg.ModelRouting.Complex, "claude-sonnet-4-6"; got != want { + t.Errorf("ModelRouting.Complex = %q, want %q (GH-2432: Opus reserved for planning only)", got, want) + } +} + +func TestDefaultPlanningConfig_OpusModel(t *testing.T) { + cfg := DefaultPlanningConfig() + if cfg.Model != "claude-opus-4-7" { + t.Errorf("Planning.Model default = %q, want claude-opus-4-7", cfg.Model) + } +} + +func TestDefaultBackendConfig_PlanningWired(t *testing.T) { + cfg := DefaultBackendConfig() + if cfg.Planning == nil { + t.Fatal("DefaultBackendConfig() should populate Planning (GH-2432)") + } + if cfg.Planning.Model != "claude-opus-4-7" { + t.Errorf("DefaultBackendConfig.Planning.Model = %q, want claude-opus-4-7", cfg.Planning.Model) + } +} + +func TestDefaultClaudeCodeConfig_AllowedToolsExecution(t *testing.T) { + cfg := DefaultBackendConfig() + if cfg.ClaudeCode == nil { + t.Fatal("DefaultBackendConfig.ClaudeCode is nil") + } + want := DefaultAllowedToolsExecution() + if !reflect.DeepEqual(cfg.ClaudeCode.AllowedTools, want) { + t.Errorf("ClaudeCode.AllowedTools = %v, want %v", cfg.ClaudeCode.AllowedTools, want) + } + if cfg.ClaudeCode.MCPConfigPath != "" { + t.Errorf("ClaudeCode.MCPConfigPath = %q, want empty (no MCPs by default)", cfg.ClaudeCode.MCPConfigPath) + } +} + +func TestDefaultAllowedTools_PlanningIsReadOnly(t *testing.T) { + tools := DefaultAllowedToolsPlanning() + for _, banned := range []string{"Write", "Edit", "Bash"} { + for _, tool := range tools { + if tool == banned { + t.Errorf("planning tools contain %q — planning must be read-only", banned) + } + } + } + for _, required := range []string{"Read", "Grep", "Glob"} { + found := false + for _, tool := range tools { + if tool == required { + found = true + break + } + } + if !found { + t.Errorf("planning tools missing required tool %q", required) + } + } +} + +func TestExecuteOptions_HasAllowedToolsAndMCPConfigPath(t *testing.T) { + // Compile-time check via field access. 
+ opts := ExecuteOptions{ + AllowedTools: []string{"Read", "Bash"}, + MCPConfigPath: "/tmp/mcp.json", + } + if len(opts.AllowedTools) != 2 || opts.MCPConfigPath != "/tmp/mcp.json" { + t.Error("ExecuteOptions.AllowedTools/MCPConfigPath not wired (GH-2432)") + } +} + +func TestRunner_ExecutionToolOptions_FromConfig(t *testing.T) { + r := &Runner{ + config: &BackendConfig{ + ClaudeCode: &ClaudeCodeConfig{ + AllowedTools: []string{"Read", "Edit"}, + MCPConfigPath: "/path/to/mcp.json", + }, + }, + } + allowed, mcp := r.executionToolOptions() + if !reflect.DeepEqual(allowed, []string{"Read", "Edit"}) { + t.Errorf("allowed = %v, want [Read Edit]", allowed) + } + if mcp != "/path/to/mcp.json" { + t.Errorf("mcp = %q, want /path/to/mcp.json", mcp) + } +} + +func TestRunner_ExecutionToolOptions_NilSafe(t *testing.T) { + r := &Runner{config: nil} + allowed, mcp := r.executionToolOptions() + if allowed != nil || mcp != "" { + t.Errorf("nil config: allowed=%v mcp=%q, want nil/empty", allowed, mcp) + } +} + +func TestDefaultHooksConfig_RunTestsOnStop_FlippedToFalse(t *testing.T) { + cfg := DefaultHooksConfig() + if cfg.RunTestsOnStop == nil { + t.Fatal("RunTestsOnStop is nil") + } + if *cfg.RunTestsOnStop { + t.Error("RunTestsOnStop should default to false (GH-2432: cut subprocess token spend)") + } +} + +// Verify the planning tools list contains exactly what we ship by default — +// guards against accidental write-tool additions. +func TestDefaultAllowedToolsPlanning_Exact(t *testing.T) { + got := strings.Join(DefaultAllowedToolsPlanning(), ",") + want := "Read,Grep,Glob" + if got != want { + t.Errorf("planning tools = %q, want %q", got, want) + } +} diff --git a/internal/executor/hooks.go b/internal/executor/hooks.go index c809a2cf..33ce428f 100644 --- a/internal/executor/hooks.go +++ b/internal/executor/hooks.go @@ -46,8 +46,12 @@ type HooksConfig struct { } // DefaultHooksConfig returns default hooks configuration. +// GH-2432: RunTestsOnStop default flipped to false. Stop-hook tests forced +// long unproductive turns into the Claude session, inflating token cost +// without comparable quality gain. Quality gates still run after the +// subprocess exits. func DefaultHooksConfig() *HooksConfig { - runTestsOnStop := true + runTestsOnStop := false blockDestructive := true return &HooksConfig{ Enabled: false, // Disabled by default, opt-in feature diff --git a/internal/executor/hooks_test.go b/internal/executor/hooks_test.go index ab53878a..4518014c 100644 --- a/internal/executor/hooks_test.go +++ b/internal/executor/hooks_test.go @@ -13,8 +13,9 @@ func TestHooksConfig_Defaults(t *testing.T) { if config.Enabled { t.Error("Expected hooks to be disabled by default") } - if config.RunTestsOnStop == nil || !*config.RunTestsOnStop { - t.Error("Expected RunTestsOnStop to default to true when enabled") + // GH-2432: RunTestsOnStop default flipped to false to cut subprocess token spend. 
+ if config.RunTestsOnStop == nil || *config.RunTestsOnStop { + t.Error("Expected RunTestsOnStop to default to false (GH-2432)") } if config.BlockDestructive == nil || !*config.BlockDestructive { t.Error("Expected BlockDestructive to default to true when enabled") diff --git a/internal/executor/runner.go b/internal/executor/runner.go index d070519c..55a96daf 100644 --- a/internal/executor/runner.go +++ b/internal/executor/runner.go @@ -584,6 +584,16 @@ func (r *Runner) fallbackModelName() string { return r.backendType() } +// executionToolOptions returns the AllowedTools and MCPConfigPath that should +// be applied to every backend.Execute call site driven by this Runner. These +// shave the per-turn token cost by scoping the subprocess toolbox. GH-2432. +func (r *Runner) executionToolOptions() (allowed []string, mcpPath string) { + if r.config != nil && r.config.ClaudeCode != nil { + return r.config.ClaudeCode.AllowedTools, r.config.ClaudeCode.MCPConfigPath + } + return nil, "" +} + // SetBackend changes the execution backend. func (r *Runner) SetBackend(backend Backend) { r.backend = backend @@ -1603,6 +1613,7 @@ func (r *Runner) executeWithOptions(ctx context.Context, task *Task, allowWorktr // Watchdog kills subprocess after 2x timeout as a safety net for processes // that ignore context cancellation. watchdogTimeout := 2 * timeout + allowedTools, mcpConfigPath := r.executionToolOptions() backendResult, err := r.backend.Execute(ctx, ExecuteOptions{ Prompt: prompt, ProjectPath: executionPath, // Use worktree path if active @@ -1611,6 +1622,8 @@ func (r *Runner) executeWithOptions(ctx context.Context, task *Task, allowWorktr Effort: selectedEffort, FromPR: task.FromPR, // GH-1267: session resumption from PR context WatchdogTimeout: watchdogTimeout, + AllowedTools: allowedTools, + MCPConfigPath: mcpConfigPath, WatchdogCallback: func(pid int, watchdogDuration time.Duration) { log.Warn("Watchdog killed subprocess", slog.Int("pid", pid), @@ -1851,6 +1864,7 @@ func (r *Runner) executeWithOptions(ctx context.Context, task *Task, allowWorktr r.reportProgress(task.ID, "Re-executing", 55, fmt.Sprintf("Retry attempt %d with %v timeout...", state.smartRetryAttempt, retryTimeout)) + smartAllowed, smartMCP := r.executionToolOptions() retryResult, retryErr := r.backend.Execute(retryCtx, ExecuteOptions{ Prompt: prompt, ProjectPath: task.ProjectPath, @@ -1858,6 +1872,8 @@ func (r *Runner) executeWithOptions(ctx context.Context, task *Task, allowWorktr Model: selectedModel, Effort: selectedEffort, WatchdogTimeout: 2 * retryTimeout, + AllowedTools: smartAllowed, + MCPConfigPath: smartMCP, EventHandler: func(event BackendEvent) { if recorder != nil { _ = recorder.RecordEvent(event.Raw) @@ -2145,6 +2161,7 @@ The previous execution completed but made no code changes. This task requires ac %s`, task.Title, task.Description) // Execute retry + noopRetryAllowed, noopRetryMCP := r.executionToolOptions() retryResult, retryErr := r.backend.Execute(ctx, ExecuteOptions{ Prompt: retryPrompt, ProjectPath: task.ProjectPath, @@ -2152,6 +2169,8 @@ The previous execution completed but made no code changes. This task requires ac Model: selectedModel, Effort: selectedEffort, WatchdogTimeout: watchdogTimeout, + AllowedTools: noopRetryAllowed, + MCPConfigPath: noopRetryMCP, EventHandler: func(event BackendEvent) { // Track tokens from retry state.tokensInput += event.TokensInput @@ -2416,12 +2435,15 @@ The previous execution completed but made no code changes. 
This task requires ac ) // Re-invoke backend with retry prompt + feedbackAllowed, feedbackMCP := r.executionToolOptions() retryResult, retryErr := r.backend.Execute(ctx, ExecuteOptions{ - Prompt: retryPrompt, - ProjectPath: task.ProjectPath, - Verbose: task.Verbose, - Model: selectedModel, - Effort: selectedEffort, + Prompt: retryPrompt, + ProjectPath: task.ProjectPath, + Verbose: task.Verbose, + Model: selectedModel, + Effort: selectedEffort, + AllowedTools: feedbackAllowed, + MCPConfigPath: feedbackMCP, EventHandler: func(event BackendEvent) { if recorder != nil { if recErr := recorder.RecordEvent(event.Raw); recErr != nil { @@ -2692,12 +2714,15 @@ The previous execution completed but made no code changes. This task requires ac intentVerdict.Reason, task.Title, task.Description, ) + intentAllowed, intentMCP := r.executionToolOptions() _, retryErr := r.backend.Execute(ctx, ExecuteOptions{ - Prompt: retryPrompt, - ProjectPath: task.ProjectPath, - Verbose: task.Verbose, - Model: selectedModel, - Effort: selectedEffort, + Prompt: retryPrompt, + ProjectPath: task.ProjectPath, + Verbose: task.Verbose, + Model: selectedModel, + Effort: selectedEffort, + AllowedTools: intentAllowed, + MCPConfigPath: intentMCP, EventHandler: func(event BackendEvent) { state.tokensInput += event.TokensInput state.tokensOutput += event.TokensOutput @@ -3305,6 +3330,7 @@ func (r *Runner) runSelfReview(ctx context.Context, task *Task, state *progressS } } + reviewAllowed, reviewMCP := r.executionToolOptions() result, err := r.backend.Execute(reviewCtx, ExecuteOptions{ Prompt: reviewPrompt, ProjectPath: task.ProjectPath, @@ -3312,6 +3338,8 @@ func (r *Runner) runSelfReview(ctx context.Context, task *Task, state *progressS Model: selectedModel, Effort: selectedEffort, ResumeSessionID: resumeSessionID, + AllowedTools: reviewAllowed, + MCPConfigPath: reviewMCP, EventHandler: func(event BackendEvent) { // Track tokens from self-review state.tokensInput += event.TokensInput
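// Reviewer sketch (illustrative, not part of the diff): the planning
// invocation added in epic.go, reduced to the pieces this patch introduces.
// buildPlanningCmd is a hypothetical helper; PlanEpic's prompt construction,
// timeout, and working-directory handling are omitted. Assumes the executor
// package's context, os, os/exec, and strings imports.
func buildPlanningCmd(ctx context.Context, cfg *BackendConfig, prompt string) *exec.Cmd {
	model := DefaultPlanningConfig().Model // "claude-opus-4-7"
	if cfg != nil && cfg.Planning != nil && cfg.Planning.Model != "" {
		model = cfg.Planning.Model
	}
	args := []string{
		"--print", "-p", prompt,
		"--model", model,
		// Planning is read-only: "Read,Grep,Glob"
		"--allowedTools", strings.Join(DefaultAllowedToolsPlanning(), ","),
	}
	cmd := exec.CommandContext(ctx, "claude", args...)
	// Appended last so it wins Node's last-write env lookup over Pilot's
	// globally exported Sonnet model.
	cmd.Env = append(os.Environ(), "ANTHROPIC_MODEL="+model)
	return cmd
}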
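// Reviewer sketch (illustrative, not part of the diff): the label-based retry
// escalation that shouldRetryRetryReadyIssue now implements in
// internal/adapters/github, reduced to a pure function. nextRetryState is a
// hypothetical name; the real code also removes pilot-retry-ready, swaps
// labels via the GitHub client, and mirrors the legacy in-memory counter.
func nextRetryState(labels []string) (nextLabel string, dispatch bool) {
	has := func(name string) bool {
		for _, l := range labels {
			if l == name {
				return true
			}
		}
		return false
	}
	switch {
	case has(LabelRetryExhausted):
		return "", false // terminal: the issue is never dispatched again
	case has(LabelRetry2):
		return LabelRetryExhausted, false // third failure exhausts the budget; no dispatch
	case has(LabelRetry1):
		return LabelRetry2, true // second retry
	default:
		return LabelRetry1, true // first retry
	}
}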