WiktorStarczewski · WiktorStarczewski · Apr 25, 2026 · Apr 25, 2026 · Apr 25, 2026 · Apr 25, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -30,7 +30,19 @@ jobs:
           # toolchain the repo declares rather than hard-coding a
           # version here.
           go-version-file: 'go.mod'
-          cache: true
+          # setup-go's default cache key hashes only go.sum. When two
+          # PRs in a row share a go.sum but touch local source (PR B
+          # didn't bump go.sum after PR A), the second PR's build can
+          # restore stale instrumented test binaries from the first
+          # PR's cache. The resulting -coverpkg=./... profile ends up
+          # with mixed-version cover blocks for the same source file
+          # (e.g. printHelp ending at line 125 in three packages and
+          # line 127 in six), which `go tool cover -func` treats as
+          # two distinct blocks and drops the function's coverage to
+          # ~33%. This bit us in #8: 90.7% locally vs 65% on CI on
+          # the same commit. Keeping cache off until we have a key
+          # that hashes source files too.
+          cache: false
 
       - name: Download modules
         run: go mod download

diff --git a/README.md b/README.md
@@ -174,6 +174,10 @@ If you'd rather edit the config yourself, add this under `mcpServers` in `~/.cla
 | `get_session_summary` | Compact digest: first user ask, tool-call counts, subagent list, last assistant text. |
 | `get_peer_info` | `{name, version, sessionCount, activeSessionCount}` — sanity-check which peer you're talking to. |
 | `ask_peer_claude` | **Phase-2, requires `--enable-agent`.** Spawns a parallel Claude session on the peer's box with read-only filesystem tools. Returns a markdown transcript + `{turnCount, toolCallCount, stopReason, elapsedMs}`. |
+| `start_peer_conversation` | **Phase-2, requires `--enable-agent`.** Opens a stateful read-only conversation. Returns `{convId, startedAt, effectiveBudget}`. |
+| `send_peer_message` | **Phase-2, requires `--enable-agent`.** Runs one more turn against an existing `convId`. |
+| `list_peer_conversations` | **Phase-2, requires `--enable-agent`.** Lists active conversations, sorted by `lastActivityAt` descending. |
+| `end_peer_conversation` | **Phase-2, requires `--enable-agent`.** Terminates a conversation; idempotent. |
 
 ## Interactive mode (Phase 2)
 
@@ -196,7 +200,11 @@ If `--enable-agent` is set but the env var is empty, hearsay refuses to start
 
 After `hearsay pair <invite>` and a Claude Code restart, your Claude has one new tool per peer:
 
-- `mcp__ivan__ask_peer_claude({prompt, project?, max_tokens?, max_tool_calls?, timeout_seconds?})` — spawns a parallel Claude session on Ivan's box with read-only tools (`read` / `glob` / `grep`). Returns a markdown transcript plus `{turnCount, toolCallCount, stopReason, elapsedMs}`. Short-lived; no state kept after the call. (PR-B will add stateful conversations.)
+- `mcp__ivan__ask_peer_claude({prompt, project?, max_tokens?, max_tool_calls?, timeout_seconds?})` — spawns a parallel Claude session on Ivan's box with read-only tools (`read` / `glob` / `grep`). Returns a markdown transcript plus `{turnCount, toolCallCount, stopReason, elapsedMs}`. Short-lived; no state kept after the call.
+- `mcp__ivan__start_peer_conversation({system_prompt?, project?, ...budget})` — creates a stateful read-only conversation. Returns `{convId, startedAt, effectiveBudget}`.
+- `mcp__ivan__send_peer_message({convId, prompt, ...budget})` — runs one more turn on an existing conversation. Budget fields passed here override the conversation's defaults for that turn.
+- `mcp__ivan__list_peer_conversations({})` — active conversations sorted by lastActivityAt desc.
+- `mcp__ivan__end_peer_conversation({convId})` — terminate and free the slot. Idempotent.
 
 Example natural-language prompt to your Claude:
 
@@ -229,6 +237,8 @@ Every agent call appends one JSON line to `~/Library/Logs/hearsay/agent.log` (ma
 --agent-default-max-tool-calls <n>      per-turn tool-call budget (default 20)
 --agent-default-timeout-seconds <n>     per-call wall-clock budget (default 120)
 --agent-log-path <path>                 audit log path (default platform-specific)
+--max-conversations <n>                 concurrent-conversations cap (default 10)
+--conversation-idle-timeout <dur>       reap conversations idle past this (default 15m)
 ```
 
 ## Optional: CLAUDE.md discoverability block
@@ -269,6 +279,8 @@ Phase-2 agent flags (off by default):
   --agent-default-max-tool-calls <n>     (default 20)
   --agent-default-timeout-seconds <n>    (default 120)
   --agent-log-path <path>
+  --max-conversations <n>                (default 10)
+  --conversation-idle-timeout <dur>      (default 15m)
 ```
 
 ## Design notes

diff --git a/cmd/hearsay/main.go b/cmd/hearsay/main.go
@@ -112,6 +112,8 @@ PHASE-2 AGENT FLAGS (off by default)
   --agent-default-timeout-seconds <n>   per-call wall-clock budget in seconds (default 120)
   --agent-log-path <path>   audit log (default: ~/Library/Logs/hearsay/agent.log on macOS,
                             $XDG_STATE_HOME/hearsay/agent.log elsewhere)
+  --max-conversations <n>   concurrent-conversations cap (default 10)
+  --conversation-idle-timeout <dur>     reap conversations idle past this (default 15m)
 
 ADD-PEER FLAGS
   --url <url>               peer's hearsay MCP URL (e.g. http://ivan-mac.tailXXXX.ts.net:3456/mcp)
@@ -160,6 +162,8 @@ func runServerWithSignals(args []string, sigCh <-chan os.Signal) int {
 		agentMaxTools   = fs.Int("agent-default-max-tool-calls", 20, "default per-turn max_tool_calls budget")
 		agentTimeoutSec = fs.Int("agent-default-timeout-seconds", 120, "default per-call wall-clock budget in seconds")
 		agentLogPath    = fs.String("agent-log-path", "", "audit-log path (default: platform-specific — see DefaultAuditPath)")
+		maxConvs        = fs.Int("max-conversations", 10, "concurrent-conversations cap")
+		convIdle        = fs.Duration("conversation-idle-timeout", 15*time.Minute, "reap conversations idle past this duration")
 	)
 	if err := fs.Parse(args); err != nil {
 		return 2
@@ -207,7 +211,9 @@ func runServerWithSignals(args []string, sigCh <-chan os.Signal) int {
 				MaxToolCalls: *agentMaxTools,
 				Timeout:      time.Duration(*agentTimeoutSec) * time.Second,
 			},
-			Auditor: auditor,
+			Auditor:                 auditor,
+			MaxConversations:        *maxConvs,
+			ConversationIdleTimeout: *convIdle,
 		})
 		if err != nil {
 			fmt.Fprintf(os.Stderr, "hearsay: agent init: %v\n", err)
@@ -262,6 +268,9 @@ func runServerWithSignals(args []string, sigCh <-chan os.Signal) int {
 	fmt.Fprintln(os.Stderr, "hearsay: shutting down…")
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
+	if closer, ok := ag.(agent.Closer); ok && closer != nil {
+		closer.Close()
+	}
 	if err := srv.Shutdown(ctx); err != nil {
 		fmt.Fprintf(os.Stderr, "hearsay: shutdown error: %v\n", err)
 		return 1

diff --git a/e2e/hearsay_e2e_test.go b/e2e/hearsay_e2e_test.go
@@ -751,6 +751,68 @@ func TestE2E_AgentToolPresentWhenEnabled(t *testing.T) {
 	}
 }
 
+// TestE2E_ConversationToolsPresent confirms all four PR-B
+// conversation tools are in the catalog when --enable-agent is set.
+// A failure while listing the catalog aborts the test immediately
+// instead of being reported as four "missing" tools.
+func TestE2E_ConversationToolsPresent(t *testing.T) {
+	f := startServerWithEnv(t, "conv-tools",
+		[]string{"--enable-agent", "--quiet"},
+		[]string{"ANTHROPIC_API_KEY=sk-ant-fake-test-key-12345"},
+	)
+	cs := f.connectMCP(t)
+	want := map[string]bool{
+		"start_peer_conversation": false,
+		"send_peer_message":       false,
+		"list_peer_conversations": false,
+		"end_peer_conversation":   false,
+	}
+	// The iterator reports listing failures through its second value;
+	// check it before touching tool.Name so a failed listing cannot
+	// turn into a nil dereference.
+	for tool, err := range cs.Tools(context.Background(), nil) {
+		if err != nil {
+			t.Fatalf("listing tools: %v", err)
+		}
+		if _, expected := want[tool.Name]; expected {
+			want[tool.Name] = true
+		}
+	}
+	for name, present := range want {
+		if !present {
+			t.Errorf("tool %q missing from catalog", name)
+		}
+	}
+}
+
+// TestE2E_ConversationCapFlagWiredThrough checks that the binary
+// accepts --max-conversations / --conversation-idle-timeout and that
+// the agent's conversation state is reachable.  Actually filling the
+// cap would need a full Anthropic stub, so the test settles for
+// `list_peer_conversations` answering with an empty list on a
+// freshly-started server.
+func TestE2E_ConversationCapFlagWiredThrough(t *testing.T) {
+	flags := []string{
+		"--enable-agent",
+		"--quiet",
+		"--max-conversations", "2",
+		"--conversation-idle-timeout", "5s",
+	}
+	env := []string{"ANTHROPIC_API_KEY=sk-ant-fake-test-key-12345"}
+	fixture := startServerWithEnv(t, "conv-cap", flags, env)
+	session := fixture.connectMCP(t)
+
+	params := &mcp.CallToolParams{
+		Name:      "list_peer_conversations",
+		Arguments: map[string]any{},
+	}
+	res, err := session.CallTool(context.Background(), params)
+	switch {
+	case err != nil:
+		t.Fatalf("list_peer_conversations: %v", err)
+	case res.IsError:
+		t.Fatalf("list_peer_conversations errored: %+v", res.Content)
+	}
+
+	out := structured(t, res)
+	convs, isList := out["conversations"].([]any)
+	if !isList {
+		t.Fatalf("conversations field missing or wrong type: %+v", out)
+	}
+	if n := len(convs); n != 0 {
+		t.Errorf("freshly-started server should have 0 conversations, got %d", n)
+	}
+}
+
 // TestE2E_AgentRefusesStartWithoutKey confirms the error-contract row:
 // `--enable-agent` set but ANTHROPIC_API_KEY empty ⇒ refuse to start.
 // We can't use startServerWithEnv (it waits for /health) — the binary

diff --git a/internal/agent/agent.go b/internal/agent/agent.go
@@ -99,13 +99,74 @@ type Transcript struct {
 	ErrorSummary  ErrorSummary // populated iff StopReason == "error"
 }
 
-// Agent is the interface ask_peer_claude (and PR-B's conversation
-// tools) call into.  PR A only implements OneShot; PR B will extend
-// the same package with the conversation-lifecycle methods.
+// ConvID is an opaque handle to a hearsay-managed conversation.  Its
+// value is the SDK's session ID, used directly so the agent layer
+// needs no map-lookup indirection of its own.
+type ConvID string
+
+// StartReq is the input to Agent.StartConversation.
+type StartReq struct {
+	SystemPrompt string // presumably the tool's optional system_prompt argument — confirm in the tools layer
+	Project      string // presumably the tool's optional project argument — confirm in the tools layer
+	Budget       Budget // becomes the conversation's per-turn default
+}
+
+// ConvMeta mirrors list_peer_conversations' output one-for-one: one
+// value describes one conversation in that listing.
+type ConvMeta struct {
+	ConvID         ConvID
+	StartedAt      time.Time
+	LastActivityAt time.Time // presumably the key list_peer_conversations sorts on (descending) — confirm
+	TurnCount      int
+	// Preview is the first ~140 *runes* (not bytes) of the first user
+	// message.  Truncation counts runes so that a multi-byte codepoint
+	// at the boundary cannot yield invalid UTF-8.  For a conversation
+	// that has been started but has not yet seen a send_peer_message,
+	// it falls back to the first ~140 runes of the system_prompt (or
+	// is empty if no system_prompt was set).
+	Preview string
+}
+
+// EndReason discriminates how a conversation ended; carried into the
+// audit log + the end_peer_conversation tool's output.  Since the
+// value leaves the process that way, treat the strings below as part
+// of the external format (assumption — confirm how they are
+// serialized before renaming any of them).
+type EndReason string
+
+const (
+	EndedByCaller   EndReason = "caller"       // ended on request by the caller
+	EndedByIdleReap EndReason = "idle_timeout" // reaped after sitting idle past the timeout
+	EndedByShutdown EndReason = "shutdown"     // ended as part of shutdown
+)
+
+// EndSummary mirrors end_peer_conversation's tool output.
+//
+// NOTE(review): AlreadyEnded describes an idempotent re-end, while
+// ErrUnknownConv's comment lists "already ended" among its causes;
+// confirm which of the two EndConversation actually produces for a
+// conversation that was ended earlier.
+type EndSummary struct {
+	Ended        bool
+	AlreadyEnded bool      // true if the conv was already ended (idempotent re-end)
+	TotalTurns   int
+	EndedReason  EndReason
+}
+
+// Agent is the contract shared by all Phase-2 tools.  OneShot arrived
+// in PR A; the conversation-lifecycle methods were added in PR B.
 type Agent interface {
 	OneShot(ctx context.Context, req OneShotRequest) (Transcript, error)
+	StartConversation(ctx context.Context, req StartReq) (id ConvID, startedAt time.Time, effective Budget, err error)
+	SendMessage(ctx context.Context, convID ConvID, prompt string, budget Budget) (Transcript, error)
+	ListConversations() []ConvMeta
+	EndConversation(ctx context.Context, convID ConvID, reason EndReason) (EndSummary, error)
 }
 
+// Sentinel errors for the conversation lifecycle.  Match them with
+// errors.Is.
+var (
+	// ErrUnknownConv is what SendMessage / EndConversation return for
+	// a convID with no matching live conversation (typo, idle-reaped,
+	// or already ended).
+	ErrUnknownConv = errors.New("agent: unknown conversation id")
+
+	// ErrConvCap is what StartConversation returns once
+	// --max-conversations is full.  The tool layer translates this
+	// into errorSummary=max_conversations.
+	ErrConvCap = errors.New("agent: max conversations reached")
+
+	// ErrConvReaped is what SendMessage returns when the named
+	// conversation existed but was reaped after the idle timeout.
+	//
+	// NOTE(review): "idle-reaped" is also listed under ErrUnknownConv;
+	// confirm which of the two SendMessage returns for a reaped
+	// conversation.
+	ErrConvReaped = errors.New("agent: conversation reaped after idle timeout")
+)
+
 // ErrAgentDisabled is returned when callers try to use an Agent
 // instance that wasn't constructed (--enable-agent off).  The tools
 // layer prevents this by not registering the agent tools when the