From 6b68529439c50e76a281f83ab95665dab019a55d Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Thu, 30 Apr 2026 11:43:58 -0700 Subject: [PATCH 01/20] Add structured freeform context payload + ExecutingTask/ready recovery Two fixes for context-loss bugs in the pr-monitor flow that surfaced after the switch to MCP sampling for freeform user input: 1. Structured interpret_freeform context payload MCP elicitation flows out of band from chat, so when the server returned interpret_freeform to the agent with prose-only instructions, the agent had no record of the elicitation cycle and dismissed the user's reply as "stale state". BuildFreeformInterpretAction now populates a typed FreeformInterpretContext (comment, server analysis, elicitation Q+choices, user reply + sampling classification) so the agent can ground its response in the same context the user was shown. 2. (ExecutingTask, "ready") recovery The post-push pr_monitor_start hook was calling event=ready while the state machine was still in ExecutingTask mid-comment-flow. ProcessEvent had no transition for that pair, so it fell into RecoverFromUnexpectedState which destructively wiped CommentFlow / CiFailureFlow / ActiveWaitingComment and offered only Resume/Stop, restarting polling from scratch. Fix A: snapshot HEAD on every ExecutingTask entry via new EnterExecutingTask() helper. New (ExecutingTask, ready) transition returns an auto_execute that refreshes HEAD via PrStatusFetcher; if HEAD advanced, re-dispatches as the documented completion event (comment_addressed / push_completed). Fix B: when no push detected, RecoverFromUnexpectedState preserves flow state and offers flow-aware choices (Treat as comment addressed / Treat as push completed) so the user can resume in-flow without losing their place. Non-ExecutingTask prior states keep the existing destructive cleanup. 22 new tests across FreeformInterpretContextTests and ExecutingTaskReadyRecoveryTests. 358 tests pass total. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../PrCopilot/StateMachine/MonitorState.cs | 25 ++ .../StateMachine/MonitorTransitions.cs | 125 +++++++++- .../src/PrCopilot/Tools/CiFailureContext.cs | 15 ++ .../src/PrCopilot/Tools/CommentContext.cs | 28 +++ .../Tools/ElicitationChoiceContext.cs | 21 ++ .../src/PrCopilot/Tools/ElicitationContext.cs | 19 ++ .../src/PrCopilot/Tools/FailedCheckContext.cs | 20 ++ .../Tools/FreeformInterpretContext.cs | 56 +++++ .../src/PrCopilot/Tools/MonitorFlowTools.cs | 199 ++++++++++++++-- .../PrCopilot/Tools/ServerAnalysisContext.cs | 28 +++ .../src/PrCopilot/Tools/UserReplyContext.cs | 28 +++ .../ExecutingTaskReadyRecoveryTests.cs | 215 +++++++++++++++++ .../FreeformInterpretContextTests.cs | 224 ++++++++++++++++++ 13 files changed, 977 insertions(+), 26 deletions(-) create mode 100644 PrCopilot/src/PrCopilot/Tools/CiFailureContext.cs create mode 100644 PrCopilot/src/PrCopilot/Tools/CommentContext.cs create mode 100644 PrCopilot/src/PrCopilot/Tools/ElicitationChoiceContext.cs create mode 100644 PrCopilot/src/PrCopilot/Tools/ElicitationContext.cs create mode 100644 PrCopilot/src/PrCopilot/Tools/FailedCheckContext.cs create mode 100644 PrCopilot/src/PrCopilot/Tools/FreeformInterpretContext.cs create mode 100644 PrCopilot/src/PrCopilot/Tools/ServerAnalysisContext.cs create mode 100644 PrCopilot/src/PrCopilot/Tools/UserReplyContext.cs create mode 100644 PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs create mode 100644 PrCopilot/tests/PrCopilot.Tests/FreeformInterpretContextTests.cs diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorState.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorState.cs index dbcd65b..6c7a21e 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorState.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorState.cs @@ -96,6 +96,15 @@ public class MonitorState /// Reply text composed by the agent, to be posted by the server via the REST API. public string? PendingReplyText { get; set; } + /// + /// HEAD SHA snapshotted when the state most recently entered . + /// Used by the recovery path for (ExecutingTask, "ready") to detect whether the agent + /// pushed during the task — if HEAD has advanced, "ready" is reinterpreted as a completion event + /// (comment_addressed in comment flows, push_completed in CI flows). + /// Set via ; null/empty means no snapshot was captured. + /// + public string? HeadShaAtTaskStart { get; set; } + /// Transient: completion event set by sampling handler for MonitorFlowTools to feed back to state machine. public string? SamplingCompletionEvent { get; set; } /// Transient: completion event set by EmitComposeReplyAction for the sampling compose_reply handler. @@ -130,4 +139,20 @@ public void ClearPendingCommentState() /// Set when ReviewerReplied terminal state is detected. /// public CommentInfo? RepliedComment { get; set; } + + /// + /// Transition to and snapshot the current + /// as . The snapshot lets the + /// recovery path detect post-push resumes (where the agent pushed during the task and + /// then re-entered via the post-push pr_monitor_start hook with event=ready + /// instead of calling the documented completion event). + /// + /// Use this instead of assigning CurrentState = MonitorStateId.ExecutingTask + /// directly so the snapshot is never forgotten at a new task-entry site. + /// + public void EnterExecutingTask() + { + CurrentState = MonitorStateId.ExecutingTask; + HeadShaAtTaskStart = HeadSha; + } } diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index ede4e52..86d7148 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -115,6 +115,7 @@ public static MonitorAction BuildTerminalAction(MonitorState state, TerminalStat ["I'll handle the comments myself"] = "handle_myself", ["I'll handle them myself"] = "handle_myself", ["Skip this comment"] = "skip", + ["Treat as comment addressed"] = "treat_as_addressed", ["Done — resume monitoring"] = "done", ["Address next comment"] = "continue", ["I'll handle the rest myself"] = "done", @@ -126,6 +127,7 @@ public static MonitorAction BuildTerminalAction(MonitorState state, TerminalStat ["Re-run failed jobs"] = "rerun", ["Apply the recommendation"] = "apply_fix", ["Run a new build"] = "run_new", + ["Treat as push completed"] = "treat_as_pushed", // Waiting-for-reply comment choices (ProcessWaitingCommentChoice) ["Resolve this thread"] = "resolve", @@ -182,6 +184,14 @@ public static MonitorAction ProcessEvent(MonitorState state, string eventType, s // LLM finished executing a generic task (MonitorStateId.ExecutingTask, "task_complete") => ProcessTaskComplete(state), + // Recovery: agent skipped the documented completion event and re-entered with + // event=ready (commonly because the user's post-push custom-instruction hook + // fires pr_monitor_start + ready instead of the Path B comment_addressed/ + // push_completed events). Returns an auto_execute that fetches HEAD; if HEAD + // advanced during the task, the wrapper re-dispatches as the right completion + // event, otherwise falls through to flow-aware recovery prompt. + (MonitorStateId.ExecutingTask, "ready") => BuildRecoverFromReadyAction(state), + // Recovery: agent sent task_complete from AwaitingUser (skipped a tool call) (MonitorStateId.AwaitingUser, "task_complete") => RecoverFromUnexpectedTaskComplete(state), @@ -331,8 +341,20 @@ private static MonitorAction RecoverFromUnexpectedTaskComplete(MonitorState stat private static MonitorAction RecoverFromUnexpectedState(MonitorState state, string eventType) { var priorState = state.CurrentState; + var priorCommentFlow = state.CommentFlow; + var priorCiFailureFlow = state.CiFailureFlow; DebugLogger.Log("StateMachine", $"RECOVERY: Unexpected state {priorState}/{eventType}. Transitioning to AwaitingUser so next user_chose can recover."); state.CurrentState = MonitorStateId.AwaitingUser; + + // Preserve flow context when recovering from ExecutingTask — the user is mid-flow + // and may want to resume by treating the task as completed (e.g., comment addressed). + // Wiping the flow state would force them to start the comment loop from scratch and + // lose their place. For other prior states, keep the original aggressive cleanup. + if (priorState == MonitorStateId.ExecutingTask) + { + return BuildExecutingTaskRecoveryPrompt(state, eventType, priorCommentFlow, priorCiFailureFlow); + } + state.CommentFlow = CommentFlowState.None; state.CiFailureFlow = CiFailureFlowState.None; state.ActiveWaitingComment = null; @@ -344,6 +366,83 @@ private static MonitorAction RecoverFromUnexpectedState(MonitorState state, stri }; } + /// + /// Recovery prompt for unexpected events arriving while in . + /// Offers flow-aware choices so the user can mark the task done in-flow rather than + /// being forced to "Resume monitoring" (which would wipe the flow state and restart polling). + /// Flow state (CommentFlow / CiFailureFlow / current comment index) is preserved so the + /// follow-up user_chose can dispatch into the correct flow handler. + /// + private static MonitorAction BuildExecutingTaskRecoveryPrompt( + MonitorState state, + string eventType, + CommentFlowState priorCommentFlow, + CiFailureFlowState priorCiFailureFlow) + { + if (priorCommentFlow != CommentFlowState.None) + { + return new MonitorAction + { + Action = "ask_user", + Question = $"Unexpected event '{eventType}' while addressing a comment. " + + "If you finished the work, pick how to mark it; otherwise resume or stop.", + Choices = + [ + "Treat as comment addressed", + "Skip this comment", + "Resume monitoring", + "Stop monitoring" + ] + }; + } + + if (priorCiFailureFlow != CiFailureFlowState.None) + { + return new MonitorAction + { + Action = "ask_user", + Question = $"Unexpected event '{eventType}' while investigating a CI failure. " + + "If you finished the work, pick how to mark it; otherwise resume or stop.", + Choices = + [ + "Treat as push completed", + "Resume monitoring", + "Stop monitoring" + ] + }; + } + + // No active flow — same as the generic recovery + state.ActiveWaitingComment = null; + return new MonitorAction + { + Action = "ask_user", + Question = $"Unexpected state: ExecutingTask/{eventType}. What would you like to do?", + Choices = ["Resume monitoring", "Stop monitoring"] + }; + } + + /// + /// Returns an auto_execute action that the MCP wrapper handles by fetching the + /// latest HEAD SHA from GitHub. If HEAD advanced since , + /// the wrapper re-dispatches the event as comment_addressed (comment flow) or + /// push_completed (CI flow) — recovering automatically from the post-push hook + /// that called ready instead of the documented completion event. If HEAD did + /// not advance, the wrapper falls back to . + /// + private static MonitorAction BuildRecoverFromReadyAction(MonitorState state) + { + DebugLogger.Log("StateMachine", $"ExecutingTask/ready: dispatching auto_execute recover_from_ready_in_executing_task (snapshot HEAD={ShortSha(state.HeadShaAtTaskStart)})"); + return new MonitorAction + { + Action = "auto_execute", + Task = "recover_from_ready_in_executing_task" + }; + } + + private static string ShortSha(string? sha) => + string.IsNullOrEmpty(sha) ? "(none)" : sha.Length <= 7 ? sha : sha[..7]; + private static MonitorAction TransitionToPolling(MonitorState state) { state.CurrentState = MonitorStateId.Polling; @@ -435,6 +534,12 @@ private static MonitorAction BuildCommentAction(MonitorState state, string times private static MonitorAction ProcessCommentChoice(MonitorState state, string? choice) { + // Recovery shortcut available from any comment-flow sub-state — the (ExecutingTask, "ready") + // recovery path offers this when HEAD did NOT advance during the task. Routes through + // ProcessCommentAddressed which gracefully composes a reply if PendingReplyText is empty. + if (choice == "treat_as_addressed") + return ProcessCommentAddressed(state, null); + return (state.CommentFlow, choice) switch { (CommentFlowState.MultiCommentPrompt, "address_all") => BeginAddressAll(state), @@ -514,7 +619,7 @@ private static MonitorAction BeginExplainAll(MonitorState state) private static MonitorAction EmitExplainForCurrentComment(MonitorState state) { var c = state.UnresolvedComments[state.CurrentCommentIndex]; - state.CurrentState = MonitorStateId.ExecutingTask; + state.EnterExecutingTask(); state.PendingExplainResult = true; state.LastRecommendation = null; return new MonitorAction @@ -573,7 +678,7 @@ private static MonitorAction HandlePickedComment(MonitorState state, string? cho private static MonitorAction BeginAddressCurrentComment(MonitorState state) { - state.CurrentState = MonitorStateId.ExecutingTask; + state.EnterExecutingTask(); return EmitAddressCommentAction(state); } @@ -583,7 +688,7 @@ private static MonitorAction BeginApplyRecommendation(MonitorState state) return TransitionToPolling(state); var c = state.UnresolvedComments[state.CurrentCommentIndex]; - state.CurrentState = MonitorStateId.ExecutingTask; + state.EnterExecutingTask(); return new MonitorAction { Action = "execute", @@ -604,7 +709,7 @@ private static MonitorAction BeginApplyRecommendation(MonitorState state) private static MonitorAction BeginExplainComment(MonitorState state, bool isReplyEvent = false) { var c = state.UnresolvedComments[state.CurrentCommentIndex]; - state.CurrentState = MonitorStateId.ExecutingTask; + state.EnterExecutingTask(); state.PendingExplainResult = true; state.LastRecommendation = null; var replyContext = isReplyEvent && !string.IsNullOrEmpty(c.LastReplyAuthor) @@ -633,7 +738,7 @@ private static MonitorAction EmitAddressCommentAction(MonitorState state) } var c = state.UnresolvedComments[state.CurrentCommentIndex]; - state.CurrentState = MonitorStateId.ExecutingTask; + state.EnterExecutingTask(); return new MonitorAction { Action = "execute", @@ -853,7 +958,7 @@ private static MonitorAction BuildPostReplyAction(MonitorState state, CommentInf /// private static MonitorAction EmitComposeReplyAction(MonitorState state, CommentInfo c, string completionEvent) { - state.CurrentState = MonitorStateId.ExecutingTask; + state.EnterExecutingTask(); state.PendingCompletionEvent = completionEvent; return new MonitorAction { @@ -946,6 +1051,12 @@ private static MonitorAction BuildCiFailureAction(MonitorState state) private static MonitorAction ProcessCiFailureChoice(MonitorState state, string? choice) { + // Recovery shortcut from any CI-failure sub-state — the (ExecutingTask, "ready") + // recovery path offers this when HEAD did NOT advance during the task. Surfaces the + // documented push_completed behavior: clear failure state and resume polling. + if (choice == "treat_as_pushed") + return TransitionToPolling(state); + return (state.CiFailureFlow, choice) switch { (CiFailureFlowState.InvestigationResults, "apply_fix") => BeginApplyFix(state), @@ -1060,7 +1171,7 @@ private static MonitorAction BuildRerunAction(MonitorState state) // Azure DevOps has no API for rerun-failed-only; only the web UI supports it. state.PendingRerunWhenChecksComplete = false; var buildUrl = state.FailedChecks.FirstOrDefault()?.Url; - state.CurrentState = MonitorStateId.ExecutingTask; + state.EnterExecutingTask(); state.CommentFlow = CommentFlowState.None; state.CiFailureFlow = CiFailureFlowState.None; return new MonitorAction diff --git a/PrCopilot/src/PrCopilot/Tools/CiFailureContext.cs b/PrCopilot/src/PrCopilot/Tools/CiFailureContext.cs new file mode 100644 index 0000000..bc33934 --- /dev/null +++ b/PrCopilot/src/PrCopilot/Tools/CiFailureContext.cs @@ -0,0 +1,15 @@ +// Licensed under the MIT License. + +using System.Text.Json.Serialization; + +namespace PrCopilot.Tools; + +/// +/// CI failure context, attached to when the +/// user is replying about a failed CI check. +/// +internal sealed class CiFailureContext +{ + [JsonPropertyName("failedChecks")] + public List FailedChecks { get; set; } = []; +} diff --git a/PrCopilot/src/PrCopilot/Tools/CommentContext.cs b/PrCopilot/src/PrCopilot/Tools/CommentContext.cs new file mode 100644 index 0000000..5b8fc5d --- /dev/null +++ b/PrCopilot/src/PrCopilot/Tools/CommentContext.cs @@ -0,0 +1,28 @@ +// Licensed under the MIT License. + +using System.Text.Json.Serialization; + +namespace PrCopilot.Tools; + +/// +/// The active comment thread context, attached to +/// when the user is replying about a code review comment. +/// +internal sealed class CommentContext +{ + [JsonPropertyName("author")] + public string Author { get; set; } = ""; + + [JsonPropertyName("filePath")] + public string FilePath { get; set; } = ""; + + [JsonPropertyName("line")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public int? Line { get; set; } + + [JsonPropertyName("body")] + public string Body { get; set; } = ""; + + [JsonPropertyName("url")] + public string Url { get; set; } = ""; +} diff --git a/PrCopilot/src/PrCopilot/Tools/ElicitationChoiceContext.cs b/PrCopilot/src/PrCopilot/Tools/ElicitationChoiceContext.cs new file mode 100644 index 0000000..7f894e4 --- /dev/null +++ b/PrCopilot/src/PrCopilot/Tools/ElicitationChoiceContext.cs @@ -0,0 +1,21 @@ +// Licensed under the MIT License. + +using System.Text.Json.Serialization; + +namespace PrCopilot.Tools; + +/// +/// A single choice from an elicitation prompt. is what the +/// user saw; is the internal mapped value the agent should +/// pass back as choice for a Path A clean choice match. +/// +internal sealed class ElicitationChoiceContext +{ + /// Human-readable choice label as shown to the user. + [JsonPropertyName("display")] + public string Display { get; set; } = ""; + + /// Internal mapped value (what would be passed back as choice). + [JsonPropertyName("value")] + public string Value { get; set; } = ""; +} diff --git a/PrCopilot/src/PrCopilot/Tools/ElicitationContext.cs b/PrCopilot/src/PrCopilot/Tools/ElicitationContext.cs new file mode 100644 index 0000000..b01765f --- /dev/null +++ b/PrCopilot/src/PrCopilot/Tools/ElicitationContext.cs @@ -0,0 +1,19 @@ +// Licensed under the MIT License. + +using System.Text.Json.Serialization; + +namespace PrCopilot.Tools; + +/// +/// The elicitation prompt that was shown to the user, captured for the +/// payload so the agent knows what +/// question the user was responding to. +/// +internal sealed class ElicitationContext +{ + [JsonPropertyName("question")] + public string Question { get; set; } = ""; + + [JsonPropertyName("choices")] + public List Choices { get; set; } = []; +} diff --git a/PrCopilot/src/PrCopilot/Tools/FailedCheckContext.cs b/PrCopilot/src/PrCopilot/Tools/FailedCheckContext.cs new file mode 100644 index 0000000..e088c89 --- /dev/null +++ b/PrCopilot/src/PrCopilot/Tools/FailedCheckContext.cs @@ -0,0 +1,20 @@ +// Licensed under the MIT License. + +using System.Text.Json.Serialization; + +namespace PrCopilot.Tools; + +/// +/// A single failed CI check entry in . +/// +internal sealed class FailedCheckContext +{ + [JsonPropertyName("name")] + public string Name { get; set; } = ""; + + [JsonPropertyName("conclusion")] + public string Conclusion { get; set; } = ""; + + [JsonPropertyName("url")] + public string Url { get; set; } = ""; +} diff --git a/PrCopilot/src/PrCopilot/Tools/FreeformInterpretContext.cs b/PrCopilot/src/PrCopilot/Tools/FreeformInterpretContext.cs new file mode 100644 index 0000000..24c366f --- /dev/null +++ b/PrCopilot/src/PrCopilot/Tools/FreeformInterpretContext.cs @@ -0,0 +1,56 @@ +// Licensed under the MIT License. + +using System.Text.Json.Serialization; + +namespace PrCopilot.Tools; + +/// +/// Structured context attached to interpret_freeform execute actions. +/// The agent's chat history has no record of MCP elicitation — it happens out of band. +/// This payload tells the agent everything it needs to act on a freeform reply: what +/// the user was asked, what reply they gave, what comment/CI failure was being +/// discussed, and what server-side analysis or recommendation the user is reacting to. +/// Without this, the agent can mistake a legitimate elicitation reply for "stale state" +/// because the text doesn't match its chat memory of the user's last message. +/// +internal sealed class FreeformInterpretContext +{ + /// "comment" | "ci_failure" | "generic" + [JsonPropertyName("flowType")] + public string FlowType { get; set; } = "generic"; + + /// + /// Why this fallback was chosen. "sampling_classified_as_custom_instruction" + /// when sampling decided the text was a custom instruction; + /// "sampling_unavailable" when sampling threw or returned null unexpectedly. + /// + [JsonPropertyName("reason")] + public string Reason { get; set; } = "sampling_classified_as_custom_instruction"; + + /// The elicitation prompt that was shown to the user. + [JsonPropertyName("elicitation")] + public ElicitationContext Elicitation { get; set; } = new(); + + /// The user's authoritative reply (delivered via MCP elicitation, NOT chat). + [JsonPropertyName("userReply")] + public UserReplyContext UserReply { get; set; } = new(); + + /// The comment thread under discussion (when FlowType is "comment"). + [JsonPropertyName("comment")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public CommentContext? Comment { get; set; } + + /// Failed CI checks under discussion (when FlowType is "ci_failure"). + [JsonPropertyName("ciFailure")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public CiFailureContext? CiFailure { get; set; } + + /// + /// Server-side analysis the user is reacting to. The user's reply often references + /// this implicitly ("write a test that proves this", "fix it that way", "no, do X instead"). + /// Without this, pronouns in the reply are unresolvable. + /// + [JsonPropertyName("serverAnalysis")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public ServerAnalysisContext? ServerAnalysis { get; set; } +} diff --git a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs index 5f01379..55f48de 100644 --- a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs +++ b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs @@ -141,32 +141,129 @@ private static async Task TryHandleViaSamplingAsync( /// Used when sampling classification returns null for a custom instruction, or when /// sampling classification fails (e.g., invalid JSON, exceptions) and the caller falls /// back to agent interpretation of the user's freeform text. + /// + /// The agent's chat history has NO record of the elicitation that produced this reply + /// (MCP elicitation flows separately from chat). Without explicit context, the agent + /// can mistake a legitimate reply for "stale state" because the text doesn't match + /// what it remembers the user typing in chat. The structured Context field + /// gives the agent everything it needs: the comment/CI under discussion, the prompt + /// shown, the user's reply, and any server-side analysis the user is reacting to. /// - private static MonitorAction BuildFreeformInterpretAction(ElicitChoiceResult result, MonitorState state, string? monitorId = null) + internal static MonitorAction BuildFreeformInterpretAction(ElicitChoiceResult result, MonitorState state, string? monitorId = null) { - var choicesContext = result.OriginalChoices != null - ? string.Join(", ", result.OriginalChoices.Select(c => - { - var mapped = MonitorTransitions.ChoiceValueMap.TryGetValue(c, out var v) ? v : c; - return $"'{c}' → {mapped}"; - })) - : "none"; - + var context = BuildFreeformInterpretContext(result, state); var pathBInstructions = BuildFreeformPathBInstructions(state); + var instructions = + "**MCP elicitation occurred — context boundary notice.** " + + "An MCP elicitation prompt was just shown to the user. The user's reply is in " + + "`context.userReply.text` and is **authoritative** — it is the user's most recent " + + "input. It will NOT appear in your chat history because elicitation flows through " + + "MCP, not chat. **Do not dismiss it as stale state**, and if it conflicts with the " + + "user's prior chat message, the elicitation reply wins.\n\n" + + "Read `context` for full background: the question shown (`context.elicitation`), " + + "the comment or CI failure under discussion (`context.comment` / `context.ciFailure`), " + + "and any server-side analysis the user is reacting to (`context.serverAnalysis`). " + + "Pronouns in the reply (\"this\", \"it\", \"that fix\") refer to items in `context`.\n\n" + + "**Path A — clean choice match:** If `context.userReply.text` cleanly maps to ONE of " + + "`context.elicitation.choices` with NO extra instructions, tell the user " + + "'I'm interpreting this as [choice display text]' and call pr_monitor_next_step " + + "with event='user_chose' and choice=. " + + pathBInstructions; + return new MonitorAction { Action = "execute", MonitorId = monitorId, Task = "interpret_freeform", - Instructions = $"The user typed: \"{result.Value}\". " + - $"The original question was: \"{result.OriginalQuestion}\". " + - $"The available choices were: [{choicesContext}]. " + - "**Path A — clean choice match:** If the text cleanly maps to ONE of the available choices with NO extra instructions, " + - "tell the user 'I'm interpreting this as [choice display text]' and call pr_monitor_next_step " + - "with event='user_chose' and choice=. " + - pathBInstructions + Instructions = instructions, + Context = context + }; + } + + /// + /// Build the structured context payload attached to an interpret_freeform action. + /// Pulls comment/CI/recommendation context from the current MonitorState so the + /// agent can resolve pronoun references and understand what the user is reacting to. + /// + internal static FreeformInterpretContext BuildFreeformInterpretContext(ElicitChoiceResult result, MonitorState state) + { + var ctx = new FreeformInterpretContext + { + Reason = "sampling_classified_as_custom_instruction", + Elicitation = new ElicitationContext + { + Question = result.OriginalQuestion ?? "", + Choices = (result.OriginalChoices ?? []).Select(c => new ElicitationChoiceContext + { + Display = c, + Value = MonitorTransitions.ChoiceValueMap.TryGetValue(c, out var v) ? v : c + }).ToList() + }, + UserReply = new UserReplyContext + { + Text = result.Value, + IsFreeform = true, + SamplingClassification = "custom_instruction" + } }; + + // Comment flow: attach the active comment + last recommendation + if (state.CommentFlow != CommentFlowState.None) + { + ctx.FlowType = "comment"; + if (state.CurrentCommentIndex >= 0 && state.CurrentCommentIndex < state.UnresolvedComments.Count) + { + var c = state.UnresolvedComments[state.CurrentCommentIndex]; + ctx.Comment = new CommentContext + { + Author = c.Author, + FilePath = c.FilePath, + Line = c.Line, + Body = c.Body, + Url = c.Url + }; + } + + if (!string.IsNullOrWhiteSpace(state.LastRecommendation)) + { + ctx.ServerAnalysis = new ServerAnalysisContext + { + Recommendation = state.LastRecommendation + }; + } + } + // CI failure flow: attach failed checks + investigation/recommendation + else if (state.CiFailureFlow != CiFailureFlowState.None) + { + ctx.FlowType = "ci_failure"; + if (state.FailedChecks.Count > 0) + { + ctx.CiFailure = new CiFailureContext + { + FailedChecks = state.FailedChecks.Select(f => new FailedCheckContext + { + Name = f.Name, + Conclusion = f.Conclusion, + Url = f.Url + }).ToList() + }; + } + + if (!string.IsNullOrWhiteSpace(state.LastRecommendation) + || !string.IsNullOrWhiteSpace(state.InvestigationFindings) + || !string.IsNullOrWhiteSpace(state.SuggestedFix)) + { + ctx.ServerAnalysis = new ServerAnalysisContext + { + Recommendation = state.LastRecommendation, + InvestigationFindings = state.InvestigationFindings, + SuggestedFix = state.SuggestedFix + }; + } + } + + return ctx; } /// @@ -674,7 +771,7 @@ await WriteLogEntryAsync( // Custom instruction — delegate to agent var freeformState = action.MonitorId != null && _sessions.TryGetValue(action.MonitorId, out var fs) ? fs.State : heartbeatSession.State; - freeformState.CurrentState = MonitorStateId.ExecutingTask; + freeformState.EnterExecutingTask(); var freeformAction = BuildFreeformInterpretAction(elicitResult, freeformState, action.MonitorId); return SerializeAction(freeformAction); } @@ -773,7 +870,7 @@ await WriteLogEntryAsync( else { // Custom instruction — delegate to agent - state.CurrentState = MonitorStateId.ExecutingTask; + state.EnterExecutingTask(); return SerializeAction(BuildFreeformInterpretAction(triggerResult, state)); } } @@ -901,7 +998,7 @@ await WriteLogEntryAsync( } // Custom instruction — delegate to agent - state.CurrentState = MonitorStateId.ExecutingTask; + state.EnterExecutingTask(); action = BuildFreeformInterpretAction(elicitResult, state); break; } @@ -1409,6 +1506,70 @@ private static async Task ExecuteAutoAction(MonitorState state, M { switch (action.Task) { + case "recover_from_ready_in_executing_task": + { + // Post-push hook recovery: the agent re-entered ExecutingTask via + // pr_monitor_start + event=ready (typically because a global custom-instruction + // says "after git push, invoke pr-monitor"). The state machine kicks us here + // to determine whether a push actually happened during the task. + // + // Strategy: + // 1. Refresh HEAD from GitHub. + // 2. If HEAD advanced since HeadShaAtTaskStart, treat as the documented + // completion event for the active flow (comment_addressed / push_completed). + // 3. If HEAD did not advance, fall back to a flow-aware ask_user prompt + // so the user can mark the task done in-flow without losing CommentFlow + // state (vs. the legacy destructive recovery that wiped flow state). + var snapshotSha = state.HeadShaAtTaskStart; + string? latestSha = state.HeadSha; + try + { + var freshPr = await PrStatusFetcher.FetchPrInfoAsync(state.Owner, state.Repo, state.PrNumber); + if (!string.IsNullOrWhiteSpace(freshPr.HeadSha)) + latestSha = freshPr.HeadSha; + } + catch (Exception ex) + { + DebugLogger.Error("AutoExec", $"recover_from_ready: HEAD refresh failed: {ex.Message}"); + } + + if (!string.IsNullOrWhiteSpace(latestSha) && latestSha != state.HeadSha) + { + DebugLogger.Log("AutoExec", $"recover_from_ready: HEAD advanced {ShortSha(state.HeadSha)} → {ShortSha(latestSha)}"); + state.HeadSha = latestSha; + } + + var headAdvanced = !string.IsNullOrWhiteSpace(snapshotSha) + && !string.IsNullOrWhiteSpace(latestSha) + && !string.Equals(snapshotSha, latestSha, StringComparison.Ordinal); + + if (headAdvanced && state.CommentFlow != CommentFlowState.None) + { + DebugLogger.Log("AutoExec", "recover_from_ready: HEAD advanced + comment flow → dispatching comment_addressed"); + return MonitorTransitions.ProcessEvent(state, "comment_addressed", null, null); + } + + if (headAdvanced && state.CiFailureFlow != CiFailureFlowState.None) + { + DebugLogger.Log("AutoExec", "recover_from_ready: HEAD advanced + CI flow → dispatching push_completed"); + return MonitorTransitions.ProcessEvent(state, "push_completed", null, null); + } + + if (headAdvanced) + { + DebugLogger.Log("AutoExec", "recover_from_ready: HEAD advanced + no active flow → resuming polling"); + state.CurrentState = MonitorStateId.AwaitingUser; + return MonitorTransitions.ProcessEvent(state, "user_chose", "resume", null); + } + + // No push detected — fall through to the recovery prompt. We invoke the + // legacy default-recovery handler explicitly by sending an unknown event; + // ProcessEvent's _ branch routes through RecoverFromUnexpectedState, which + // is now flow-aware (preserves CommentFlow / CiFailureFlow when prior state + // was ExecutingTask, so the user's choice can resume in-flow). + DebugLogger.Log("AutoExec", "recover_from_ready: HEAD unchanged — falling back to flow-aware ask_user"); + return MonitorTransitions.ProcessEvent(state, "ready_unresolved", null, null); + } case "resolve_thread": { var comment = state.ActiveWaitingComment; diff --git a/PrCopilot/src/PrCopilot/Tools/ServerAnalysisContext.cs b/PrCopilot/src/PrCopilot/Tools/ServerAnalysisContext.cs new file mode 100644 index 0000000..2686d12 --- /dev/null +++ b/PrCopilot/src/PrCopilot/Tools/ServerAnalysisContext.cs @@ -0,0 +1,28 @@ +// Licensed under the MIT License. + +using System.Text.Json.Serialization; + +namespace PrCopilot.Tools; + +/// +/// Server-side analysis the user is reacting to in their freeform reply. Pronouns +/// in the reply ("this", "it", "that fix") commonly refer to the recommendation +/// or suggested fix here; without this context the agent cannot resolve them. +/// +internal sealed class ServerAnalysisContext +{ + /// Most recent recommendation text (from explain_comment or CI investigation). + [JsonPropertyName("recommendation")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? Recommendation { get; set; } + + /// Investigation findings (CI failure flow). + [JsonPropertyName("investigationFindings")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? InvestigationFindings { get; set; } + + /// Suggested fix (CI failure flow). + [JsonPropertyName("suggestedFix")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? SuggestedFix { get; set; } +} diff --git a/PrCopilot/src/PrCopilot/Tools/UserReplyContext.cs b/PrCopilot/src/PrCopilot/Tools/UserReplyContext.cs new file mode 100644 index 0000000..21c7b1a --- /dev/null +++ b/PrCopilot/src/PrCopilot/Tools/UserReplyContext.cs @@ -0,0 +1,28 @@ +// Licensed under the MIT License. + +using System.Text.Json.Serialization; + +namespace PrCopilot.Tools; + +/// +/// The user's reply to an MCP elicitation prompt. This is authoritative — it is +/// the user's most recent input even though it does not appear in the agent's +/// chat history (elicitation flows separately from chat). +/// +internal sealed class UserReplyContext +{ + /// Verbatim text the user typed into the elicitation prompt. + [JsonPropertyName("text")] + public string Text { get; set; } = ""; + + /// Always true for interpret_freeform — kept for clarity in the payload. + [JsonPropertyName("isFreeform")] + public bool IsFreeform { get; set; } = true; + + /// + /// How sampling classified the text. "custom_instruction" means sampling decided + /// the text doesn't map to any choice; "unavailable" means sampling didn't run or failed. + /// + [JsonPropertyName("samplingClassification")] + public string SamplingClassification { get; set; } = "custom_instruction"; +} diff --git a/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs new file mode 100644 index 0000000..aa68ed4 --- /dev/null +++ b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs @@ -0,0 +1,215 @@ +// Licensed under the MIT License. + +using PrCopilot.StateMachine; + +namespace PrCopilot.Tests; + +/// +/// Tests for the (ExecutingTask, "ready") recovery path and the flow-aware +/// non-destructive recovery from ExecutingTask. See MonitorTransitions.cs: +/// BuildRecoverFromReadyAction and BuildExecutingTaskRecoveryPrompt. +/// +public class ExecutingTaskReadyRecoveryTests +{ + private static MonitorState CreateState() => new() + { + Owner = "test-owner", + Repo = "test-repo", + PrNumber = 42, + HeadSha = "abc123", + HeadBranch = "feature/test", + SessionFolder = Path.GetTempPath() + }; + + private static CommentInfo MakeComment(string id = "c1") => new() + { + Id = id, + Author = "reviewer1", + FilePath = "src/File.cs", + Line = 10, + Body = "Please fix this", + Url = "https://github.com/test/pr/42#comment" + }; + + // ──────────────────────────────────────────────────────────────────── + // Fix A: HEAD-SHA snapshot captured on every ExecutingTask entry + // ──────────────────────────────────────────────────────────────────── + + [Fact] + public void EnterExecutingTask_CapturesCurrentHeadShaAsSnapshot() + { + var state = CreateState(); + state.HeadSha = "abc123"; + + state.EnterExecutingTask(); + + Assert.Equal(MonitorStateId.ExecutingTask, state.CurrentState); + Assert.Equal("abc123", state.HeadShaAtTaskStart); + } + + [Fact] + public void EnterExecutingTask_RefreshesSnapshotOnSubsequentEntries() + { + // After AwaitingUser → ExecutingTask cycles, snapshot must reflect the latest entry, + // not the original task-start. Otherwise back-and-forth in a long flow would always + // detect "push happened" against a stale baseline. + var state = CreateState(); + state.HeadSha = "abc123"; + state.EnterExecutingTask(); + Assert.Equal("abc123", state.HeadShaAtTaskStart); + + state.CurrentState = MonitorStateId.AwaitingUser; + state.HeadSha = "def456"; // a push happened during the previous sub-task + state.EnterExecutingTask(); + + Assert.Equal("def456", state.HeadShaAtTaskStart); + } + + // ──────────────────────────────────────────────────────────────────── + // Fix A: (ExecutingTask, "ready") returns the auto_execute recovery hook + // ──────────────────────────────────────────────────────────────────── + + [Fact] + public void ProcessEvent_ExecutingTaskReady_ReturnsAutoExecuteRecoverTask() + { + // The state machine itself can't perform an HTTP HEAD-fetch — it returns an + // auto_execute action that the wrapper executes (see ExecuteAutoAction case + // "recover_from_ready_in_executing_task" in MonitorFlowTools). + var state = CreateState(); + state.CommentFlow = CommentFlowState.SingleCommentPrompt; + state.UnresolvedComments.Add(MakeComment()); + state.EnterExecutingTask(); + + var action = MonitorTransitions.ProcessEvent(state, "ready", null, null); + + Assert.Equal("auto_execute", action.Action); + Assert.Equal("recover_from_ready_in_executing_task", action.Task); + // Must NOT have wiped flow state — we still need to know the active comment when + // the wrapper finishes its HEAD check and routes to the right completion event. + Assert.Equal(CommentFlowState.SingleCommentPrompt, state.CommentFlow); + Assert.Single(state.UnresolvedComments); + } + + // ──────────────────────────────────────────────────────────────────── + // Fix B: RecoverFromUnexpectedState is now flow-aware when prior=ExecutingTask + // ──────────────────────────────────────────────────────────────────── + + [Fact] + public void RecoverFromUnexpectedState_FromExecutingTaskWithCommentFlow_PreservesFlowAndOffersTreatAsAddressed() + { + // Regression: previously we wiped CommentFlow + cleared ActiveWaitingComment, so + // "Resume monitoring" was the ONLY useful option and the user lost the in-progress + // comment loop. Now the flow is preserved so a follow-up "Treat as comment addressed" + // can dispatch into ProcessCommentAddressed and finish the comment cleanly. + var state = CreateState(); + var comment = MakeComment(); + state.CommentFlow = CommentFlowState.SingleCommentPrompt; + state.UnresolvedComments.Add(comment); + state.ActiveWaitingComment = comment; + state.EnterExecutingTask(); + + // Send an event with no transition from ExecutingTask — falls to default recovery. + var action = MonitorTransitions.ProcessEvent(state, "completely_made_up_event", null, null); + + Assert.Equal("ask_user", action.Action); + Assert.Contains("Treat as comment addressed", action.Choices!); + Assert.Contains("Skip this comment", action.Choices!); + // Flow state preserved so the next user_chose can resume in-flow: + Assert.Equal(CommentFlowState.SingleCommentPrompt, state.CommentFlow); + Assert.Same(comment, state.ActiveWaitingComment); + Assert.Equal(MonitorStateId.AwaitingUser, state.CurrentState); + } + + [Fact] + public void RecoverFromUnexpectedState_FromExecutingTaskWithCiFailureFlow_PreservesFlowAndOffersTreatAsPushed() + { + var state = CreateState(); + state.CiFailureFlow = CiFailureFlowState.InvestigationResults; + state.EnterExecutingTask(); + + var action = MonitorTransitions.ProcessEvent(state, "completely_made_up_event", null, null); + + Assert.Equal("ask_user", action.Action); + Assert.Contains("Treat as push completed", action.Choices!); + Assert.Equal(CiFailureFlowState.InvestigationResults, state.CiFailureFlow); + Assert.Equal(MonitorStateId.AwaitingUser, state.CurrentState); + } + + [Fact] + public void RecoverFromUnexpectedState_FromExecutingTaskWithNoFlow_OffersGenericChoices() + { + var state = CreateState(); + state.EnterExecutingTask(); + + var action = MonitorTransitions.ProcessEvent(state, "completely_made_up_event", null, null); + + Assert.Equal("ask_user", action.Action); + Assert.Equal(["Resume monitoring", "Stop monitoring"], action.Choices); + } + + [Fact] + public void RecoverFromUnexpectedState_FromNonExecutingTaskState_StillWipesFlowStateAsBefore() + { + // The destructive recovery is intentional for non-ExecutingTask prior states — + // those represent legitimately confusing situations where the safest action is + // to reset to a known clean baseline. Don't regress that behavior. + var state = CreateState(); + state.CommentFlow = CommentFlowState.SingleCommentPrompt; + state.ActiveWaitingComment = MakeComment(); + state.CurrentState = MonitorStateId.Polling; + + var action = MonitorTransitions.ProcessEvent(state, "completely_made_up_event", null, null); + + Assert.Equal("ask_user", action.Action); + Assert.Equal(CommentFlowState.None, state.CommentFlow); + Assert.Null(state.ActiveWaitingComment); + } + + // ──────────────────────────────────────────────────────────────────── + // Fix B: "treat_as_addressed" / "treat_as_pushed" choices route correctly + // ──────────────────────────────────────────────────────────────────── + + [Fact] + public void UserChose_TreatAsAddressed_DispatchesIntoProcessCommentAddressed() + { + var state = CreateState(); + var comment = MakeComment(); + state.CommentFlow = CommentFlowState.SingleCommentPrompt; + state.UnresolvedComments.Add(comment); + state.CurrentCommentIndex = 0; + state.CurrentState = MonitorStateId.AwaitingUser; + // No PendingReplyText → ProcessCommentAddressed should ask the agent to compose one + // (rather than blowing up). This is the path used after the wrapper detects that + // HEAD changed but the agent hasn't supplied reply text yet. + state.PendingReplyText = null; + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "treat_as_addressed", null); + + Assert.Equal("execute", action.Action); + Assert.Equal("compose_reply", action.Task); + } + + [Fact] + public void UserChose_TreatAsPushed_TransitionsToPolling() + { + var state = CreateState(); + state.CiFailureFlow = CiFailureFlowState.InvestigationResults; + state.CurrentState = MonitorStateId.AwaitingUser; + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "treat_as_pushed", null); + + Assert.Equal("polling", action.Action); + Assert.Equal(MonitorStateId.Polling, state.CurrentState); + Assert.Equal(CiFailureFlowState.None, state.CiFailureFlow); + } + + [Fact] + public void ChoiceValueMap_HasTreatAsAddressedAndTreatAsPushedEntries() + { + // Display strings used in BuildExecutingTaskRecoveryPrompt must round-trip through + // ChoiceValueMap so the wrapper can normalize the user's selection to the expected + // internal value before invoking ProcessEvent. + Assert.Equal("treat_as_addressed", MonitorTransitions.ChoiceValueMap["Treat as comment addressed"]); + Assert.Equal("treat_as_pushed", MonitorTransitions.ChoiceValueMap["Treat as push completed"]); + } +} diff --git a/PrCopilot/tests/PrCopilot.Tests/FreeformInterpretContextTests.cs b/PrCopilot/tests/PrCopilot.Tests/FreeformInterpretContextTests.cs new file mode 100644 index 0000000..5a64e63 --- /dev/null +++ b/PrCopilot/tests/PrCopilot.Tests/FreeformInterpretContextTests.cs @@ -0,0 +1,224 @@ +// Licensed under the MIT License. + +using PrCopilot.StateMachine; +using PrCopilot.Tools; + +namespace PrCopilot.Tests; + +/// +/// Tests for and the +/// structured payload it produces. These +/// guard the cross-context-boundary fix: when MCP elicitation captures a freeform +/// reply, the agent's chat history has no record of the prompt or reply, so the +/// payload must carry every piece of context the agent needs to act intelligently +/// (comment, recommendation, question shown, user reply). +/// +public class FreeformInterpretContextTests +{ + private static ElicitChoiceResult MakeFreeformResult(string text, string question, params string[] choices) => + new() + { + Value = text, + IsFreeform = true, + OriginalQuestion = question, + OriginalChoices = choices.ToList() + }; + + [Fact] + public void BuildFreeformInterpretAction_PopulatesContextField() + { + var state = new MonitorState(); + var result = MakeFreeformResult("just merge it", "What now?", "Merge the PR", "I'll handle it myself"); + + var action = MonitorFlowTools.BuildFreeformInterpretAction(result, state); + + Assert.Equal("execute", action.Action); + Assert.Equal("interpret_freeform", action.Task); + Assert.NotNull(action.Context); + Assert.IsType(action.Context); + } + + [Fact] + public void BuildFreeformInterpretAction_InstructionsContainBoundaryNotice() + { + // The whole point of the fix: the agent must be told that elicitation happened + // out of band from chat. Without this prefix, the agent dismisses the reply as + // stale state when it doesn't match its chat memory. + var state = new MonitorState(); + var result = MakeFreeformResult("write a test first", "What now?", "Fix it"); + + var action = MonitorFlowTools.BuildFreeformInterpretAction(result, state); + + Assert.NotNull(action.Instructions); + Assert.Contains("MCP elicitation", action.Instructions); + Assert.Contains("authoritative", action.Instructions); + Assert.Contains("chat history", action.Instructions); + } + + [Fact] + public void BuildFreeformInterpretContext_CapturesUserReplyVerbatim() + { + var state = new MonitorState(); + var result = MakeFreeformResult("Lets write a test that proves this", "What now?", "Fix it"); + + var ctx = MonitorFlowTools.BuildFreeformInterpretContext(result, state); + + Assert.Equal("Lets write a test that proves this", ctx.UserReply.Text); + Assert.True(ctx.UserReply.IsFreeform); + Assert.Equal("custom_instruction", ctx.UserReply.SamplingClassification); + } + + [Fact] + public void BuildFreeformInterpretContext_CapturesElicitationQuestionAndChoices() + { + var state = new MonitorState(); + var result = MakeFreeformResult("custom thing", "How should I handle this comment?", + "Address this comment", "I'll handle it myself"); + + var ctx = MonitorFlowTools.BuildFreeformInterpretContext(result, state); + + Assert.Equal("How should I handle this comment?", ctx.Elicitation.Question); + Assert.Equal(2, ctx.Elicitation.Choices.Count); + Assert.Equal("Address this comment", ctx.Elicitation.Choices[0].Display); + Assert.Equal("address", ctx.Elicitation.Choices[0].Value); + Assert.Equal("I'll handle it myself", ctx.Elicitation.Choices[1].Display); + Assert.Equal("handle_myself", ctx.Elicitation.Choices[1].Value); + } + + [Fact] + public void BuildFreeformInterpretContext_CommentFlow_AttachesActiveCommentAndRecommendation() + { + // This is the bug-trigger case from the bug report: the user replies "write a + // test that proves this" — "this" refers to the recommendation about the cache + // key. Without comment + recommendation in context, the agent can't resolve + // what "this" means. + var state = new MonitorState + { + CommentFlow = CommentFlowState.SingleCommentPrompt, + CurrentCommentIndex = 0, + UnresolvedComments = + [ + new CommentInfo + { + Id = "thread-1", + Author = "copilot-pull-request-reviewer[bot]", + FilePath = "src/CredentialResolverEngine.cs", + Line = 74, + Body = "CredentialCache is keyed only by the merged credential section content.", + Url = "https://github.com/owner/repo/pull/1#discussion_r1" + } + ], + LastRecommendation = "Salt the cache key with a resolver-chain fingerprint." + }; + var result = MakeFreeformResult("Lets write a test that proves this", + "How would you like to proceed?", "Apply the recommendation", "I'll handle it myself"); + + var ctx = MonitorFlowTools.BuildFreeformInterpretContext(result, state); + + Assert.Equal("comment", ctx.FlowType); + Assert.NotNull(ctx.Comment); + Assert.Equal("copilot-pull-request-reviewer[bot]", ctx.Comment!.Author); + Assert.Equal("src/CredentialResolverEngine.cs", ctx.Comment.FilePath); + Assert.Equal(74, ctx.Comment.Line); + Assert.Contains("CredentialCache", ctx.Comment.Body); + Assert.Equal("https://github.com/owner/repo/pull/1#discussion_r1", ctx.Comment.Url); + + Assert.NotNull(ctx.ServerAnalysis); + Assert.Equal("Salt the cache key with a resolver-chain fingerprint.", ctx.ServerAnalysis!.Recommendation); + Assert.Null(ctx.CiFailure); + } + + [Fact] + public void BuildFreeformInterpretContext_CommentFlow_NoRecommendation_OmitsServerAnalysis() + { + var state = new MonitorState + { + CommentFlow = CommentFlowState.SingleCommentPrompt, + UnresolvedComments = [new CommentInfo { Author = "alice", FilePath = "f.cs", Body = "..." }], + LastRecommendation = null + }; + var result = MakeFreeformResult("ok", "Q?", "A"); + + var ctx = MonitorFlowTools.BuildFreeformInterpretContext(result, state); + + Assert.Equal("comment", ctx.FlowType); + Assert.NotNull(ctx.Comment); + Assert.Null(ctx.ServerAnalysis); + } + + [Fact] + public void BuildFreeformInterpretContext_CommentFlow_OutOfRangeIndex_OmitsComment() + { + // Defensive: if CurrentCommentIndex is past the end (shouldn't happen but state + // can drift), don't IndexOutOfRange — just omit the comment. + var state = new MonitorState + { + CommentFlow = CommentFlowState.SingleCommentPrompt, + CurrentCommentIndex = 5, + UnresolvedComments = [new CommentInfo { Author = "alice", FilePath = "f.cs", Body = "..." }] + }; + var result = MakeFreeformResult("ok", "Q?", "A"); + + var ctx = MonitorFlowTools.BuildFreeformInterpretContext(result, state); + + Assert.Equal("comment", ctx.FlowType); + Assert.Null(ctx.Comment); + } + + [Fact] + public void BuildFreeformInterpretContext_CiFailureFlow_AttachesFailedChecksAndAnalysis() + { + var state = new MonitorState + { + CiFailureFlow = CiFailureFlowState.InvestigationResults, + FailedChecks = + [ + new FailedCheckInfo { Name = "build", Conclusion = "failure", Url = "https://example/build" }, + new FailedCheckInfo { Name = "test", Conclusion = "failure", Url = "https://example/test" } + ], + InvestigationFindings = "Null reference in ConfigParser.", + SuggestedFix = "Add null check at line 42.", + LastRecommendation = "Apply the suggested null check fix." + }; + var result = MakeFreeformResult("explain more", "What now?", "Apply the recommendation"); + + var ctx = MonitorFlowTools.BuildFreeformInterpretContext(result, state); + + Assert.Equal("ci_failure", ctx.FlowType); + Assert.NotNull(ctx.CiFailure); + Assert.Equal(2, ctx.CiFailure!.FailedChecks.Count); + Assert.Equal("build", ctx.CiFailure.FailedChecks[0].Name); + Assert.Equal("failure", ctx.CiFailure.FailedChecks[0].Conclusion); + + Assert.NotNull(ctx.ServerAnalysis); + Assert.Equal("Null reference in ConfigParser.", ctx.ServerAnalysis!.InvestigationFindings); + Assert.Equal("Add null check at line 42.", ctx.ServerAnalysis.SuggestedFix); + Assert.Equal("Apply the suggested null check fix.", ctx.ServerAnalysis.Recommendation); + Assert.Null(ctx.Comment); + } + + [Fact] + public void BuildFreeformInterpretContext_NoActiveFlow_DefaultsToGeneric() + { + var state = new MonitorState(); + var result = MakeFreeformResult("do the thing", "Q?", "A"); + + var ctx = MonitorFlowTools.BuildFreeformInterpretContext(result, state); + + Assert.Equal("generic", ctx.FlowType); + Assert.Null(ctx.Comment); + Assert.Null(ctx.CiFailure); + Assert.Null(ctx.ServerAnalysis); + } + + [Fact] + public void BuildFreeformInterpretAction_CarriesMonitorIdForMultiPrMode() + { + var state = new MonitorState(); + var result = MakeFreeformResult("ok", "Q?", "A"); + + var action = MonitorFlowTools.BuildFreeformInterpretAction(result, state, monitorId: "pr-owner-repo-42"); + + Assert.Equal("pr-owner-repo-42", action.MonitorId); + } +} From a765a5db5120152c2191c0674e593bb7da32735d Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 13:43:15 -0700 Subject: [PATCH 02/20] Distinguish sampling-classified custom instruction from sampling-unavailable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses copilot-pull-request-reviewer[bot] feedback on PR #51: FreeformInterpretContext was always setting Reason and SamplingClassification to the "custom instruction" labels, even when sampling failed/returned null (host capability missing, invalid JSON, exception). The payload didn't actually carry the sampling outcome — making the field useless to the agent. - TryClassifyFreeformViaSamplingAsync now returns FreeformClassification? (the full record) instead of string?. Null = sampling unavailable. - BuildFreeformInterpretAction / BuildFreeformInterpretContext take an optional FreeformClassification? — sets Reason/SamplingClassification to sampling_unavailable/unavailable when null, sampling_classified_as_custom_instruction/ custom_instruction when non-null. - UserReplyContext gains an optional SamplingReasoning field that propagates the sampling reasoning into the payload so the agent has insight into WHY sampling decided the text was a custom instruction. - All 3 call sites updated to extract .MapsToChoice for routing and pass the full classification on fallback. 3 tests updated/added; 360 tests pass total. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Tools/FreeformInterpretContext.cs | 12 ++- .../src/PrCopilot/Tools/MonitorFlowTools.cs | 77 +++++++++++++------ .../src/PrCopilot/Tools/UserReplyContext.cs | 18 ++++- .../FreeformInterpretContextTests.cs | 48 +++++++++++- 4 files changed, 125 insertions(+), 30 deletions(-) diff --git a/PrCopilot/src/PrCopilot/Tools/FreeformInterpretContext.cs b/PrCopilot/src/PrCopilot/Tools/FreeformInterpretContext.cs index 24c366f..41eb519 100644 --- a/PrCopilot/src/PrCopilot/Tools/FreeformInterpretContext.cs +++ b/PrCopilot/src/PrCopilot/Tools/FreeformInterpretContext.cs @@ -20,9 +20,15 @@ internal sealed class FreeformInterpretContext public string FlowType { get; set; } = "generic"; /// - /// Why this fallback was chosen. "sampling_classified_as_custom_instruction" - /// when sampling decided the text was a custom instruction; - /// "sampling_unavailable" when sampling threw or returned null unexpectedly. + /// Why the freeform fallback was chosen. Distinguishes the two non-routing outcomes: + /// "sampling_classified_as_custom_instruction" — sampling ran successfully and decided + /// the user's text doesn't map to any of the available choices (it's a deliberate + /// custom instruction the agent should execute). + /// "sampling_unavailable" — sampling didn't run or failed (host capability missing, + /// invalid JSON, exception). The agent must interpret the text without a + /// classification hint. + /// The previous version always set this to "sampling_classified_as_custom_instruction" + /// regardless of which path was taken, which made the field useless to the agent. /// [JsonPropertyName("reason")] public string Reason { get; set; } = "sampling_classified_as_custom_instruction"; diff --git a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs index 55f48de..7564a76 100644 --- a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs +++ b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs @@ -117,30 +117,37 @@ private static async Task TryHandleViaSamplingAsync( } /// - /// Attempt to classify freeform text via sampling. If the text maps to a choice, - /// returns the mapped choice value. If it's a custom instruction or sampling fails, - /// returns null (caller should fall back to agent interpretation via BuildFreeformInterpretAction). + /// + /// Attempt to classify freeform text via sampling. Returns the full classification record + /// (including ) when sampling + /// runs successfully; returns null when sampling is unavailable (no host capability, + /// invalid JSON, exceptions). Callers extract + /// to decide routing — non-null choice means follow Path A, null choice or null classification + /// means fall back to . Pass the full classification + /// to so the payload accurately distinguishes + /// "sampling decided custom" from "sampling unavailable" rather than collapsing both to + /// the same label. /// - private static async Task TryClassifyFreeformViaSamplingAsync( + private static async Task TryClassifyFreeformViaSamplingAsync( McpServer server, ElicitChoiceResult result, CancellationToken cancellationToken) { - var classification = await SamplingHelper.ClassifyFreeformAsync( + return await SamplingHelper.ClassifyFreeformAsync( server, result.Value, result.OriginalQuestion ?? "", result.OriginalChoices, cancellationToken); - - return classification?.MapsToChoice; } /// /// Build an execute action for freeform text interpretation by the agent. - /// Used when sampling classification returns null for a custom instruction, or when - /// sampling classification fails (e.g., invalid JSON, exceptions) and the caller falls - /// back to agent interpretation of the user's freeform text. + /// Used when sampling classification returns a custom-instruction classification (mapsToChoice=null), + /// or when sampling itself was unavailable (returned null) and the caller falls back to agent + /// interpretation. Pass when sampling succeeded so the payload + /// can record the actual outcome (custom_instruction vs unavailable) and the sampling reasoning; + /// pass null when sampling was unavailable or failed. /// /// The agent's chat history has NO record of the elicitation that produced this reply /// (MCP elicitation flows separately from chat). Without explicit context, the agent @@ -149,9 +156,13 @@ private static async Task TryHandleViaSamplingAsync( /// gives the agent everything it needs: the comment/CI under discussion, the prompt /// shown, the user's reply, and any server-side analysis the user is reacting to. /// - internal static MonitorAction BuildFreeformInterpretAction(ElicitChoiceResult result, MonitorState state, string? monitorId = null) + internal static MonitorAction BuildFreeformInterpretAction( + ElicitChoiceResult result, + MonitorState state, + SamplingHelper.FreeformClassification? classification = null, + string? monitorId = null) { - var context = BuildFreeformInterpretContext(result, state); + var context = BuildFreeformInterpretContext(result, state, classification); var pathBInstructions = BuildFreeformPathBInstructions(state); var instructions = @@ -185,12 +196,25 @@ internal static MonitorAction BuildFreeformInterpretAction(ElicitChoiceResult re /// Build the structured context payload attached to an interpret_freeform action. /// Pulls comment/CI/recommendation context from the current MonitorState so the /// agent can resolve pronoun references and understand what the user is reacting to. + /// + /// records the sampling outcome: + /// - non-null with MapsToChoice == null → sampling decided this is a custom instruction + /// - null → sampling was unavailable / failed (caller fell back to agent interpretation) + /// The two cases set distinct / + /// values so the agent can + /// tell whether sampling actually classified the text or simply wasn't available. /// - internal static FreeformInterpretContext BuildFreeformInterpretContext(ElicitChoiceResult result, MonitorState state) + internal static FreeformInterpretContext BuildFreeformInterpretContext( + ElicitChoiceResult result, + MonitorState state, + SamplingHelper.FreeformClassification? classification = null) { + var samplingAvailable = classification != null; var ctx = new FreeformInterpretContext { - Reason = "sampling_classified_as_custom_instruction", + Reason = samplingAvailable + ? "sampling_classified_as_custom_instruction" + : "sampling_unavailable", Elicitation = new ElicitationContext { Question = result.OriginalQuestion ?? "", @@ -204,7 +228,8 @@ internal static FreeformInterpretContext BuildFreeformInterpretContext(ElicitCho { Text = result.Value, IsFreeform = true, - SamplingClassification = "custom_instruction" + SamplingClassification = samplingAvailable ? "custom_instruction" : "unavailable", + SamplingReasoning = classification?.Reasoning } }; @@ -754,7 +779,8 @@ await WriteLogEntryAsync( // Freeform text — try sampling classification first, fall back to agent if (elicitResult.IsFreeform) { - var mappedChoice = await TryClassifyFreeformViaSamplingAsync(server!, elicitResult, cancellationToken); + var classification = await TryClassifyFreeformViaSamplingAsync(server!, elicitResult, cancellationToken); + var mappedChoice = classification?.MapsToChoice; if (mappedChoice != null) { DebugLogger.Log("NextStep", $"Multi-PR sampling classified freeform as choice: {mappedChoice}"); @@ -768,11 +794,12 @@ await WriteLogEntryAsync( }); } - // Custom instruction — delegate to agent + // Custom instruction OR sampling unavailable — delegate to agent. + // Pass `classification` so the payload distinguishes the two outcomes. var freeformState = action.MonitorId != null && _sessions.TryGetValue(action.MonitorId, out var fs) ? fs.State : heartbeatSession.State; freeformState.EnterExecutingTask(); - var freeformAction = BuildFreeformInterpretAction(elicitResult, freeformState, action.MonitorId); + var freeformAction = BuildFreeformInterpretAction(elicitResult, freeformState, classification, action.MonitorId); return SerializeAction(freeformAction); } @@ -860,7 +887,8 @@ await WriteLogEntryAsync( // Freeform text — try sampling classification first, fall back to agent if (triggerResult.IsFreeform) { - var mappedChoice = await TryClassifyFreeformViaSamplingAsync(server, triggerResult, cancellationToken); + var classification = await TryClassifyFreeformViaSamplingAsync(server, triggerResult, cancellationToken); + var mappedChoice = classification?.MapsToChoice; if (mappedChoice != null) { DebugLogger.Log("NextStep", $"Trigger sampling classified freeform as choice: {mappedChoice}"); @@ -869,9 +897,9 @@ await WriteLogEntryAsync( } else { - // Custom instruction — delegate to agent + // Custom instruction OR sampling unavailable — delegate to agent. state.EnterExecutingTask(); - return SerializeAction(BuildFreeformInterpretAction(triggerResult, state)); + return SerializeAction(BuildFreeformInterpretAction(triggerResult, state, classification)); } } @@ -988,7 +1016,8 @@ await WriteLogEntryAsync( // Freeform text — try sampling classification first, fall back to agent if (elicitResult.IsFreeform) { - var mappedChoice = await TryClassifyFreeformViaSamplingAsync(server, elicitResult, cancellationToken); + var classification = await TryClassifyFreeformViaSamplingAsync(server, elicitResult, cancellationToken); + var mappedChoice = classification?.MapsToChoice; if (mappedChoice != null) { DebugLogger.Log("NextStep", $"Sampling classified freeform as choice: {mappedChoice}"); @@ -997,9 +1026,9 @@ await WriteLogEntryAsync( continue; } - // Custom instruction — delegate to agent + // Custom instruction OR sampling unavailable — delegate to agent. state.EnterExecutingTask(); - action = BuildFreeformInterpretAction(elicitResult, state); + action = BuildFreeformInterpretAction(elicitResult, state, classification); break; } diff --git a/PrCopilot/src/PrCopilot/Tools/UserReplyContext.cs b/PrCopilot/src/PrCopilot/Tools/UserReplyContext.cs index 21c7b1a..077c9a8 100644 --- a/PrCopilot/src/PrCopilot/Tools/UserReplyContext.cs +++ b/PrCopilot/src/PrCopilot/Tools/UserReplyContext.cs @@ -20,9 +20,23 @@ internal sealed class UserReplyContext public bool IsFreeform { get; set; } = true; /// - /// How sampling classified the text. "custom_instruction" means sampling decided - /// the text doesn't map to any choice; "unavailable" means sampling didn't run or failed. + /// How sampling classified the text. "custom_instruction" means sampling ran and decided + /// the text doesn't map to any of the available choices (so it must be a custom instruction); + /// "unavailable" means sampling didn't run or failed (e.g., the host didn't grant the sampling + /// capability, the model returned invalid JSON, or an exception was raised). The agent should + /// distinguish these — "custom_instruction" is a deliberate decision the agent can trust; + /// "unavailable" means the agent must do all the work of interpreting the text itself. /// [JsonPropertyName("samplingClassification")] public string SamplingClassification { get; set; } = "custom_instruction"; + + /// + /// When sampling ran, the brief reasoning it provided for its classification decision + /// (e.g., "user wrote a custom test request, doesn't match any of the choices"). Null + /// when sampling was unavailable. Useful context for the agent — it can echo the + /// reasoning to the user or use it to make a finer-grained decision. + /// + [JsonPropertyName("samplingReasoning")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? SamplingReasoning { get; set; } } diff --git a/PrCopilot/tests/PrCopilot.Tests/FreeformInterpretContextTests.cs b/PrCopilot/tests/PrCopilot.Tests/FreeformInterpretContextTests.cs index 5a64e63..936341d 100644 --- a/PrCopilot/tests/PrCopilot.Tests/FreeformInterpretContextTests.cs +++ b/PrCopilot/tests/PrCopilot.Tests/FreeformInterpretContextTests.cs @@ -60,12 +60,58 @@ public void BuildFreeformInterpretContext_CapturesUserReplyVerbatim() { var state = new MonitorState(); var result = MakeFreeformResult("Lets write a test that proves this", "What now?", "Fix it"); + var classification = new SamplingHelper.FreeformClassification + { + MapsToChoice = null, + Reasoning = "User wants to verify the bug before fixing it — doesn't match any choice." + }; - var ctx = MonitorFlowTools.BuildFreeformInterpretContext(result, state); + var ctx = MonitorFlowTools.BuildFreeformInterpretContext(result, state, classification); Assert.Equal("Lets write a test that proves this", ctx.UserReply.Text); Assert.True(ctx.UserReply.IsFreeform); Assert.Equal("custom_instruction", ctx.UserReply.SamplingClassification); + Assert.Equal("sampling_classified_as_custom_instruction", ctx.Reason); + Assert.Equal(classification.Reasoning, ctx.UserReply.SamplingReasoning); + } + + [Fact] + public void BuildFreeformInterpretContext_NullClassification_RecordsSamplingUnavailable() + { + // When sampling fails (host capability missing, invalid JSON, exception), + // TryClassifyFreeformViaSamplingAsync returns null and the caller falls back to + // BuildFreeformInterpretAction. The payload must reflect that distinction — + // previously both outcomes collapsed to "custom_instruction" / "sampling_classified_..." + // which made the field useless to the agent. + var state = new MonitorState(); + var result = MakeFreeformResult("Lets write a test that proves this", "What now?", "Fix it"); + + var ctx = MonitorFlowTools.BuildFreeformInterpretContext(result, state, classification: null); + + Assert.Equal("unavailable", ctx.UserReply.SamplingClassification); + Assert.Equal("sampling_unavailable", ctx.Reason); + Assert.Null(ctx.UserReply.SamplingReasoning); + } + + [Fact] + public void BuildFreeformInterpretContext_ClassificationWithReasoning_PropagatesReasoningToPayload() + { + // The sampling reasoning gives the agent insight into WHY sampling decided + // the text was a custom instruction (vs a near-miss for one of the choices). + // Callers shouldn't need to log it separately — it should live in the payload. + var state = new MonitorState(); + var result = MakeFreeformResult("write a test first then fix", "What now?", + "Address this comment", "I'll handle it myself"); + var classification = new SamplingHelper.FreeformClassification + { + MapsToChoice = null, + Reasoning = "User asks for test-first workflow — not a clean match for either choice." + }; + + var ctx = MonitorFlowTools.BuildFreeformInterpretContext(result, state, classification); + + Assert.Equal("User asks for test-first workflow — not a clean match for either choice.", + ctx.UserReply.SamplingReasoning); } [Fact] From b8bef58b42514a6c8bd7fdf7e034909722031397 Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 13:47:23 -0700 Subject: [PATCH 03/20] Attach comment context for waiting-for-reply elicitations too Addresses copilot-pull-request-reviewer[bot] feedback on PR #51: BuildFreeformInterpretContext only attached comment context when state.CommentFlow != None. ProcessWaitingCommentChoice operates with state.ActiveWaitingComment set but state.CommentFlow == None (the comment flow ended when the reply was posted; the comment is now waiting for the reviewer's response). A freeform reply during that elicitation lost the comment context, reintroducing the original context-loss bug for that flow. Adds an else-if branch: when CommentFlow is None and ActiveWaitingComment is set, populate ctx.Comment + FlowType from ActiveWaitingComment. The active comment-flow path still takes precedence (it's what the user is currently being prompted about). Tests: WaitingForReply_ActiveCommentSetButNoCommentFlow_StillAttachesComment (was failing, now passes); CommentFlow_PrefersUnresolvedCommentsOverActiveWaiting guards against ActiveWaitingComment hijacking the payload during a real flow. 362 tests pass total. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/PrCopilot/Tools/MonitorFlowTools.cs | 27 ++++++++ .../FreeformInterpretContextTests.cs | 69 +++++++++++++++++++ 2 files changed, 96 insertions(+) diff --git a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs index 7564a76..1549311 100644 --- a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs +++ b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs @@ -258,6 +258,33 @@ internal static FreeformInterpretContext BuildFreeformInterpretContext( }; } } + // Waiting-for-reply branch: ProcessWaitingCommentChoice operates with + // ActiveWaitingComment set but CommentFlow == None (the comment flow ended + // when the reply was posted; the comment is now waiting for the reviewer's + // response). A freeform reply during that elicitation must still carry the + // comment context — otherwise the agent loses sight of which thread is + // being discussed, reintroducing the original context-loss bug for that flow. + else if (state.ActiveWaitingComment != null) + { + ctx.FlowType = "comment"; + var c = state.ActiveWaitingComment; + ctx.Comment = new CommentContext + { + Author = c.Author, + FilePath = c.FilePath, + Line = c.Line, + Body = c.Body, + Url = c.Url + }; + + if (!string.IsNullOrWhiteSpace(state.LastRecommendation)) + { + ctx.ServerAnalysis = new ServerAnalysisContext + { + Recommendation = state.LastRecommendation + }; + } + } // CI failure flow: attach failed checks + investigation/recommendation else if (state.CiFailureFlow != CiFailureFlowState.None) { diff --git a/PrCopilot/tests/PrCopilot.Tests/FreeformInterpretContextTests.cs b/PrCopilot/tests/PrCopilot.Tests/FreeformInterpretContextTests.cs index 936341d..c42f068 100644 --- a/PrCopilot/tests/PrCopilot.Tests/FreeformInterpretContextTests.cs +++ b/PrCopilot/tests/PrCopilot.Tests/FreeformInterpretContextTests.cs @@ -267,4 +267,73 @@ public void BuildFreeformInterpretAction_CarriesMonitorIdForMultiPrMode() Assert.Equal("pr-owner-repo-42", action.MonitorId); } + + [Fact] + public void BuildFreeformInterpretContext_WaitingForReply_ActiveCommentSetButNoCommentFlow_StillAttachesComment() + { + // Regression: ProcessWaitingCommentChoice operates with state.ActiveWaitingComment set + // but state.CommentFlow == None (the comment flow ended when the reply was posted; the + // comment is now waiting for the reviewer's response). A freeform reply during THAT + // elicitation must still carry the comment context — otherwise the agent loses sight + // of which comment thread is being discussed, reintroducing the original context-loss bug. + var state = new MonitorState + { + CommentFlow = CommentFlowState.None, + ActiveWaitingComment = new CommentInfo + { + Id = "thread-1", + Author = "human-reviewer", + FilePath = "src/Service.cs", + Line = 42, + Body = "Are you sure this handles the null case?", + Url = "https://github.com/o/r/pull/1#discussion_r999" + } + }; + var result = MakeFreeformResult("yes, it does — see the test I added", "What do you want to do?", + "Resolve this thread", "Go back to monitoring"); + + var ctx = MonitorFlowTools.BuildFreeformInterpretContext(result, state); + + Assert.Equal("comment", ctx.FlowType); + Assert.NotNull(ctx.Comment); + Assert.Equal("human-reviewer", ctx.Comment!.Author); + Assert.Equal("src/Service.cs", ctx.Comment.FilePath); + Assert.Equal(42, ctx.Comment.Line); + Assert.Equal("Are you sure this handles the null case?", ctx.Comment.Body); + } + + [Fact] + public void BuildFreeformInterpretContext_CommentFlow_PrefersUnresolvedCommentsOverActiveWaiting() + { + // When a comment flow is active AND ActiveWaitingComment happens to be set + // (carried over from a previous waiting-for-reply cycle), the active comment + // flow takes priority — that's what the user is currently being prompted about. + var state = new MonitorState + { + CommentFlow = CommentFlowState.SingleCommentPrompt, + CurrentCommentIndex = 0, + ActiveWaitingComment = new CommentInfo + { + Id = "stale-waiting", + Author = "old-reviewer", + FilePath = "src/Old.cs", + Body = "old discussion" + } + }; + state.UnresolvedComments.Add(new CommentInfo + { + Id = "current", + Author = "current-reviewer", + FilePath = "src/New.cs", + Body = "current discussion" + }); + var result = MakeFreeformResult("address it", "How?", "Address this comment"); + + var ctx = MonitorFlowTools.BuildFreeformInterpretContext(result, state); + + Assert.Equal("comment", ctx.FlowType); + Assert.NotNull(ctx.Comment); + Assert.Equal("current-reviewer", ctx.Comment!.Author); + Assert.Equal("src/New.cs", ctx.Comment.FilePath); + } } From 126fed6698c0d04e4e0f83c6869acca4d157b72e Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 13:55:57 -0700 Subject: [PATCH 04/20] Don't leak synthetic 'ready_unresolved' event name into user prompt The auto_execute no-push fallback in recover_from_ready_in_executing_task was dispatching ProcessEvent(state, 'ready_unresolved', ...) which fell through to RecoverFromUnexpectedState and interpolated the literal string 'ready_unresolved' into the user-facing question: Unexpected event 'ready_unresolved' while addressing a comment. ... Refactor BuildExecutingTaskRecoveryPrompt to take a user-friendly reasonText parameter (was eventType), expose it as internal, and have the auto_execute fallback call it directly with 'Looks like the task finished without a new commit'. The genuine-unknown-event path in RecoverFromUnexpectedState still surfaces the event name (useful debug info for real bugs). Adds 4 tests covering the three flows and the preserved debug behavior. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../StateMachine/MonitorTransitions.cs | 41 ++++++--- .../src/PrCopilot/Tools/MonitorFlowTools.cs | 22 +++-- .../ExecutingTaskReadyRecoveryTests.cs | 86 +++++++++++++++++++ 3 files changed, 130 insertions(+), 19 deletions(-) diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index 86d7148..396bebb 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -352,7 +352,13 @@ private static MonitorAction RecoverFromUnexpectedState(MonitorState state, stri // lose their place. For other prior states, keep the original aggressive cleanup. if (priorState == MonitorStateId.ExecutingTask) { - return BuildExecutingTaskRecoveryPrompt(state, eventType, priorCommentFlow, priorCiFailureFlow); + // Genuine unknown event from agent — surface the event name in the prompt for + // debug visibility (this is a real bug, not a designed fallback path). + return BuildExecutingTaskRecoveryPrompt( + state, + reasonText: $"The agent sent an unexpected event '{eventType}'", + priorCommentFlow, + priorCiFailureFlow); } state.CommentFlow = CommentFlowState.None; @@ -367,15 +373,26 @@ private static MonitorAction RecoverFromUnexpectedState(MonitorState state, stri } /// - /// Recovery prompt for unexpected events arriving while in . - /// Offers flow-aware choices so the user can mark the task done in-flow rather than - /// being forced to "Resume monitoring" (which would wipe the flow state and restart polling). - /// Flow state (CommentFlow / CiFailureFlow / current comment index) is preserved so the - /// follow-up user_chose can dispatch into the correct flow handler. + /// Recovery prompt for events that arrive while in + /// without a matching transition. Offers flow-aware choices so the user can mark the task + /// done in-flow rather than being forced to "Resume monitoring" (which would wipe the flow + /// state and restart polling). Flow state (CommentFlow / CiFailureFlow / current comment + /// index) is preserved so the follow-up user_chose can dispatch into the correct + /// flow handler. + /// + /// is the user-facing explanation for why the prompt appeared + /// (e.g., "Looks like the task finished without a new commit" for the no-push fallback, + /// or "The agent sent an unexpected event 'foo'" for genuine unknown events). It is + /// interpolated verbatim into the question text — DO NOT pass internal synthetic event + /// names directly, or they will leak to the user. Reviewer feedback on PR #51 caught + /// the original implementation interpolating "ready_unresolved" into the prompt. + /// + /// Internal so the auto_execute no-push fallback in MonitorFlowTools can call it + /// directly instead of routing through ProcessEvent with a synthetic event name. /// - private static MonitorAction BuildExecutingTaskRecoveryPrompt( + internal static MonitorAction BuildExecutingTaskRecoveryPrompt( MonitorState state, - string eventType, + string reasonText, CommentFlowState priorCommentFlow, CiFailureFlowState priorCiFailureFlow) { @@ -384,7 +401,7 @@ private static MonitorAction BuildExecutingTaskRecoveryPrompt( return new MonitorAction { Action = "ask_user", - Question = $"Unexpected event '{eventType}' while addressing a comment. " + + Question = $"{reasonText} while addressing a comment. " + "If you finished the work, pick how to mark it; otherwise resume or stop.", Choices = [ @@ -401,7 +418,7 @@ private static MonitorAction BuildExecutingTaskRecoveryPrompt( return new MonitorAction { Action = "ask_user", - Question = $"Unexpected event '{eventType}' while investigating a CI failure. " + + Question = $"{reasonText} while investigating a CI failure. " + "If you finished the work, pick how to mark it; otherwise resume or stop.", Choices = [ @@ -412,12 +429,12 @@ private static MonitorAction BuildExecutingTaskRecoveryPrompt( }; } - // No active flow — same as the generic recovery + // No active flow — generic recovery state.ActiveWaitingComment = null; return new MonitorAction { Action = "ask_user", - Question = $"Unexpected state: ExecutingTask/{eventType}. What would you like to do?", + Question = $"{reasonText}. What would you like to do?", Choices = ["Resume monitoring", "Stop monitoring"] }; } diff --git a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs index 1549311..e722091 100644 --- a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs +++ b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs @@ -1618,13 +1618,21 @@ private static async Task ExecuteAutoAction(MonitorState state, M return MonitorTransitions.ProcessEvent(state, "user_chose", "resume", null); } - // No push detected — fall through to the recovery prompt. We invoke the - // legacy default-recovery handler explicitly by sending an unknown event; - // ProcessEvent's _ branch routes through RecoverFromUnexpectedState, which - // is now flow-aware (preserves CommentFlow / CiFailureFlow when prior state - // was ExecutingTask, so the user's choice can resume in-flow). - DebugLogger.Log("AutoExec", "recover_from_ready: HEAD unchanged — falling back to flow-aware ask_user"); - return MonitorTransitions.ProcessEvent(state, "ready_unresolved", null, null); + // No push detected — call the recovery prompt builder directly with a + // user-friendly reason. Previously we dispatched ProcessEvent with a + // synthetic event name "ready_unresolved" which leaked into the prompt + // text as "Unexpected event 'ready_unresolved' ..." (reviewer feedback + // on PR #51). Direct call preserves CommentFlow / CiFailureFlow state + // and produces a meaningful question. + DebugLogger.Log("AutoExec", "recover_from_ready: HEAD unchanged — building flow-aware no-push recovery prompt"); + var priorCommentFlow = state.CommentFlow; + var priorCiFailureFlow = state.CiFailureFlow; + state.CurrentState = MonitorStateId.AwaitingUser; + return MonitorTransitions.BuildExecutingTaskRecoveryPrompt( + state, + reasonText: "Looks like the task finished without a new commit", + priorCommentFlow, + priorCiFailureFlow); } case "resolve_thread": { diff --git a/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs index aa68ed4..495cef9 100644 --- a/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs +++ b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs @@ -212,4 +212,90 @@ public void ChoiceValueMap_HasTreatAsAddressedAndTreatAsPushedEntries() Assert.Equal("treat_as_addressed", MonitorTransitions.ChoiceValueMap["Treat as comment addressed"]); Assert.Equal("treat_as_pushed", MonitorTransitions.ChoiceValueMap["Treat as push completed"]); } + + // ──────────────────────────────────────────────────────────────────── + // No-push fallback prompt — must NOT leak the synthetic "ready_unresolved" + // event name into the user-facing question text. Reviewer comment #3 on PR #51. + // ──────────────────────────────────────────────────────────────────── + + [Fact] + public void BuildExecutingTaskRecoveryPrompt_CommentFlow_NoPushReason_DoesNotLeakSyntheticEventName() + { + // Before the fix, the auto_execute no-push fallback called + // ProcessEvent(state, "ready_unresolved", ...) which routed through the generic + // recovery and interpolated the literal string "ready_unresolved" into the prompt. + // The user saw "Unexpected event 'ready_unresolved' while addressing a comment ..." + // which exposes an internal name. The fix takes a user-friendly reasonText instead. + var state = CreateState(); + state.UnresolvedComments.Add(MakeComment()); + state.EnterExecutingTask(); + + var action = MonitorTransitions.BuildExecutingTaskRecoveryPrompt( + state, + reasonText: "Looks like the task finished without a new commit", + priorCommentFlow: CommentFlowState.SingleCommentPrompt, + priorCiFailureFlow: CiFailureFlowState.None); + + Assert.Equal("ask_user", action.Action); + Assert.NotNull(action.Question); + Assert.DoesNotContain("ready_unresolved", action.Question); + Assert.DoesNotContain("Unexpected event", action.Question); + Assert.Contains("without a new commit", action.Question); + Assert.Contains("Treat as comment addressed", action.Choices!); + } + + [Fact] + public void BuildExecutingTaskRecoveryPrompt_CiFlow_NoPushReason_DoesNotLeakSyntheticEventName() + { + var state = CreateState(); + state.EnterExecutingTask(); + + var action = MonitorTransitions.BuildExecutingTaskRecoveryPrompt( + state, + reasonText: "Looks like the task finished without a new commit", + priorCommentFlow: CommentFlowState.None, + priorCiFailureFlow: CiFailureFlowState.InvestigationResults); + + Assert.NotNull(action.Question); + Assert.DoesNotContain("ready_unresolved", action.Question); + Assert.DoesNotContain("Unexpected event", action.Question); + Assert.Contains("without a new commit", action.Question); + Assert.Contains("Treat as push completed", action.Choices!); + } + + [Fact] + public void BuildExecutingTaskRecoveryPrompt_NoActiveFlow_NoPushReason_DoesNotLeakSyntheticEventName() + { + var state = CreateState(); + state.EnterExecutingTask(); + + var action = MonitorTransitions.BuildExecutingTaskRecoveryPrompt( + state, + reasonText: "Looks like the task finished without a new commit", + priorCommentFlow: CommentFlowState.None, + priorCiFailureFlow: CiFailureFlowState.None); + + Assert.NotNull(action.Question); + Assert.DoesNotContain("ready_unresolved", action.Question); + Assert.DoesNotContain("ExecutingTask/", action.Question); + Assert.Contains("without a new commit", action.Question); + } + + [Fact] + public void RecoverFromUnexpectedState_FromExecutingTaskWithGenuineUnknownEvent_StillSurfacesEventNameForDebugging() + { + // The truly-unknown event path (an actual bug — agent sent something we don't know + // about) keeps surfacing the event name because it's useful debug info. Only the + // designed no-push fallback should hide the synthetic name. + var state = CreateState(); + state.CommentFlow = CommentFlowState.SingleCommentPrompt; + state.UnresolvedComments.Add(MakeComment()); + state.EnterExecutingTask(); + + var action = MonitorTransitions.ProcessEvent(state, "totally_made_up_event_xyz", null, null); + + Assert.NotNull(action.Question); + // Genuine unknown events still get the event name interpolated for debug visibility: + Assert.Contains("totally_made_up_event_xyz", action.Question); + } } From 0cd0b533dae94b9bc9bac056daffc5fe11aaf900 Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 15:11:04 -0700 Subject: [PATCH 05/20] Add 'Treat as comment replied' option to comment-flow recovery prompt The ExecutingTask no-push recovery prompt only offered 'Treat as comment addressed' for comment-flow completion, which routes through ProcessCommentAddressed and resolves the thread. Reviewer feedback on PR #51 pointed out that comment tasks can also legitimately finish as comment_replied (pushback or clarification), and forcing those through the addressed path incorrectly resolves a thread that should remain open for the reviewer. Adds 'Treat as comment replied' as a fifth choice. It maps to treat_as_replied_externally and routes to SkipAndAdvanceComment, which advances past the current comment without posting anything new and without resolving the thread. Intended for the case where the user already handled the reply outside the loop (e.g., replied directly on GitHub). Adds 4 tests covering the new choice, the ChoiceValueMap entry, the advance-without-posting behavior, and the transition-to-polling on the last comment. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../StateMachine/MonitorTransitions.cs | 14 +++- .../ExecutingTaskReadyRecoveryTests.cs | 83 +++++++++++++++++++ 2 files changed, 94 insertions(+), 3 deletions(-) diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index 396bebb..494656c 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -116,6 +116,7 @@ public static MonitorAction BuildTerminalAction(MonitorState state, TerminalStat ["I'll handle them myself"] = "handle_myself", ["Skip this comment"] = "skip", ["Treat as comment addressed"] = "treat_as_addressed", + ["Treat as comment replied"] = "treat_as_replied_externally", ["Done — resume monitoring"] = "done", ["Address next comment"] = "continue", ["I'll handle the rest myself"] = "done", @@ -406,6 +407,7 @@ internal static MonitorAction BuildExecutingTaskRecoveryPrompt( Choices = [ "Treat as comment addressed", + "Treat as comment replied", "Skip this comment", "Resume monitoring", "Stop monitoring" @@ -551,12 +553,18 @@ private static MonitorAction BuildCommentAction(MonitorState state, string times private static MonitorAction ProcessCommentChoice(MonitorState state, string? choice) { - // Recovery shortcut available from any comment-flow sub-state — the (ExecutingTask, "ready") - // recovery path offers this when HEAD did NOT advance during the task. Routes through - // ProcessCommentAddressed which gracefully composes a reply if PendingReplyText is empty. + // Recovery shortcuts available from any comment-flow sub-state — the (ExecutingTask, "ready") + // recovery path offers these when HEAD did NOT advance during the task. + // - treat_as_addressed: routes through ProcessCommentAddressed (resolves the thread). + // - treat_as_replied_externally: user already handled the reply outside the loop + // (e.g., replied directly on GitHub or chose to leave it). Don't post anything, + // don't resolve — just advance past this comment. if (choice == "treat_as_addressed") return ProcessCommentAddressed(state, null); + if (choice == "treat_as_replied_externally") + return SkipAndAdvanceComment(state); + return (state.CommentFlow, choice) switch { (CommentFlowState.MultiCommentPrompt, "address_all") => BeginAddressAll(state), diff --git a/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs index 495cef9..fefc484 100644 --- a/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs +++ b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs @@ -298,4 +298,87 @@ public void RecoverFromUnexpectedState_FromExecutingTaskWithGenuineUnknownEvent_ // Genuine unknown events still get the event name interpolated for debug visibility: Assert.Contains("totally_made_up_event_xyz", action.Question); } + + // ──────────────────────────────────────────────────────────────────── + // Comment-flow recovery must offer both completion paths: the existing + // "Treat as comment addressed" (resolves thread) AND a new "Treat as + // comment replied" path that advances without posting anything new + // (user already handled the reply outside the loop). Reviewer comment + // #4 on PR #51 — the original recovery only exposed the addressed path, + // so a pushback/clarification flow that finished without push would force + // the user to either resolve the thread (wrong) or reset the flow state. + // ──────────────────────────────────────────────────────────────────── + + [Fact] + public void BuildExecutingTaskRecoveryPrompt_CommentFlow_OffersTreatAsCommentRepliedChoice() + { + var state = CreateState(); + state.UnresolvedComments.Add(MakeComment()); + state.EnterExecutingTask(); + + var action = MonitorTransitions.BuildExecutingTaskRecoveryPrompt( + state, + reasonText: "Looks like the task finished without a new commit", + priorCommentFlow: CommentFlowState.SingleCommentPrompt, + priorCiFailureFlow: CiFailureFlowState.None); + + Assert.NotNull(action.Choices); + Assert.Contains("Treat as comment addressed", action.Choices!); + Assert.Contains("Treat as comment replied", action.Choices!); + } + + [Fact] + public void ChoiceValueMap_HasTreatAsRepliedExternallyEntry() + { + Assert.Equal( + "treat_as_replied_externally", + MonitorTransitions.ChoiceValueMap["Treat as comment replied"]); + } + + [Fact] + public void ProcessEvent_TreatAsRepliedExternally_AdvancesWithoutPostingOrResolving() + { + // Two unresolved comments. User picks "Treat as comment replied" on the first — + // we should NOT post a reply (no compose_reply / post_reply / resolve_thread action), + // we should NOT resolve the thread, and we SHOULD advance to the next comment. + var state = CreateState(); + var c1 = MakeComment(id: "c1"); + var c2 = MakeComment(id: "c2"); + state.UnresolvedComments.Add(c1); + state.UnresolvedComments.Add(c2); + state.CommentFlow = CommentFlowState.AddressAllIterating; + state.CurrentCommentIndex = 0; + state.CurrentState = MonitorStateId.AwaitingUser; + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "treat_as_replied_externally", null); + + // Did NOT post anything — task should not be a reply/resolve action. + Assert.NotEqual("compose_reply", action.Task); + Assert.NotEqual("post_reply", action.Task); + Assert.NotEqual("resolve_thread", action.Task); + + // Did NOT mark the comment as addressed (no thread resolution). + Assert.False(c1.IsAddressed); + Assert.False(state.PendingResolveAfterAddress); + + // Advanced to the next comment (index incremented OR transitioned to polling). + Assert.True(state.CurrentCommentIndex >= 1); + } + + [Fact] + public void ProcessEvent_TreatAsRepliedExternally_LastComment_TransitionsToPolling() + { + var state = CreateState(); + state.UnresolvedComments.Add(MakeComment(id: "only")); + state.CommentFlow = CommentFlowState.AddressAllIterating; + state.CurrentCommentIndex = 0; + state.CurrentState = MonitorStateId.AwaitingUser; + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "treat_as_replied_externally", null); + + // After the last comment, we should drop back to polling. + Assert.Equal(MonitorStateId.Polling, state.CurrentState); + Assert.NotEqual("compose_reply", action.Task); + Assert.NotEqual("post_reply", action.Task); + } } From 6bb39bedf967ec29dbe57dc2c1c8976401d9c0b9 Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 15:18:46 -0700 Subject: [PATCH 06/20] Don't assume comment_addressed for ambiguous push-detected recovery The (ExecutingTask, 'ready') auto-recovery dispatched comment_addressed whenever HEAD advanced and a comment flow was active. But the apply_recommendation task documents two completion paths: - Implement fix -> event=comment_addressed (resolve thread) - Pushback w/ test -> event=comment_replied (keep thread open) For the proving-test pushback path, dispatching comment_addressed incorrectly resolved a human review thread that should have stayed open for the reviewer to respond to. Fix: - Add MonitorState.ExecutingTaskExpectedCompletion (nullable). Reset to null in EnterExecutingTask. - EmitAddressCommentAction sets it to 'comment_addressed' (single documented completion path). - BeginApplyRecommendation leaves it null (ambiguous - both paths possible). - Extract recovery decision logic into testable helper MonitorTransitions.BuildRecoverFromReadyResolution(state, headAdvanced). ExecuteAutoAction does the gh HEAD fetch then delegates. - Decision: when HEAD advanced + comment flow + ExpectedCompletion == 'comment_addressed' -> auto-dispatch (preserves convenience for the unambiguous case). Otherwise -> fall back to flow-aware ask_user prompt so the user disambiguates between addressed and replied. Adds 5 tests covering the ambiguous-task ask_user fallback, the unambiguous-task auto-dispatch, the no-push regression, and the ExpectedCompletion assignment in both emitters. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../PrCopilot/StateMachine/MonitorState.cs | 17 +++ .../StateMachine/MonitorTransitions.cs | 75 +++++++++++ .../src/PrCopilot/Tools/MonitorFlowTools.cs | 53 ++------ .../ExecutingTaskReadyRecoveryTests.cs | 121 ++++++++++++++++++ 4 files changed, 225 insertions(+), 41 deletions(-) diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorState.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorState.cs index 6c7a21e..2e10e16 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorState.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorState.cs @@ -110,6 +110,20 @@ public class MonitorState /// Transient: completion event set by EmitComposeReplyAction for the sampling compose_reply handler. public string? PendingCompletionEvent { get; set; } + /// + /// Expected completion event for the currently-executing task, when known unambiguously. + /// Read by the (ExecutingTask, "ready") recovery path: when HEAD has advanced and this + /// value matches a single completion event (e.g., "comment_addressed", "push_completed"), + /// the recovery dispatches that event automatically. When null (ambiguous — e.g., the + /// apply_recommendation task can complete as either "comment_addressed" for an implementation + /// push OR "comment_replied" for a proving-test push), the recovery falls back to the + /// flow-aware ask_user prompt so the user disambiguates instead of the engine guessing. + /// + /// Reset to null on every call; emitters that know + /// their task's unambiguous completion event must set this explicitly after that call. + /// + public string? ExecutingTaskExpectedCompletion { get; set; } + /// /// When set, ProcessTaskComplete calls AdvanceAfterComment with this summary. /// Used after posting a thread reply (auto_execute) when there's no subsequent resolve step. @@ -154,5 +168,8 @@ public void EnterExecutingTask() { CurrentState = MonitorStateId.ExecutingTask; HeadShaAtTaskStart = HeadSha; + // Reset expected completion to "ambiguous" on every entry. Emitters that know their + // task's single completion event must set this AFTER calling EnterExecutingTask. + ExecutingTaskExpectedCompletion = null; } } diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index 494656c..0eaa060 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -459,6 +459,76 @@ private static MonitorAction BuildRecoverFromReadyAction(MonitorState state) }; } + /// + /// Pure decision logic for the recover_from_ready_in_executing_task auto_execute + /// dispatch. Extracted from MonitorFlowTools.ExecuteAutoAction so it can be + /// unit-tested without going through the async HEAD-fetch path. + /// + /// Inputs: the state (CommentFlow / CiFailureFlow / ExecutingTaskExpectedCompletion) + /// and a precomputed flag (true when the latest HEAD + /// differs from ). + /// + /// Behavior: + /// - headAdvanced + comment flow: dispatch comment_addressed ONLY when + /// is exactly + /// "comment_addressed" (the task has a single documented completion path). + /// Otherwise (ambiguous task such as apply_recommendation which can complete as + /// comment_addressed for an implementation push OR comment_replied for a + /// proving-test push), fall back to the flow-aware ask_user prompt so the user + /// disambiguates instead of the engine guessing wrong and incorrectly resolving a + /// thread that should stay open for the reviewer. + /// - headAdvanced + CI flow: dispatch push_completed (the CI flow has a + /// single completion path). + /// - headAdvanced + no flow: resume polling. + /// - !headAdvanced: build the no-push recovery prompt. + /// + internal static MonitorAction BuildRecoverFromReadyResolution(MonitorState state, bool headAdvanced) + { + if (headAdvanced && state.CommentFlow != CommentFlowState.None) + { + if (state.ExecutingTaskExpectedCompletion == "comment_addressed") + { + DebugLogger.Log("AutoExec", "recover_from_ready: HEAD advanced + comment flow + unambiguous → dispatching comment_addressed"); + return ProcessEvent(state, "comment_addressed", null, null); + } + + // Ambiguous task (e.g., apply_recommendation) — fall back to ask_user. + DebugLogger.Log("AutoExec", "recover_from_ready: HEAD advanced + comment flow + ambiguous task → asking user how to mark it"); + var priorCommentFlow = state.CommentFlow; + var priorCiFailureFlow = state.CiFailureFlow; + state.CurrentState = MonitorStateId.AwaitingUser; + return BuildExecutingTaskRecoveryPrompt( + state, + reasonText: "The task pushed a commit but didn't tell me whether to resolve the thread or leave it for the reviewer", + priorCommentFlow, + priorCiFailureFlow); + } + + if (headAdvanced && state.CiFailureFlow != CiFailureFlowState.None) + { + DebugLogger.Log("AutoExec", "recover_from_ready: HEAD advanced + CI flow → dispatching push_completed"); + return ProcessEvent(state, "push_completed", null, null); + } + + if (headAdvanced) + { + DebugLogger.Log("AutoExec", "recover_from_ready: HEAD advanced + no active flow → resuming polling"); + state.CurrentState = MonitorStateId.AwaitingUser; + return ProcessEvent(state, "user_chose", "resume", null); + } + + // No push detected — flow-aware no-push recovery prompt. + DebugLogger.Log("AutoExec", "recover_from_ready: HEAD unchanged — building flow-aware no-push recovery prompt"); + var noPushPriorCommentFlow = state.CommentFlow; + var noPushPriorCiFailureFlow = state.CiFailureFlow; + state.CurrentState = MonitorStateId.AwaitingUser; + return BuildExecutingTaskRecoveryPrompt( + state, + reasonText: "Looks like the task finished without a new commit", + noPushPriorCommentFlow, + noPushPriorCiFailureFlow); + } + private static string ShortSha(string? sha) => string.IsNullOrEmpty(sha) ? "(none)" : sha.Length <= 7 ? sha : sha[..7]; @@ -764,6 +834,11 @@ private static MonitorAction EmitAddressCommentAction(MonitorState state) var c = state.UnresolvedComments[state.CurrentCommentIndex]; state.EnterExecutingTask(); + // address_comment has a single documented completion path: event=comment_addressed. + // Set explicitly so the (ExecutingTask, "ready") recovery can auto-dispatch on push + // detection rather than falling back to ask_user (apply_recommendation deliberately + // leaves this null because it has two completion paths). + state.ExecutingTaskExpectedCompletion = "comment_addressed"; return new MonitorAction { Action = "execute", diff --git a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs index e722091..bb02e30 100644 --- a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs +++ b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs @@ -1569,13 +1569,17 @@ private static async Task ExecuteAutoAction(MonitorState state, M // says "after git push, invoke pr-monitor"). The state machine kicks us here // to determine whether a push actually happened during the task. // - // Strategy: - // 1. Refresh HEAD from GitHub. - // 2. If HEAD advanced since HeadShaAtTaskStart, treat as the documented - // completion event for the active flow (comment_addressed / push_completed). - // 3. If HEAD did not advance, fall back to a flow-aware ask_user prompt - // so the user can mark the task done in-flow without losing CommentFlow - // state (vs. the legacy destructive recovery that wiped flow state). + // This handler does the I/O (refresh HEAD from GitHub) then delegates the + // pure decision logic to MonitorTransitions.BuildRecoverFromReadyResolution, + // which is unit-testable. Behavior summary (see helper for full doc): + // 1. HEAD advanced + comment flow + ExpectedCompletion=="comment_addressed" + // → auto-dispatch comment_addressed. + // 2. HEAD advanced + comment flow + ambiguous task (e.g., apply_recommendation + // where the agent might have pushed a proving-test for comment_replied) + // → fall back to ask_user instead of guessing. + // 3. HEAD advanced + CI flow → dispatch push_completed. + // 4. HEAD advanced + no flow → resume polling. + // 5. HEAD did not advance → flow-aware "no new commit" recovery prompt. var snapshotSha = state.HeadShaAtTaskStart; string? latestSha = state.HeadSha; try @@ -1599,40 +1603,7 @@ private static async Task ExecuteAutoAction(MonitorState state, M && !string.IsNullOrWhiteSpace(latestSha) && !string.Equals(snapshotSha, latestSha, StringComparison.Ordinal); - if (headAdvanced && state.CommentFlow != CommentFlowState.None) - { - DebugLogger.Log("AutoExec", "recover_from_ready: HEAD advanced + comment flow → dispatching comment_addressed"); - return MonitorTransitions.ProcessEvent(state, "comment_addressed", null, null); - } - - if (headAdvanced && state.CiFailureFlow != CiFailureFlowState.None) - { - DebugLogger.Log("AutoExec", "recover_from_ready: HEAD advanced + CI flow → dispatching push_completed"); - return MonitorTransitions.ProcessEvent(state, "push_completed", null, null); - } - - if (headAdvanced) - { - DebugLogger.Log("AutoExec", "recover_from_ready: HEAD advanced + no active flow → resuming polling"); - state.CurrentState = MonitorStateId.AwaitingUser; - return MonitorTransitions.ProcessEvent(state, "user_chose", "resume", null); - } - - // No push detected — call the recovery prompt builder directly with a - // user-friendly reason. Previously we dispatched ProcessEvent with a - // synthetic event name "ready_unresolved" which leaked into the prompt - // text as "Unexpected event 'ready_unresolved' ..." (reviewer feedback - // on PR #51). Direct call preserves CommentFlow / CiFailureFlow state - // and produces a meaningful question. - DebugLogger.Log("AutoExec", "recover_from_ready: HEAD unchanged — building flow-aware no-push recovery prompt"); - var priorCommentFlow = state.CommentFlow; - var priorCiFailureFlow = state.CiFailureFlow; - state.CurrentState = MonitorStateId.AwaitingUser; - return MonitorTransitions.BuildExecutingTaskRecoveryPrompt( - state, - reasonText: "Looks like the task finished without a new commit", - priorCommentFlow, - priorCiFailureFlow); + return MonitorTransitions.BuildRecoverFromReadyResolution(state, headAdvanced); } case "resolve_thread": { diff --git a/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs index fefc484..da03bbe 100644 --- a/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs +++ b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs @@ -381,4 +381,125 @@ public void ProcessEvent_TreatAsRepliedExternally_LastComment_TransitionsToPolli Assert.NotEqual("compose_reply", action.Task); Assert.NotEqual("post_reply", action.Task); } + + // ──────────────────────────────────────────────────────────────────── + // Push-detected recovery must not assume comment_addressed for tasks + // whose completion event is ambiguous (apply_recommendation can push + // either an implementation OR a proving test → comment_replied). + // Reviewer comment #5 on PR #51. + // ──────────────────────────────────────────────────────────────────── + + [Fact] + public void RecoverFromReadyResolution_PushDetected_AmbiguousTask_AsksUserInsteadOfDispatchingCommentAddressed() + { + // apply_recommendation (or any task that didn't set an explicit expected + // completion event) is ambiguous: the agent might have pushed an implementation + // (comment_addressed → resolve thread) OR a proving test (comment_replied → + // keep thread open for reviewer). The recovery must NOT guess; it should + // surface the ask_user prompt with both choices. + var state = CreateState(); + var c = MakeComment(); + state.UnresolvedComments.Add(c); + state.CommentFlow = CommentFlowState.SingleCommentPrompt; + state.EnterExecutingTask(); + state.ExecutingTaskExpectedCompletion = null; // ambiguous + + var action = MonitorTransitions.BuildRecoverFromReadyResolution(state, headAdvanced: true); + + // Should NOT have routed through ProcessCommentAddressed (which would resolve + // the thread or emit a compose/resolve action). + Assert.False(c.IsAddressed); + Assert.False(state.PendingResolveAfterAddress); + Assert.NotEqual("compose_reply", action.Task); + Assert.NotEqual("resolve_thread", action.Task); + // SHOULD be an ask_user prompt with both completion choices. + Assert.Equal("ask_user", action.Action); + Assert.NotNull(action.Choices); + Assert.Contains("Treat as comment addressed", action.Choices!); + Assert.Contains("Treat as comment replied", action.Choices!); + } + + [Fact] + public void RecoverFromReadyResolution_PushDetected_KnownAddressedTask_DispatchesCommentAddressed() + { + // address_comment (set by EmitAddressCommentAction) has only one completion path: + // comment_addressed. The recovery should auto-dispatch in this unambiguous case + // (this preserves the original auto-recovery convenience for the common path). + var state = CreateState(); + var c = MakeComment(); + state.UnresolvedComments.Add(c); + state.CommentFlow = CommentFlowState.AddressAllIterating; + state.EnterExecutingTask(); + state.ExecutingTaskExpectedCompletion = "comment_addressed"; + + var action = MonitorTransitions.BuildRecoverFromReadyResolution(state, headAdvanced: true); + + // Should have routed through ProcessCommentAddressed — either a compose_reply + // request (if no pending reply text) or a resolve_thread action. Either way, it + // is NOT the recovery ask_user prompt. + Assert.NotEqual("ask_user", action.Action); + // The processing should have engaged the comment_addressed pipeline (one of + // these flags or actions will be set). + var engagedAddressedPipeline = + action.Task == "compose_reply" || + action.Task == "resolve_thread" || + state.PendingResolveAfterAddress || + c.IsAddressed; + Assert.True(engagedAddressedPipeline, + $"Expected ProcessCommentAddressed to engage; got Action={action.Action}, Task={action.Task}"); + } + + [Fact] + public void RecoverFromReadyResolution_NoPush_StillBuildsRecoveryPrompt() + { + // Regression: the no-push branch must still produce the user-friendly recovery prompt + // ("without a new commit") regardless of ExecutingTaskExpectedCompletion. + var state = CreateState(); + state.UnresolvedComments.Add(MakeComment()); + state.CommentFlow = CommentFlowState.SingleCommentPrompt; + state.EnterExecutingTask(); + state.ExecutingTaskExpectedCompletion = "comment_addressed"; + + var action = MonitorTransitions.BuildRecoverFromReadyResolution(state, headAdvanced: false); + + Assert.Equal("ask_user", action.Action); + Assert.NotNull(action.Question); + Assert.Contains("without a new commit", action.Question); + } + + [Fact] + public void EmitAddressCommentAction_SetsExpectedCompletionToCommentAddressed() + { + // address_comment task has only one documented completion path: event=comment_addressed. + // The state field is what BuildRecoverFromReadyResolution reads to decide whether the + // push-detected auto-dispatch is safe. + var state = CreateState(); + state.UnresolvedComments.Add(MakeComment()); + state.CommentFlow = CommentFlowState.AddressAllIterating; + state.CurrentCommentIndex = 0; + state.CurrentState = MonitorStateId.AwaitingUser; + + // Trigger BeginAddressCurrentComment → EmitAddressCommentAction via the public surface. + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "address", null); + + Assert.Equal("address_comment", action.Task); + Assert.Equal("comment_addressed", state.ExecutingTaskExpectedCompletion); + } + + [Fact] + public void BeginApplyRecommendation_LeavesExpectedCompletionAmbiguous() + { + // apply_recommendation has TWO documented completion paths: comment_addressed (implement) + // and comment_replied (proving-test pushback). Must NOT pre-commit to one. + var state = CreateState(); + state.UnresolvedComments.Add(MakeComment()); + state.CommentFlow = CommentFlowState.SingleCommentPrompt; + state.CurrentCommentIndex = 0; + state.CurrentState = MonitorStateId.AwaitingUser; + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "apply_fix", null); + + Assert.Equal("apply_recommendation", action.Task); + Assert.Null(state.ExecutingTaskExpectedCompletion); + } } From 5a8a976b0fd681c69d1d6ccbd1a3767f064682e2 Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 15:26:44 -0700 Subject: [PATCH 07/20] Preserve waiting-comment context in ExecutingTask recovery prompt BuildWaitingCommentAction puts the monitor in a state where ActiveWaitingComment is set but both CommentFlow and CiFailureFlow are None (the comment flow ended when the reply was posted; the thread is now waiting for the reviewer's response, dispatched via ProcessWaitingCommentChoice on ActiveWaitingComment != null). If an interpret_freeform / execute task ran in that state and a stray ready or unknown event arrived, BuildExecutingTaskRecoveryPrompt fell into the no-flow generic branch which cleared ActiveWaitingComment and offered only Resume/Stop. The user lost the waiting-thread context and could no longer recover with the original Resolve / Go-back choices. Fix: add a waiting-comment branch (between CI flow and the truly-no- context generic branch) that preserves ActiveWaitingComment and offers [Resolve this thread, Go back to monitoring, Stop monitoring]. The existing ProcessUserChoice -> ProcessWaitingCommentChoice dispatch then routes the user's selection correctly. BuildRecoverFromReadyResolution gets a parallel branch: when HEAD advanced + no flow + waiting comment, surface the same prompt instead of auto-resuming (which would also clear the waiting context). Adds 4 tests covering the no-push and push-detected paths, the end-to- end Resolve dispatch, and the truly-no-context regression. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../StateMachine/MonitorTransitions.cs | 39 ++++++- .../ExecutingTaskReadyRecoveryTests.cs | 106 ++++++++++++++++++ 2 files changed, 144 insertions(+), 1 deletion(-) diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index 0eaa060..d5815f1 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -431,7 +431,30 @@ internal static MonitorAction BuildExecutingTaskRecoveryPrompt( }; } - // No active flow — generic recovery + // Waiting-for-reply state: ActiveWaitingComment is set without an active flow + // (the comment flow ended when the reply was posted; the comment is now waiting + // for the reviewer's response). BuildWaitingCommentAction puts us here. The + // recovery must preserve ActiveWaitingComment and offer the original Resolve / + // Go-back choices, otherwise a stray ready / unknown event clears the waiting + // context and the user can no longer recover the original choices. + if (state.ActiveWaitingComment != null) + { + return new MonitorAction + { + Action = "ask_user", + Question = $"{reasonText} while a thread is waiting for the reviewer's reply. " + + "Resolve the thread now, or go back to monitoring.", + Choices = + [ + "Resolve this thread", + "Go back to monitoring", + "Stop monitoring" + ], + Context = state.ActiveWaitingComment + }; + } + + // No active flow and no waiting comment — generic recovery state.ActiveWaitingComment = null; return new MonitorAction { @@ -510,6 +533,20 @@ internal static MonitorAction BuildRecoverFromReadyResolution(MonitorState state return ProcessEvent(state, "push_completed", null, null); } + // Waiting-comment case (no flow but ActiveWaitingComment set) — must NOT auto-resume, + // because that would clear the waiting context and the user loses the original + // Resolve / Go-back choices. Surface the waiting-comment recovery prompt instead. + if (headAdvanced && state.ActiveWaitingComment != null) + { + DebugLogger.Log("AutoExec", "recover_from_ready: HEAD advanced + waiting comment → surfacing resolve/go-back prompt"); + state.CurrentState = MonitorStateId.AwaitingUser; + return BuildExecutingTaskRecoveryPrompt( + state, + reasonText: "The task pushed a commit while a thread was waiting for the reviewer's reply", + CommentFlowState.None, + CiFailureFlowState.None); + } + if (headAdvanced) { DebugLogger.Log("AutoExec", "recover_from_ready: HEAD advanced + no active flow → resuming polling"); diff --git a/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs index da03bbe..6945c87 100644 --- a/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs +++ b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs @@ -502,4 +502,110 @@ public void BeginApplyRecommendation_LeavesExpectedCompletionAmbiguous() Assert.Equal("apply_recommendation", action.Task); Assert.Null(state.ExecutingTaskExpectedCompletion); } + + // ──────────────────────────────────────────────────────────────────── + // Waiting-comment recovery: when an interpret_freeform / execute task + // runs while ActiveWaitingComment is set (CommentFlow==None and + // CiFailureFlow==None — the normal post-reply waiting state), the + // recovery must preserve the waiting context and offer the original + // Resolve / Go-back choices instead of clearing ActiveWaitingComment + // and offering only generic Resume/Stop. Reviewer comment #6 on PR #51. + // ──────────────────────────────────────────────────────────────────── + + [Fact] + public void BuildExecutingTaskRecoveryPrompt_WaitingComment_PreservesContextAndOffersWaitingChoices() + { + var state = CreateState(); + var c = MakeComment(); + state.ActiveWaitingComment = c; + // CommentFlow == None, CiFailureFlow == None — the normal waiting-for-reply state + state.EnterExecutingTask(); + + var action = MonitorTransitions.BuildExecutingTaskRecoveryPrompt( + state, + reasonText: "Looks like the task finished without a new commit", + priorCommentFlow: CommentFlowState.None, + priorCiFailureFlow: CiFailureFlowState.None); + + Assert.Equal("ask_user", action.Action); + Assert.NotNull(action.Choices); + // Original waiting-comment choices must be offered: + Assert.Contains("Resolve this thread", action.Choices!); + Assert.Contains("Go back to monitoring", action.Choices!); + // ActiveWaitingComment must NOT be wiped (otherwise ProcessWaitingCommentChoice + // can't dispatch the user's selection). + Assert.Equal(c, state.ActiveWaitingComment); + } + + [Fact] + public void BuildExecutingTaskRecoveryPrompt_WaitingComment_ResolveChoice_DispatchesToProcessWaitingCommentChoice() + { + // End-to-end: after the recovery prompt offers "Resolve this thread", the user's + // selection must round-trip through ChoiceValueMap → ProcessUserChoice → + // ProcessWaitingCommentChoice → BuildResolveThreadAction. If ActiveWaitingComment + // is wiped or CommentFlow is set, the dispatcher routes wrong. + var state = CreateState(); + var c = MakeComment(); + state.ActiveWaitingComment = c; + state.EnterExecutingTask(); + + var prompt = MonitorTransitions.BuildExecutingTaskRecoveryPrompt( + state, + reasonText: "Looks like the task finished without a new commit", + priorCommentFlow: CommentFlowState.None, + priorCiFailureFlow: CiFailureFlowState.None); + + Assert.Equal("ask_user", prompt.Action); + // The helper only builds the action; the caller (RecoverFromUnexpectedState or + // BuildRecoverFromReadyResolution) sets AwaitingUser. Simulate that here so the + // follow-up user_chose dispatches correctly. + state.CurrentState = MonitorStateId.AwaitingUser; + + // Simulate the user picking "Resolve this thread" + var choiceValue = MonitorTransitions.ChoiceValueMap["Resolve this thread"]; + var followUp = MonitorTransitions.ProcessEvent(state, "user_chose", choiceValue, null); + + // Should have dispatched to BuildResolveThreadAction (auto_execute resolve_thread). + Assert.Equal("auto_execute", followUp.Action); + Assert.Equal("resolve_thread", followUp.Task); + } + + [Fact] + public void BuildExecutingTaskRecoveryPrompt_NoFlowAndNoWaitingComment_FallsBackToGenericChoices() + { + // Regression: the truly-no-context branch (no flow, no waiting comment) still + // produces only Resume/Stop and clears ActiveWaitingComment (already null here). + var state = CreateState(); + state.EnterExecutingTask(); + + var action = MonitorTransitions.BuildExecutingTaskRecoveryPrompt( + state, + reasonText: "Looks like the task finished without a new commit", + priorCommentFlow: CommentFlowState.None, + priorCiFailureFlow: CiFailureFlowState.None); + + Assert.NotNull(action.Choices); + Assert.DoesNotContain("Resolve this thread", action.Choices!); + Assert.Contains("Resume monitoring", action.Choices!); + } + + [Fact] + public void RecoverFromReadyResolution_PushDetected_WaitingComment_PreservesContextAndAsksUser() + { + // If HEAD advanced while we were in a waiting-comment state (the agent pushed + // something while elicit-freeform was running), we shouldn't auto-dispatch any + // completion — we have no flow, just a waiting thread. Surface the waiting-comment + // recovery prompt instead, so the user can resolve or go back. + var state = CreateState(); + var c = MakeComment(); + state.ActiveWaitingComment = c; + state.EnterExecutingTask(); + + var action = MonitorTransitions.BuildRecoverFromReadyResolution(state, headAdvanced: true); + + Assert.Equal("ask_user", action.Action); + Assert.NotNull(action.Choices); + Assert.Contains("Resolve this thread", action.Choices!); + Assert.Equal(c, state.ActiveWaitingComment); + } } From 97d597e34d84f5a28bf7e17f41d58b65f3297ef7 Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 15:33:38 -0700 Subject: [PATCH 08/20] Handle 'skip' in all comment-flow sub-states in recovery prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ExecutingTask recovery prompt offers 'Skip this comment' for every comment-flow sub-state, but ProcessCommentChoice only handled 'skip' for AddressAllIterating and ExplainAllIterating. The remaining sub- states (SingleCommentPrompt, PickComment, PickRemaining) fell through to the default '_ => TransitionToPolling' branch, which abandoned the entire comment flow instead of skipping just the active thread. Fix: add explicit (X, 'skip') => SkipAndAdvanceComment cases for the three previously-broken sub-states. AddressAllIterating keeps its existing SkipAndAdvanceComment route. ExplainAllIterating intentionally keeps AdvanceExplainAll (auto-emits the next explain_comment task) — different semantics for the explain loop, preserved. Adds 5 tests: 3 for the broken sub-states (skip now advances index and stays in flow), 2 regression tests for AddressAll and ExplainAll behaviors. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../StateMachine/MonitorTransitions.cs | 11 +++ .../ExecutingTaskReadyRecoveryTests.cs | 96 +++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index d5815f1..4b689b8 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -696,6 +696,17 @@ private static MonitorAction ProcessCommentChoice(MonitorState state, string? ch (CommentFlowState.ExplainAllIterating, "skip") => AdvanceExplainAll(state), (CommentFlowState.ExplainAllIterating, "done") => TransitionToPolling(state), + // "Skip this comment" from the ExecutingTask recovery prompt is offered for + // every comment-flow sub-state. AddressAllIterating / ExplainAllIterating + // already handle skip explicitly above with their own semantics. The remaining + // sub-states (SingleCommentPrompt, PickComment, PickRemaining) previously fell + // through to TransitionToPolling, which abandoned the rest of the comment flow. + // Route them through SkipAndAdvanceComment so the user can drop the active + // thread and continue with whatever comments remain. + (CommentFlowState.SingleCommentPrompt, "skip") => SkipAndAdvanceComment(state), + (CommentFlowState.PickComment, "skip") => SkipAndAdvanceComment(state), + (CommentFlowState.PickRemaining, "skip") => SkipAndAdvanceComment(state), + (CommentFlowState.PickComment, _) => HandlePickedComment(state, choice), (CommentFlowState.PickRemaining, "continue") => ContinueToNextComment(state), (CommentFlowState.PickRemaining, "done") => TransitionToPolling(state), diff --git a/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs index 6945c87..41b5acf 100644 --- a/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs +++ b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs @@ -608,4 +608,100 @@ public void RecoverFromReadyResolution_PushDetected_WaitingComment_PreservesCont Assert.Contains("Resolve this thread", action.Choices!); Assert.Equal(c, state.ActiveWaitingComment); } + + // ──────────────────────────────────────────────────────────────────── + // Recovery prompt's "Skip this comment" must advance to the next comment + // in every comment-flow sub-state, not just AddressAllIterating. Reviewer + // comment #7 on PR #51: SingleCommentPrompt / PickComment / PickRemaining + // previously fell through to TransitionToPolling, abandoning the rest of + // the flow. + // ──────────────────────────────────────────────────────────────────── + + [Fact] + public void ProcessEvent_SkipChoice_FromSingleCommentPrompt_AdvancesInsteadOfAbandoningFlow() + { + var state = CreateState(); + state.UnresolvedComments.Add(MakeComment(id: "c1")); + state.UnresolvedComments.Add(MakeComment(id: "c2")); + state.CommentFlow = CommentFlowState.SingleCommentPrompt; + state.CurrentCommentIndex = 0; + state.CurrentState = MonitorStateId.AwaitingUser; + + var skipValue = MonitorTransitions.ChoiceValueMap["Skip this comment"]; + var action = MonitorTransitions.ProcessEvent(state, "user_chose", skipValue, null); + + // Should have advanced to the next comment, NOT abandoned the whole flow. + Assert.True(state.CurrentCommentIndex >= 1); + Assert.NotEqual(MonitorStateId.Polling, state.CurrentState); + } + + [Fact] + public void ProcessEvent_SkipChoice_FromPickComment_AdvancesInsteadOfAbandoningFlow() + { + var state = CreateState(); + state.UnresolvedComments.Add(MakeComment(id: "c1")); + state.UnresolvedComments.Add(MakeComment(id: "c2")); + state.CommentFlow = CommentFlowState.PickComment; + state.CurrentCommentIndex = 0; + state.CurrentState = MonitorStateId.AwaitingUser; + + var skipValue = MonitorTransitions.ChoiceValueMap["Skip this comment"]; + var action = MonitorTransitions.ProcessEvent(state, "user_chose", skipValue, null); + + Assert.True(state.CurrentCommentIndex >= 1); + Assert.NotEqual(MonitorStateId.Polling, state.CurrentState); + } + + [Fact] + public void ProcessEvent_SkipChoice_FromPickRemaining_AdvancesInsteadOfAbandoningFlow() + { + var state = CreateState(); + state.UnresolvedComments.Add(MakeComment(id: "c1")); + state.UnresolvedComments.Add(MakeComment(id: "c2")); + state.CommentFlow = CommentFlowState.PickRemaining; + state.CurrentCommentIndex = 0; + state.CurrentState = MonitorStateId.AwaitingUser; + + var skipValue = MonitorTransitions.ChoiceValueMap["Skip this comment"]; + var action = MonitorTransitions.ProcessEvent(state, "user_chose", skipValue, null); + + Assert.True(state.CurrentCommentIndex >= 1); + Assert.NotEqual(MonitorStateId.Polling, state.CurrentState); + } + + [Fact] + public void ProcessEvent_SkipChoice_FromAddressAllIterating_StillRoutesToSkipAndAdvance() + { + // Regression: AddressAllIterating's existing skip handling must remain unchanged. + var state = CreateState(); + state.UnresolvedComments.Add(MakeComment(id: "c1")); + state.UnresolvedComments.Add(MakeComment(id: "c2")); + state.CommentFlow = CommentFlowState.AddressAllIterating; + state.CurrentCommentIndex = 0; + state.CurrentState = MonitorStateId.AwaitingUser; + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "skip", null); + + Assert.True(state.CurrentCommentIndex >= 1); + Assert.NotEqual(MonitorStateId.Polling, state.CurrentState); + } + + [Fact] + public void ProcessEvent_SkipChoice_FromExplainAllIterating_StillRoutesToAdvanceExplainAll() + { + // Regression: ExplainAllIterating intentionally uses AdvanceExplainAll (auto-emits + // the next explain_comment task) rather than re-prompting the user. Don't break that. + var state = CreateState(); + state.UnresolvedComments.Add(MakeComment(id: "c1")); + state.UnresolvedComments.Add(MakeComment(id: "c2")); + state.CommentFlow = CommentFlowState.ExplainAllIterating; + state.CurrentCommentIndex = 0; + state.CurrentState = MonitorStateId.AwaitingUser; + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "skip", null); + + // AdvanceExplainAll auto-emits an execute task for the next comment (NOT ask_user). + Assert.Equal("execute", action.Action); + Assert.Equal("explain_comment", action.Task); + } } From 42aad23436ea3442c4dbc6f878758a8394e84bb0 Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 17:02:07 -0700 Subject: [PATCH 09/20] Fix InvalidOperationException leak in ClassifyFreeformAsync (PR #51 review) SampleStructuredAsync throws InvalidOperationException when the MCP client doesn't support sampling. ClassifyFreeformAsync's catch filter excluded that exception, so it bubbled out instead of returning null. Callers ended up on the generic internal-error path instead of the documented sampling_unavailable freeform fallback. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PrCopilot/src/PrCopilot/Tools/SamplingHelper.cs | 6 +++++- .../PrCopilot.Tests/SamplingHelperTests.cs | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/PrCopilot/src/PrCopilot/Tools/SamplingHelper.cs b/PrCopilot/src/PrCopilot/Tools/SamplingHelper.cs index 6b24ec0..c49b237 100644 --- a/PrCopilot/src/PrCopilot/Tools/SamplingHelper.cs +++ b/PrCopilot/src/PrCopilot/Tools/SamplingHelper.cs @@ -274,8 +274,12 @@ internal class FreeformClassification return result; } - catch (Exception ex) when (ex is not OperationCanceledException and not InvalidOperationException) + catch (Exception ex) when (ex is not OperationCanceledException) { + // Includes InvalidOperationException from SampleStructuredAsync when the client + // doesn't support MCP sampling. Per ClassifyFreeformAsync's contract ("Returns null + // if sampling fails"), the caller falls back to BuildFreeformInterpretAction with + // a `sampling_unavailable` payload reason instead of crashing. DebugLogger.Log("Sampling", $"Freeform classification failed: {ex.Message}"); return null; } diff --git a/PrCopilot/tests/PrCopilot.Tests/SamplingHelperTests.cs b/PrCopilot/tests/PrCopilot.Tests/SamplingHelperTests.cs index 604125b..5261bb4 100644 --- a/PrCopilot/tests/PrCopilot.Tests/SamplingHelperTests.cs +++ b/PrCopilot/tests/PrCopilot.Tests/SamplingHelperTests.cs @@ -162,6 +162,23 @@ public async Task ClassifyFreeformAsync_ReturnsNull_OnSamplingFailure() Assert.Null(result); } + [Fact] + public async Task ClassifyFreeformAsync_ReturnsNull_WhenSamplingUnavailable() + { + // Client doesn't support MCP sampling — SampleStructuredAsync throws InvalidOperationException. + // ClassifyFreeformAsync's contract is "Returns null if sampling fails" — it must not let + // the exception bubble up to callers (which would crash the freeform fallback path). + var server = new FakeMcpServer(); + var result = await SamplingHelper.ClassifyFreeformAsync( + server, + "fix it", + "What would you like to do?", + ["Address all comments", "I'll handle them myself"], + CancellationToken.None); + + Assert.Null(result); + } + [Fact] public async Task ClassifyFreeformAsync_ReturnsNull_WhenNoChoices() { From 51fd161a0153473ed7440376db6d8ab2e4c78f90 Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 18:09:55 -0700 Subject: [PATCH 10/20] Route freeform Path B in waiting-comment context (PR #51 review) When ActiveWaitingComment was set but CommentFlow was None, freeform replies that didn't map to 'Resolve' or 'Go back' fell into the generic task_complete branch. ProcessTaskComplete then cleared ActiveWaitingComment and transitioned to polling, abandoning the thread. Add a waiting-comment branch to BuildFreeformPathBInstructions that mirrors the comment-flow branch (uses comment_addressed for code changes, comment_replied for reply-only). Update ProcessCommentAddressed and ProcessCommentReplied to dispatch via ActiveWaitingComment when no comment flow is active. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../StateMachine/MonitorTransitions.cs | 45 +++++ .../src/PrCopilot/Tools/MonitorFlowTools.cs | 30 ++++ .../WaitingCommentFreeformTests.cs | 169 ++++++++++++++++++ 3 files changed, 244 insertions(+) create mode 100644 PrCopilot/tests/PrCopilot.Tests/WaitingCommentFreeformTests.cs diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index 4b689b8..99b9500 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -898,6 +898,23 @@ private static MonitorAction EmitAddressCommentAction(MonitorState state) private static MonitorAction ProcessCommentAddressed(MonitorState state, object? data) { + // Waiting-comment context: ActiveWaitingComment is set but no comment flow is + // active (the original flow ended; the thread is awaiting reviewer reply). The + // index-based lookup below would either miss or operate on a different comment, + // so route the resolve through ActiveWaitingComment directly. + // See PR #51 reviewer comment on MonitorFlowTools.cs:183. + if (state.CommentFlow == CommentFlowState.None && state.ActiveWaitingComment != null) + { + var waiting = state.ActiveWaitingComment; + if (string.IsNullOrWhiteSpace(state.PendingReplyText)) + return EmitComposeReplyAction(state, waiting, "comment_addressed"); + + waiting.IsAddressed = true; + state.PendingResolveAfterAddress = true; + state.PendingResolveSummary = "Comment addressed"; + return BuildResolveThreadAction(state, waiting); + } + var addressedComment = state.UnresolvedComments.Count > state.CurrentCommentIndex ? state.UnresolvedComments[state.CurrentCommentIndex] : null; @@ -921,6 +938,34 @@ private static MonitorAction ProcessCommentAddressed(MonitorState state, object? private static MonitorAction ProcessCommentReplied(MonitorState state) { + // Waiting-comment context: see ProcessCommentAddressed. Route through the + // ActiveWaitingComment so freeform replies on a waiting thread post on the + // correct thread instead of falling through to AdvanceAfterComment. + if (state.CommentFlow == CommentFlowState.None && state.ActiveWaitingComment != null) + { + var waiting = state.ActiveWaitingComment; + if (string.IsNullOrWhiteSpace(state.PendingReplyText)) + return EmitComposeReplyAction(state, waiting, "comment_replied"); + + if (PrStatusFetcher.IsBotReviewer(waiting.Author)) + { + waiting.IsAddressed = true; + state.PendingResolveAfterAddress = true; + state.PendingResolveSummary = "Replied to comment"; + return BuildResolveThreadAction(state, waiting); + } + + // Human reviewer — post the reply on the existing waiting thread without + // resolving. The thread stays in WaitingForReplyComments. + if (!state.WaitingForReplyComments.Any(c => c.Id == waiting.Id)) + { + waiting.IsWaitingForReply = true; + state.WaitingForReplyComments.Add(waiting); + } + state.PendingAdvanceAfterReply = "Replied to comment"; + return BuildPostReplyAction(state, waiting); + } + // Agent replied to a comment (pushback/clarification) without code changes. // Auto-resolve if the reviewer is a bot (they won't reply back). // Track as waiting-for-reply if the reviewer is human. diff --git a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs index bb02e30..1e8316f 100644 --- a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs +++ b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs @@ -369,6 +369,36 @@ private static string BuildFreeformPathBInstructions(MonitorState state) "If the instruction is some other non-reply task (analysis, questions, etc.), execute it and call pr_monitor_next_step with event='task_complete'."; } + // Waiting-for-reply context — ActiveWaitingComment is set but CommentFlow has ended. + // Mirror the comment-flow branch so freeform replies that produce code changes or + // additional reply text still route through comment_addressed/comment_replied (which + // act on ActiveWaitingComment in this state) instead of falling into the generic + // task_complete branch — the latter clears ActiveWaitingComment in ProcessTaskComplete + // and drops the thread context entirely. See PR #51 reviewer comment on + // MonitorFlowTools.cs:183. + if (state.ActiveWaitingComment != null) + { + var c = state.ActiveWaitingComment; + var commentContext = $" Active waiting comment from {c.Author} on {c.FilePath}:{c.Line}: \"{c.Body}\". URL: {c.Url}."; + + return "**Path B — custom instruction:** If the text is a custom instruction that doesn't map to a single choice " + + "(or has extra instructions beyond the choice), execute the user's request directly. " + + "If the instruction involves code changes: STOP and present your changes to the user for review before committing — " + + "honor the user's custom instructions for git workflow. Only commit/push after the user approves. " + + "After pushing, draft the reply text describing what was changed and link the commit " + + $"(use `git rev-parse HEAD` to get the SHA, then format as {state.Owner}/{state.Repo}@SHA). " + + "Do NOT post the reply yourself — pass it via data='{\"reply_text\": \"your reply\"}' in pr_monitor_next_step. " + + "The server will post it to the correct review thread. " + + "Then call pr_monitor_next_step with event='comment_addressed' and data containing reply_text." + + commentContext + + MonitorTransitions.CopilotFooter(state) + + " If the instruction does NOT involve code changes: " + + "If the instruction is to reply to the waiting comment without changing code (e.g., clarification or pushback), " + + "draft the reply text and pass it via data='{\"reply_text\": \"your reply\"}' in pr_monitor_next_step — " + + "the server will post it. Then call pr_monitor_next_step with event='comment_replied' and data containing reply_text. " + + "If the instruction is some other non-reply task (analysis, questions, etc.), execute it and call pr_monitor_next_step with event='task_complete'."; + } + // No active flow — generic fallback return "**Path B — custom instruction:** If the text is a custom instruction that doesn't map to a single choice " + "(or has extra instructions beyond the choice), execute the user's request directly. " + diff --git a/PrCopilot/tests/PrCopilot.Tests/WaitingCommentFreeformTests.cs b/PrCopilot/tests/PrCopilot.Tests/WaitingCommentFreeformTests.cs new file mode 100644 index 0000000..18265cc --- /dev/null +++ b/PrCopilot/tests/PrCopilot.Tests/WaitingCommentFreeformTests.cs @@ -0,0 +1,169 @@ +// Licensed under the MIT License. + +using PrCopilot.StateMachine; +using PrCopilot.Tools; + +namespace PrCopilot.Tests; + +/// +/// Tests for freeform Path B handling when the user is on a waiting-for-reply +/// comment thread (state.ActiveWaitingComment != null, CommentFlow == None, +/// CiFailureFlow == None). A freeform reply that doesn't map to "Resolve" or +/// "Go back" must still route through the comment-reply/comment-addressed flow +/// rather than collapsing into the generic task_complete branch — otherwise +/// ProcessTaskComplete clears ActiveWaitingComment and abandons the thread. +/// See PR #51 reviewer comment on MonitorFlowTools.cs:183. +/// +public class WaitingCommentFreeformTests +{ + private static MonitorState CreateState() => new() + { + Owner = "test-owner", + Repo = "test-repo", + PrNumber = 42, + HeadSha = "abc123", + HeadBranch = "feature/test", + SessionFolder = Path.GetTempPath() + }; + + private static CommentInfo MakeComment(string id = "wait1", string author = "human-reviewer") => new() + { + Id = id, + Author = author, + FilePath = "src/File.cs", + Line = 10, + Body = "Are you sure this handles null?", + Url = "https://github.com/test/pr/42#comment" + }; + + private static ElicitChoiceResult MakeFreeformResult(string text) => new() + { + Value = text, + IsFreeform = true, + OriginalQuestion = "What do you want to do?", + OriginalChoices = ["Resolve this thread", "Go back to monitoring"] + }; + + // ──────────────────────────────────────────────────────────────────── + // Path B instructions in waiting-comment context + // ──────────────────────────────────────────────────────────────────── + + [Fact] + public void PathBInstructions_WaitingComment_MentionsCommentAddressedAndReplied() + { + // Regression: previously fell into the generic branch which only mentions + // task_complete. The agent then fired task_complete which made + // ProcessTaskComplete clear ActiveWaitingComment and resume polling, + // dropping the thread context. + var state = CreateState(); + state.ActiveWaitingComment = MakeComment(); + var result = MakeFreeformResult("draft another reply explaining the null guard"); + + var action = MonitorFlowTools.BuildFreeformInterpretAction(result, state); + + Assert.Contains("comment_addressed", action.Instructions); + Assert.Contains("comment_replied", action.Instructions); + } + + [Fact] + public void PathBInstructions_WaitingComment_IncludesActiveCommentContext() + { + var state = CreateState(); + state.ActiveWaitingComment = MakeComment(id: "thread-99"); + var result = MakeFreeformResult("resolve it now"); + + var action = MonitorFlowTools.BuildFreeformInterpretAction(result, state); + + // Must surface the active waiting thread so the agent acts on the right comment. + Assert.Contains("Are you sure this handles null?", action.Instructions); + } + + [Fact] + public void PathBInstructions_NoFlowAndNoWaitingComment_StillUsesGenericBranch() + { + // Regression: don't accidentally apply waiting-comment branch when there's + // no waiting comment. + var state = CreateState(); + var result = MakeFreeformResult("just print hello"); + + var action = MonitorFlowTools.BuildFreeformInterpretAction(result, state); + + Assert.Contains("task_complete", action.Instructions); + Assert.DoesNotContain("comment_addressed", action.Instructions); + Assert.DoesNotContain("comment_replied", action.Instructions); + } + + // ──────────────────────────────────────────────────────────────────── + // ProcessCommentAddressed / ProcessCommentReplied dispatch in waiting-comment context + // ──────────────────────────────────────────────────────────────────── + + [Fact] + public void CommentAddressed_WaitingCommentNoFlow_ResolvesActiveWaitingThread() + { + // Agent finished the freeform task with code changes and fires comment_addressed. + // Without a comment flow there's no UnresolvedComments[index] to operate on — + // the dispatcher must use ActiveWaitingComment. + var state = CreateState(); + var comment = MakeComment(id: "thread-7"); + state.ActiveWaitingComment = comment; + state.CurrentState = MonitorStateId.ExecutingTask; + state.PendingReplyText = "Fixed the null guard in commit abc."; + + var action = MonitorTransitions.ProcessEvent(state, "comment_addressed", null, null); + + // Should resolve the waiting thread, not fall through to AdvanceAfterCommentAddressed + // (which would just transition to polling and lose the resolve). + Assert.Equal("auto_execute", action.Action); + Assert.Equal("resolve_thread", action.Task); + Assert.Same(comment, action.Context); + Assert.True(state.PendingResolveAfterAddress); + } + + [Fact] + public void CommentReplied_WaitingCommentNoFlow_HumanReviewer_PostsReplyOnActiveWaitingThread() + { + var state = CreateState(); + var comment = MakeComment(id: "thread-8", author: "human-reviewer"); + state.ActiveWaitingComment = comment; + state.CurrentState = MonitorStateId.ExecutingTask; + state.PendingReplyText = "Yes, the guard handles null — see the test added in commit xyz."; + + var action = MonitorTransitions.ProcessEvent(state, "comment_replied", null, null); + + // Human reviewer + reply-only → post the reply on the existing waiting thread. + Assert.Equal("auto_execute", action.Action); + Assert.Equal("post_thread_reply", action.Task); + Assert.Same(comment, action.Context); + } + + [Fact] + public void CommentReplied_WaitingCommentNoFlow_BotReviewer_AutoResolves() + { + var state = CreateState(); + var comment = MakeComment(id: "thread-9", author: "copilot-pull-request-reviewer[bot]"); + state.ActiveWaitingComment = comment; + state.CurrentState = MonitorStateId.ExecutingTask; + state.PendingReplyText = "Acknowledged — see analysis above."; + + var action = MonitorTransitions.ProcessEvent(state, "comment_replied", null, null); + + // Bot reviewer won't follow up → resolve the thread immediately. + Assert.Equal("auto_execute", action.Action); + Assert.Equal("resolve_thread", action.Task); + Assert.Same(comment, action.Context); + } + + [Fact] + public void CommentAddressed_NoFlowNoWaitingComment_StillSafelyAdvances() + { + // Regression: when there's neither a flow nor a waiting comment, the dispatcher + // must not crash — it should fall through to the existing AdvanceAfterCommentAddressed + // path (which transitions back to polling). + var state = CreateState(); + state.CurrentState = MonitorStateId.ExecutingTask; + + // Should not throw. + var action = MonitorTransitions.ProcessEvent(state, "comment_addressed", null, null); + Assert.NotNull(action); + } +} From decce7b2f6a6e329f9dd37d4534cd0753a15e73d Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 18:15:04 -0700 Subject: [PATCH 11/20] Extend (ready) recovery to ApplyingFix state (PR #51 review) The CI apply_fix task runs in ApplyingFix and tells the agent to push before reporting push_completed. A post-push hook firing event=ready from ApplyingFix fell into RecoverFromUnexpectedState, which wiped CiFailureFlow and lost the CI flow context. - Add EnterApplyingFix that snapshots HEAD (shared SnapshotForRecovery helper with EnterExecutingTask). - Have BeginApplyFix go through EnterApplyingFix. - Add (ApplyingFix, 'ready') => BuildRecoverFromReadyAction so the existing headAdvanced + CiFailureFlow != None branch dispatches push_completed. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../PrCopilot/StateMachine/MonitorState.cs | 18 +++ .../StateMachine/MonitorTransitions.cs | 9 +- .../ApplyingFixReadyRecoveryTests.cs | 104 ++++++++++++++++++ 3 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 PrCopilot/tests/PrCopilot.Tests/ApplyingFixReadyRecoveryTests.cs diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorState.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorState.cs index 2e10e16..1fd34a5 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorState.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorState.cs @@ -167,6 +167,24 @@ public void ClearPendingCommentState() public void EnterExecutingTask() { CurrentState = MonitorStateId.ExecutingTask; + SnapshotForRecovery(); + } + + /// + /// Transition to and snapshot HEAD for + /// post-push recovery. The CI fix flow's apply_fix task tells the agent to push + /// before reporting push_completed; if a post-push hook fires event=ready instead, + /// the (ApplyingFix, "ready") recovery uses this snapshot to detect that the push + /// happened and re-dispatch as push_completed (rather than wiping CiFailureFlow). + /// + public void EnterApplyingFix() + { + CurrentState = MonitorStateId.ApplyingFix; + SnapshotForRecovery(); + } + + private void SnapshotForRecovery() + { HeadShaAtTaskStart = HeadSha; // Reset expected completion to "ambiguous" on every entry. Emitters that know their // task's single completion event must set this AFTER calling EnterExecutingTask. diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index 99b9500..943cec9 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -193,6 +193,13 @@ public static MonitorAction ProcessEvent(MonitorState state, string eventType, s // event, otherwise falls through to flow-aware recovery prompt. (MonitorStateId.ExecutingTask, "ready") => BuildRecoverFromReadyAction(state), + // Same recovery for the CI apply_fix task, which runs in ApplyingFix and tells + // the agent to push before reporting push_completed. A post-push hook firing + // event=ready must be recognized as a successful push (HEAD advanced + CI flow + // → dispatch push_completed) instead of falling through to RecoverFromUnexpectedState + // which wipes CiFailureFlow. + (MonitorStateId.ApplyingFix, "ready") => BuildRecoverFromReadyAction(state), + // Recovery: agent sent task_complete from AwaitingUser (skipped a tool call) (MonitorStateId.AwaitingUser, "task_complete") => RecoverFromUnexpectedTaskComplete(state), @@ -1335,7 +1342,7 @@ private static string FormatCiWithRecommendation(MonitorState state) private static MonitorAction BeginApplyFix(MonitorState state) { - state.CurrentState = MonitorStateId.ApplyingFix; + state.EnterApplyingFix(); return new MonitorAction { Action = "execute", diff --git a/PrCopilot/tests/PrCopilot.Tests/ApplyingFixReadyRecoveryTests.cs b/PrCopilot/tests/PrCopilot.Tests/ApplyingFixReadyRecoveryTests.cs new file mode 100644 index 0000000..f75e9ef --- /dev/null +++ b/PrCopilot/tests/PrCopilot.Tests/ApplyingFixReadyRecoveryTests.cs @@ -0,0 +1,104 @@ +// Licensed under the MIT License. + +using PrCopilot.StateMachine; + +namespace PrCopilot.Tests; + +/// +/// Tests for the (ApplyingFix, "ready") recovery path. The CI fix flow runs in +/// MonitorStateId.ApplyingFix and instructs the agent to push before reporting +/// push_completed. A post-push hook that fires event=ready instead must be +/// recognized as a successful push (HEAD advanced + CI flow → dispatch +/// push_completed) rather than falling into RecoverFromUnexpectedState which +/// would wipe CiFailureFlow and reset monitoring. +/// See PR #51 reviewer comment on MonitorTransitions.cs:194. +/// +public class ApplyingFixReadyRecoveryTests +{ + private static MonitorState CreateState() => new() + { + Owner = "test-owner", + Repo = "test-repo", + PrNumber = 42, + HeadSha = "abc123", + HeadBranch = "feature/test", + SessionFolder = Path.GetTempPath() + }; + + [Fact] + public void ApplyingFix_Ready_DispatchesRecoverFromReadyAutoExecute() + { + // The transition table must route (ApplyingFix, "ready") to the same + // recover_from_ready auto_execute as (ExecutingTask, "ready"). Without it, + // the event falls through to RecoverFromUnexpectedState which wipes + // CiFailureFlow. + var state = CreateState(); + state.CurrentState = MonitorStateId.ApplyingFix; + state.CiFailureFlow = CiFailureFlowState.InvestigationResults; + + var action = MonitorTransitions.ProcessEvent(state, "ready", null, null); + + Assert.Equal("auto_execute", action.Action); + Assert.Equal("recover_from_ready_in_executing_task", action.Task); + // Recovery must NOT have wiped CI flow context — the resolution step needs it. + Assert.Equal(CiFailureFlowState.InvestigationResults, state.CiFailureFlow); + } + + [Fact] + public void ApplyingFix_Ready_HeadAdvanced_DispatchesPushCompleted() + { + // End-to-end behavior: when ApplyingFix produces a push (HEAD advanced) and + // then re-enters with event=ready, the resolution step should treat it as a + // successful push_completed and resume polling — same as if the agent had + // called event=push_completed directly. + var state = CreateState(); + state.CurrentState = MonitorStateId.ApplyingFix; + state.CiFailureFlow = CiFailureFlowState.InvestigationResults; + state.HeadShaAtTaskStart = "abc123"; + state.HeadSha = "def456"; // simulate HEAD advanced + + var action = MonitorTransitions.BuildRecoverFromReadyResolution(state, headAdvanced: true); + + // push_completed from ApplyingFix transitions to polling. + Assert.Equal("polling", action.Action); + } + + [Fact] + public void BeginApplyFix_CapturesHeadShaSnapshot() + { + // The recovery path compares HeadSha against HeadShaAtTaskStart to decide + // whether a push happened. BeginApplyFix must capture the snapshot just like + // EnterExecutingTask does — otherwise headAdvanced is always wrong for + // the apply_fix flow and the recovery makes the wrong decision. + var state = CreateState(); + state.CiFailureFlow = CiFailureFlowState.InvestigationResults; + state.SuggestedFix = "do the thing"; + state.HeadSha = "before-apply"; + state.HeadShaAtTaskStart = null; + + // Trigger BeginApplyFix via the public dispatch. + state.CurrentState = MonitorStateId.AwaitingUser; + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "apply_fix", null); + + Assert.Equal(MonitorStateId.ApplyingFix, state.CurrentState); + Assert.Equal("before-apply", state.HeadShaAtTaskStart); + } + + [Fact] + public void ApplyingFix_Ready_HeadNotAdvanced_OffersFlowAwareRecoveryPrompt() + { + // No push detected (HEAD unchanged) — recovery should surface a flow-aware + // ask_user prompt rather than wiping CI flow context. + var state = CreateState(); + state.CurrentState = MonitorStateId.ApplyingFix; + state.CiFailureFlow = CiFailureFlowState.InvestigationResults; + state.HeadShaAtTaskStart = "abc123"; + state.HeadSha = "abc123"; + + var action = MonitorTransitions.BuildRecoverFromReadyResolution(state, headAdvanced: false); + + Assert.Equal("ask_user", action.Action); + // Must not have wiped CI flow context — the prompt is flow-aware. + Assert.Equal(CiFailureFlowState.InvestigationResults, state.CiFailureFlow); + } +} From ba7a0c615660e8efdaa3da19581aba00df65febb Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 18:21:17 -0700 Subject: [PATCH 12/20] Preserve recovery snapshot in EmitComposeReplyAction (PR #51 review) EmitComposeReplyAction was calling state.EnterExecutingTask(), which overwrites HeadShaAtTaskStart (with the still-stale state.HeadSha, since the server hasn't polled since the agent pushed) and resets ExecutingTaskExpectedCompletion to null. compose_reply never touches git, so the prior task's recovery snapshot must be preserved -- otherwise an event=ready arriving during compose_reply misattributes the original push to compose_reply and surfaces a bogus recovery prompt or auto-dispatches the wrong completion event. Fix: set CurrentState = ExecutingTask directly. Both callers (ProcessCommentAddressed, ProcessCommentReplied) already run with CurrentState == ExecutingTask, so no transition is needed. Added 3 regression tests in ComposeReplySnapshotTests.cs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../StateMachine/MonitorTransitions.cs | 12 ++- .../ComposeReplySnapshotTests.cs | 91 +++++++++++++++++++ 2 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 PrCopilot/tests/PrCopilot.Tests/ComposeReplySnapshotTests.cs diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index 943cec9..5534adb 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -1155,10 +1155,20 @@ private static MonitorAction BuildPostReplyAction(MonitorState state, CommentInf /// emit a compose_reply task. The server's sampling handler intercepts this and composes /// the reply via sampling (stored in PendingReplyText). Sampling support is required for /// this flow; if sampling is unavailable, the task will fail with no agent fallback. + /// + /// IMPORTANT: This is a re-entry — the caller (ProcessCommentAddressed/ProcessCommentReplied) + /// runs while CurrentState is already ExecutingTask. We must NOT call EnterExecutingTask + /// here because that would overwrite HeadShaAtTaskStart (with the still-stale state.HeadSha, + /// since the server hasn't refreshed since the agent pushed) and reset + /// ExecutingTaskExpectedCompletion to null. compose_reply itself never touches git, so the + /// prior task's recovery snapshot must be preserved — otherwise an event=ready arriving + /// during compose_reply (from a post-push hook) misattributes the original push to + /// compose_reply and surfaces a bogus recovery prompt or auto-dispatches the wrong + /// completion event. See PR #51 reviewer comment on MonitorTransitions.cs:1109. /// private static MonitorAction EmitComposeReplyAction(MonitorState state, CommentInfo c, string completionEvent) { - state.EnterExecutingTask(); + state.CurrentState = MonitorStateId.ExecutingTask; state.PendingCompletionEvent = completionEvent; return new MonitorAction { diff --git a/PrCopilot/tests/PrCopilot.Tests/ComposeReplySnapshotTests.cs b/PrCopilot/tests/PrCopilot.Tests/ComposeReplySnapshotTests.cs new file mode 100644 index 0000000..a8d91fc --- /dev/null +++ b/PrCopilot/tests/PrCopilot.Tests/ComposeReplySnapshotTests.cs @@ -0,0 +1,91 @@ +// Licensed under the MIT License. + +using PrCopilot.StateMachine; + +namespace PrCopilot.Tests; + +/// +/// Regression tests for EmitComposeReplyAction (PR #51 reviewer comment on +/// MonitorTransitions.cs:1109). +/// +/// EmitComposeReplyAction is called from ProcessCommentAddressed/ProcessCommentReplied +/// when the agent fires comment_addressed/comment_replied without including reply_text. +/// Both callers are dispatched from CurrentState == ExecutingTask, so EmitComposeReplyAction +/// is a re-entry into ExecutingTask. compose_reply itself never touches git, so it must +/// preserve the prior task's recovery snapshot (HeadShaAtTaskStart and +/// ExecutingTaskExpectedCompletion) — overwriting them causes the (ExecutingTask, "ready") +/// recovery to misattribute the original task's push to compose_reply and surface a bogus +/// recovery prompt (or auto-dispatch the wrong completion event). +/// +public class ComposeReplySnapshotTests +{ + private static MonitorState CreateExecutingTaskState() => new() + { + Owner = "test-owner", + Repo = "test-repo", + PrNumber = 42, + HeadSha = "stale-pre-push", + HeadBranch = "feature/test", + SessionFolder = Path.GetTempPath(), + CurrentState = MonitorStateId.ExecutingTask, + CommentFlow = CommentFlowState.SingleCommentPrompt, + CurrentCommentIndex = 0, + // Snapshot from the original push-capable task (e.g., address_comment) + HeadShaAtTaskStart = "original-snapshot", + ExecutingTaskExpectedCompletion = "comment_addressed" + }; + + private static CommentInfo MakeComment(string author = "human-reviewer") => new() + { + Id = "c1", + Author = author, + FilePath = "src/File.cs", + Line = 10, + Body = "Fix this", + Url = "https://example/c1" + }; + + [Fact] + public void CommentAddressed_NoReplyText_PreservesHeadShaSnapshot() + { + // Before fix: EmitComposeReplyAction calls EnterExecutingTask which overwrites + // HeadShaAtTaskStart with state.HeadSha (still pre-push, since the server hasn't + // refreshed since the agent pushed). The original task's snapshot is lost. + var state = CreateExecutingTaskState(); + state.UnresolvedComments.Add(MakeComment()); + // PendingReplyText is empty → ProcessCommentAddressed will route to EmitComposeReplyAction. + + var action = MonitorTransitions.ProcessEvent(state, "comment_addressed", null, null); + + Assert.Equal("compose_reply", action.Task); + Assert.Equal("original-snapshot", state.HeadShaAtTaskStart); + } + + [Fact] + public void CommentAddressed_NoReplyText_PreservesExpectedCompletion() + { + // Before fix: EnterExecutingTask resets ExecutingTaskExpectedCompletion to null, + // turning an unambiguous task ("comment_addressed") into the ambiguous branch + // → bogus ask_user recovery prompt instead of auto-dispatch. + var state = CreateExecutingTaskState(); + state.UnresolvedComments.Add(MakeComment()); + + var action = MonitorTransitions.ProcessEvent(state, "comment_addressed", null, null); + + Assert.Equal("compose_reply", action.Task); + Assert.Equal("comment_addressed", state.ExecutingTaskExpectedCompletion); + } + + [Fact] + public void CommentReplied_NoReplyText_PreservesHeadShaSnapshot() + { + var state = CreateExecutingTaskState(); + state.UnresolvedComments.Add(MakeComment()); + state.ExecutingTaskExpectedCompletion = null; // apply_recommendation is ambiguous + + var action = MonitorTransitions.ProcessEvent(state, "comment_replied", null, null); + + Assert.Equal("compose_reply", action.Task); + Assert.Equal("original-snapshot", state.HeadShaAtTaskStart); + } +} From baaa2a9b5b1f16779c3c6a77d1d520a45c938be9 Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 18:23:46 -0700 Subject: [PATCH 13/20] Fix duplicate tag in TryClassifyFreeformViaSamplingAsync (PR #51 review) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs index 1e8316f..7a73fa7 100644 --- a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs +++ b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs @@ -116,7 +116,6 @@ private static async Task TryHandleViaSamplingAsync( } } - /// /// /// Attempt to classify freeform text via sampling. Returns the full classification record /// (including ) when sampling From 9649219dbd36448605cd828c61b734b899c7057a Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 22:41:11 -0700 Subject: [PATCH 14/20] Hoist "stop" choice above per-flow routing (PR #51 review) BuildExecutingTaskRecoveryPrompt offers "Stop monitoring" (-> value "stop") in every flow-preserving branch. But ProcessUserChoice routed to ProcessCommentChoice / ProcessCiFailureChoice / ProcessWaitingCommentChoice first when those flows were set, and none of them handled "stop" -- they all fell through to TransitionToPolling / ClearWaitingAndResume. Result: picking "Stop monitoring" from a recovery prompt silently resumed polling instead of stopping the monitor. Fix: hoist "stop" handling above the per-flow routing in ProcessUserChoice so it always returns StopMonitoring regardless of active flow. Added 4 regression tests in StopChoiceRoutingTests.cs covering all three flow contexts plus the no-flow regression case. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../StateMachine/MonitorTransitions.cs | 9 ++ .../PrCopilot.Tests/StopChoiceRoutingTests.cs | 91 +++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 PrCopilot/tests/PrCopilot.Tests/StopChoiceRoutingTests.cs diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index 5534adb..03acb27 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -590,6 +590,15 @@ private static MonitorAction TransitionToPolling(MonitorState state) private static MonitorAction ProcessUserChoice(MonitorState state, string? choice, object? data) { DebugLogger.Log("StateMachine", $"ProcessUserChoice: choice={choice ?? "null"}, commentFlow={state.CommentFlow}, activeWaiting={state.ActiveWaitingComment != null}"); + + // "Stop monitoring" is offered in every flow-preserving recovery prompt + // (BuildExecutingTaskRecoveryPrompt). It must take precedence over per-flow + // routing — otherwise ProcessCommentChoice/ProcessCiFailureChoice/ + // ProcessWaitingCommentChoice swallow "stop" via their fall-through branches + // and silently resume polling instead of stopping the monitor. + if (choice == "stop") + return StopMonitoring(state); + // Route based on the active flow if (state.CommentFlow != CommentFlowState.None) return ProcessCommentChoice(state, choice); diff --git a/PrCopilot/tests/PrCopilot.Tests/StopChoiceRoutingTests.cs b/PrCopilot/tests/PrCopilot.Tests/StopChoiceRoutingTests.cs new file mode 100644 index 0000000..b765349 --- /dev/null +++ b/PrCopilot/tests/PrCopilot.Tests/StopChoiceRoutingTests.cs @@ -0,0 +1,91 @@ +// Licensed under the MIT License. + +using PrCopilot.StateMachine; + +namespace PrCopilot.Tests; + +/// +/// Regression tests for "Stop monitoring" choice routing in the (ExecutingTask, "ready") +/// recovery prompts (PR #51 reviewer comment on MonitorTransitions.cs:420). +/// +/// BuildExecutingTaskRecoveryPrompt offers "Stop monitoring" in all three flow-preserving +/// branches (CommentFlow, CiFailureFlow, ActiveWaitingComment). The choice maps to value +/// "stop". But ProcessUserChoice routes to the per-flow handlers FIRST when those flow +/// states are set — and none of ProcessCommentChoice / ProcessCiFailureChoice / +/// ProcessWaitingCommentChoice handle "stop". They all fall through to TransitionToPolling +/// (or ClearWaitingAndResume), so picking "Stop monitoring" silently resumes polling +/// instead of stopping the monitor. +/// +public class StopChoiceRoutingTests +{ + private static MonitorState BaseState() => new() + { + Owner = "test-owner", + Repo = "test-repo", + PrNumber = 42, + HeadSha = "abc123", + HeadBranch = "feature/test", + SessionFolder = Path.GetTempPath(), + CurrentState = MonitorStateId.AwaitingUser + }; + + private static CommentInfo MakeComment() => new() + { + Id = "c1", + Author = "human-reviewer", + FilePath = "src/File.cs", + Line = 10, + Body = "Fix this", + Url = "https://example/c1" + }; + + [Fact] + public void StopChoice_FromCommentFlowRecoveryPrompt_StopsMonitor() + { + var state = BaseState(); + state.CommentFlow = CommentFlowState.SingleCommentPrompt; + state.UnresolvedComments.Add(MakeComment()); + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "stop", null); + + Assert.Equal("stop", action.Action); + Assert.Equal(MonitorStateId.Stopped, state.CurrentState); + } + + [Fact] + public void StopChoice_FromCiFailureRecoveryPrompt_StopsMonitor() + { + var state = BaseState(); + state.CiFailureFlow = CiFailureFlowState.InvestigationResults; + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "stop", null); + + Assert.Equal("stop", action.Action); + Assert.Equal(MonitorStateId.Stopped, state.CurrentState); + } + + [Fact] + public void StopChoice_FromWaitingCommentRecoveryPrompt_StopsMonitor() + { + var state = BaseState(); + state.ActiveWaitingComment = MakeComment(); + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "stop", null); + + Assert.Equal("stop", action.Action); + Assert.Equal(MonitorStateId.Stopped, state.CurrentState); + } + + [Fact] + public void StopChoice_FromGenericRecoveryPrompt_StopsMonitor_Regression() + { + // The no-flow case already works (terminal-level switch handles "stop"). + // Ensure the fix doesn't regress this path. + var state = BaseState(); + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "stop", null); + + Assert.Equal("stop", action.Action); + Assert.Equal(MonitorStateId.Stopped, state.CurrentState); + } +} From 32401975aed10a712d6187220b8075bd41fe66b8 Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 22:44:38 -0700 Subject: [PATCH 15/20] Make treat_as_replied_externally flow-aware (PR #51 review) treat_as_replied_externally was unconditionally routed to SkipAndAdvanceComment, which only knows the AddressAllIterating-style prompt. For ExplainAllIterating, the user dropped out of the explain-all loop and got an Address prompt instead of the next comment's explain task. For SingleCommentPrompt, the expected PickRemaining flow was bypassed. Fix: route through AdvanceAfterComment which is fully flow-aware (same path the normal comment_replied event uses). Added 4 regression tests in TreatAsRepliedExternallyRoutingTests.cs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../StateMachine/MonitorTransitions.cs | 2 +- .../TreatAsRepliedExternallyRoutingTests.cs | 106 ++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 PrCopilot/tests/PrCopilot.Tests/TreatAsRepliedExternallyRoutingTests.cs diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index 03acb27..60e8a14 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -686,7 +686,7 @@ private static MonitorAction ProcessCommentChoice(MonitorState state, string? ch return ProcessCommentAddressed(state, null); if (choice == "treat_as_replied_externally") - return SkipAndAdvanceComment(state); + return AdvanceAfterComment(state, "Comment replied externally"); return (state.CommentFlow, choice) switch { diff --git a/PrCopilot/tests/PrCopilot.Tests/TreatAsRepliedExternallyRoutingTests.cs b/PrCopilot/tests/PrCopilot.Tests/TreatAsRepliedExternallyRoutingTests.cs new file mode 100644 index 0000000..8e0ac6a --- /dev/null +++ b/PrCopilot/tests/PrCopilot.Tests/TreatAsRepliedExternallyRoutingTests.cs @@ -0,0 +1,106 @@ +// Licensed under the MIT License. + +using PrCopilot.StateMachine; + +namespace PrCopilot.Tests; + +/// +/// Regression tests for treat_as_replied_externally routing in ProcessCommentChoice +/// (PR #51 reviewer comment on MonitorTransitions.cs:680). +/// +/// "Treat as comment replied" (-> "treat_as_replied_externally") was unconditionally +/// routed to SkipAndAdvanceComment, which always shows the AddressAllIterating-style +/// "Address this comment / Skip / Done" prompt for the next comment. That's wrong for +/// ExplainAllIterating: the user is in the explain-all loop and should see the +/// explain_comment task for the next thread, not an address prompt — i.e., treat_as_replied +/// should be flow-aware just like the normal comment_replied event (which routes through +/// AdvanceAfterComment -> AdvanceExplainAll for the explain-all flow). +/// +public class TreatAsRepliedExternallyRoutingTests +{ + private static MonitorState BaseState() => new() + { + Owner = "test-owner", + Repo = "test-repo", + PrNumber = 42, + HeadSha = "abc123", + HeadBranch = "feature/test", + SessionFolder = Path.GetTempPath(), + CurrentState = MonitorStateId.AwaitingUser + }; + + private static CommentInfo MakeComment(string id) => new() + { + Id = id, + Author = "human-reviewer", + FilePath = "src/File.cs", + Line = 10, + Body = $"Comment {id}", + Url = $"https://example/{id}" + }; + + [Fact] + public void TreatAsRepliedExternally_InExplainAllIterating_AdvancesViaExplainAll() + { + var state = BaseState(); + state.CommentFlow = CommentFlowState.ExplainAllIterating; + state.UnresolvedComments.Add(MakeComment("c1")); + state.UnresolvedComments.Add(MakeComment("c2")); + state.CurrentCommentIndex = 0; + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "treat_as_replied_externally", null); + + // Should re-emit explain task for next comment, not the "Address this comment" prompt + Assert.Equal("execute", action.Action); + Assert.Equal("explain_comment", action.Task); + Assert.Equal(1, state.CurrentCommentIndex); + Assert.Equal(CommentFlowState.ExplainAllIterating, state.CommentFlow); + } + + [Fact] + public void TreatAsRepliedExternally_InAddressAllIterating_AsksAddressOrSkip_Regression() + { + var state = BaseState(); + state.CommentFlow = CommentFlowState.AddressAllIterating; + state.UnresolvedComments.Add(MakeComment("c1")); + state.UnresolvedComments.Add(MakeComment("c2")); + state.CurrentCommentIndex = 0; + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "treat_as_replied_externally", null); + + Assert.Equal("ask_user", action.Action); + Assert.Contains("Address this comment", action.Choices ?? []); + Assert.Equal(1, state.CurrentCommentIndex); + } + + [Fact] + public void TreatAsRepliedExternally_InSingleCommentPrompt_GoesToPickRemaining_Regression() + { + var state = BaseState(); + state.CommentFlow = CommentFlowState.SingleCommentPrompt; + state.UnresolvedComments.Add(MakeComment("c1")); + state.UnresolvedComments.Add(MakeComment("c2")); + state.CurrentCommentIndex = 0; + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "treat_as_replied_externally", null); + + // Single-comment branch: AdvanceAfterComment falls through to PickRemaining + Assert.Equal("ask_user", action.Action); + Assert.Equal(CommentFlowState.PickRemaining, state.CommentFlow); + Assert.Contains("Address next comment", action.Choices ?? []); + } + + [Fact] + public void TreatAsRepliedExternally_InExplainAllIterating_AtLastComment_StopsFlow() + { + var state = BaseState(); + state.CommentFlow = CommentFlowState.ExplainAllIterating; + state.UnresolvedComments.Add(MakeComment("c1")); + state.CurrentCommentIndex = 0; + + var action = MonitorTransitions.ProcessEvent(state, "user_chose", "treat_as_replied_externally", null); + + // No more comments → AdvanceExplainAll falls through to TransitionToPolling + Assert.Equal("polling", action.Action); + } +} From 021e99afccbfa3fb381d6026415803b20655e685 Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 22:49:24 -0700 Subject: [PATCH 16/20] Guard ProcessTaskComplete against waiting-thread stale indexing (PR #51 review) The waiting-thread branches in ProcessCommentAddressed/ProcessCommentReplied set PendingResolveAfterAddress / PendingAdvanceAfterReply without entering a comment flow (CommentFlow stays None). When ProcessTaskComplete then runs, it indexes UnresolvedComments[CurrentCommentIndex] -- pointing to a stale entry from a prior flow -- and triggers a bogus rerequest_review on an unrelated reviewer or surfaces PickRemaining for unrelated comments. Fix: add a guard at the top of both PendingResolveAfterAddress and PendingAdvanceAfterReply branches -- when CommentFlow == None, this was a waiting-thread task; just TransitionToPolling (which clears all pending state). Added 3 regression tests in WaitingThreadResolveTests.cs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../StateMachine/MonitorTransitions.cs | 17 +++ .../WaitingThreadResolveTests.cs | 112 ++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 PrCopilot/tests/PrCopilot.Tests/WaitingThreadResolveTests.cs diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index 60e8a14..84e6229 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -223,6 +223,16 @@ private static MonitorAction ProcessTaskComplete(MonitorState state) // If we just auto-resolved a thread after addressing/replying to a comment, advance if (state.PendingResolveAfterAddress) { + // Waiting-thread resolution: ProcessCommentAddressed/ProcessCommentReplied + // entered their waiting branch (CommentFlow == None && ActiveWaitingComment != null) + // and set PendingResolveAfterAddress. There's no in-flow comment to advance past + // — UnresolvedComments[CurrentCommentIndex] points to a stale entry from a prior + // flow, so the indexing logic below would re-request review on an unrelated + // reviewer or surface PickRemaining for unrelated comments. Just clean state and + // return to polling. See PR #51 reviewer comment on MonitorTransitions.cs:921. + if (state.CommentFlow == CommentFlowState.None) + return TransitionToPolling(state); + var summary = state.PendingResolveSummary ?? "Comment addressed"; state.PendingResolveAfterAddress = false; state.ActiveWaitingComment = null; @@ -246,6 +256,13 @@ private static MonitorAction ProcessTaskComplete(MonitorState state) // If we just posted a reply (no resolve) — advance to next comment or re-request if (state.PendingAdvanceAfterReply != null) { + // Same waiting-thread guard as PendingResolveAfterAddress above. The waiting + // branch of ProcessCommentReplied (human reviewer) sets PendingAdvanceAfterReply + // without entering a comment flow; the indexed lookup below would target a + // stale comment from a prior flow. + if (state.CommentFlow == CommentFlowState.None) + return TransitionToPolling(state); + var summary = state.PendingAdvanceAfterReply; state.PendingAdvanceAfterReply = null; diff --git a/PrCopilot/tests/PrCopilot.Tests/WaitingThreadResolveTests.cs b/PrCopilot/tests/PrCopilot.Tests/WaitingThreadResolveTests.cs new file mode 100644 index 0000000..a4969f0 --- /dev/null +++ b/PrCopilot/tests/PrCopilot.Tests/WaitingThreadResolveTests.cs @@ -0,0 +1,112 @@ +// Licensed under the MIT License. + +using PrCopilot.StateMachine; + +namespace PrCopilot.Tests; + +/// +/// Regression tests for ProcessTaskComplete handling of waiting-thread resolutions +/// (PR #51 reviewer comment on MonitorTransitions.cs:921). +/// +/// The waiting-thread branches in ProcessCommentAddressed/ProcessCommentReplied set +/// PendingResolveAfterAddress / PendingAdvanceAfterReply but do NOT enter a comment flow +/// (CommentFlow stays None). When ProcessTaskComplete then runs, it indexes +/// UnresolvedComments[CurrentCommentIndex] — which can point to a stale, unrelated +/// comment from a prior flow — and may trigger a bogus rerequest_review on that +/// reviewer or surface PickRemaining prompts for unrelated comments instead of just +/// returning to monitoring. +/// +public class WaitingThreadResolveTests +{ + private static MonitorState BaseState() => new() + { + Owner = "test-owner", + Repo = "test-repo", + PrAuthor = "pr-author", + CurrentUser = "current-user", + PrNumber = 42, + HeadSha = "abc123", + HeadBranch = "feature/test", + SessionFolder = Path.GetTempPath(), + CurrentState = MonitorStateId.ExecutingTask + }; + + private static CommentInfo MakeComment(string id, string author) => new() + { + Id = id, + Author = author, + FilePath = "src/File.cs", + Line = 10, + Body = $"Comment {id}", + Url = $"https://example/{id}" + }; + + [Fact] + public void TaskComplete_AfterWaitingThreadResolve_TransitionsToPolling() + { + // Simulates: waiting branch of ProcessCommentReplied (bot author) just resolved + // the waiting thread. ProcessTaskComplete is now firing. UnresolvedComments + // contains a stale entry from a prior flow with a different reviewer who has NOT + // been re-requested. The bug: ProcessTaskComplete indexes UnresolvedComments[0] + // and calls ShouldReRequestReview on "other-reviewer" → triggers bogus rerequest. + var state = BaseState(); + state.CommentFlow = CommentFlowState.None; + state.ActiveWaitingComment = MakeComment("waiting", "some-bot[bot]"); + state.UnresolvedComments.Add(MakeComment("stale", "other-reviewer")); + state.CurrentCommentIndex = 0; + state.PendingResolveAfterAddress = true; + state.PendingResolveSummary = "Comment addressed"; + + var action = MonitorTransitions.ProcessEvent(state, "task_complete", null, null); + + Assert.Equal("polling", action.Action); + Assert.Null(state.PendingReRequestReviewer); + Assert.DoesNotContain("other-reviewer", state.ReviewsReRequested); + Assert.Null(state.ActiveWaitingComment); + Assert.False(state.PendingResolveAfterAddress); + } + + [Fact] + public void TaskComplete_AfterWaitingThreadReplyPosted_TransitionsToPolling() + { + // Simulates: waiting branch of ProcessCommentReplied (human author) just posted + // a reply on the waiting thread. ProcessTaskComplete is now firing via + // PendingAdvanceAfterReply. Same indexing bug as above. + var state = BaseState(); + state.CommentFlow = CommentFlowState.None; + state.ActiveWaitingComment = MakeComment("waiting", "human-reviewer"); + state.WaitingForReplyComments.Add(state.ActiveWaitingComment); + state.UnresolvedComments.Add(MakeComment("stale", "other-reviewer")); + state.CurrentCommentIndex = 0; + state.PendingAdvanceAfterReply = "Replied to comment"; + + var action = MonitorTransitions.ProcessEvent(state, "task_complete", null, null); + + Assert.Equal("polling", action.Action); + Assert.Null(state.PendingReRequestReviewer); + Assert.DoesNotContain("other-reviewer", state.ReviewsReRequested); + Assert.Null(state.PendingAdvanceAfterReply); + } + + [Fact] + public void TaskComplete_InActiveCommentFlow_StillTriggersRerequestReview_Regression() + { + // Regression: the in-flow path (CommentFlow != None) must STILL re-request review + // when appropriate. The fix only short-circuits the waiting-thread case + // (CommentFlow == None). + var state = BaseState(); + state.CommentFlow = CommentFlowState.SingleCommentPrompt; + state.UnresolvedComments.Add(MakeComment("c1", "in-flow-reviewer")); + state.CurrentCommentIndex = 0; + state.PendingResolveAfterAddress = true; + state.PendingResolveSummary = "Comment addressed"; + + var action = MonitorTransitions.ProcessEvent(state, "task_complete", null, null); + + // Only comment for in-flow-reviewer is the one we just resolved → ShouldReRequestReview + // returns true → BuildReRequestReviewAction fires. + Assert.Equal("auto_execute", action.Action); + Assert.Equal("request_review", action.Task); + Assert.Equal("in-flow-reviewer", state.PendingReRequestReviewer); + } +} From cede69424dee27cedd65b943a66e8c83552d3d8f Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 23:17:21 -0700 Subject: [PATCH 17/20] Add misattributes and rerequest to cspell dictionary Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PrCopilot/cspell.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/PrCopilot/cspell.json b/PrCopilot/cspell.json index b9abb15..cf70d40 100644 --- a/PrCopilot/cspell.json +++ b/PrCopilot/cspell.json @@ -11,10 +11,12 @@ "keepalive", "LASTEXITCODE", "MCPEXP", + "misattributes", "nologo", "nums", "osascript", "pushback", + "rerequest", "slnx", "unreplied", "xunit" From 10991be29264cfdf4bd71e09720020ac393707cb Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 23:24:25 -0700 Subject: [PATCH 18/20] Rename 'Treat as comment replied' to 'Treat as replied externally' (PR #51 review) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/PrCopilot/StateMachine/MonitorTransitions.cs | 4 ++-- .../PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index 84e6229..d699aaa 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -116,7 +116,7 @@ public static MonitorAction BuildTerminalAction(MonitorState state, TerminalStat ["I'll handle them myself"] = "handle_myself", ["Skip this comment"] = "skip", ["Treat as comment addressed"] = "treat_as_addressed", - ["Treat as comment replied"] = "treat_as_replied_externally", + ["Treat as replied externally"] = "treat_as_replied_externally", ["Done — resume monitoring"] = "done", ["Address next comment"] = "continue", ["I'll handle the rest myself"] = "done", @@ -431,7 +431,7 @@ internal static MonitorAction BuildExecutingTaskRecoveryPrompt( Choices = [ "Treat as comment addressed", - "Treat as comment replied", + "Treat as replied externally", "Skip this comment", "Resume monitoring", "Stop monitoring" diff --git a/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs index 41b5acf..94cf10b 100644 --- a/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs +++ b/PrCopilot/tests/PrCopilot.Tests/ExecutingTaskReadyRecoveryTests.cs @@ -324,7 +324,7 @@ public void BuildExecutingTaskRecoveryPrompt_CommentFlow_OffersTreatAsCommentRep Assert.NotNull(action.Choices); Assert.Contains("Treat as comment addressed", action.Choices!); - Assert.Contains("Treat as comment replied", action.Choices!); + Assert.Contains("Treat as replied externally", action.Choices!); } [Fact] @@ -332,13 +332,13 @@ public void ChoiceValueMap_HasTreatAsRepliedExternallyEntry() { Assert.Equal( "treat_as_replied_externally", - MonitorTransitions.ChoiceValueMap["Treat as comment replied"]); + MonitorTransitions.ChoiceValueMap["Treat as replied externally"]); } [Fact] public void ProcessEvent_TreatAsRepliedExternally_AdvancesWithoutPostingOrResolving() { - // Two unresolved comments. User picks "Treat as comment replied" on the first — + // Two unresolved comments. User picks "Treat as replied externally" on the first — // we should NOT post a reply (no compose_reply / post_reply / resolve_thread action), // we should NOT resolve the thread, and we SHOULD advance to the next comment. var state = CreateState(); @@ -416,7 +416,7 @@ public void RecoverFromReadyResolution_PushDetected_AmbiguousTask_AsksUserInstea Assert.Equal("ask_user", action.Action); Assert.NotNull(action.Choices); Assert.Contains("Treat as comment addressed", action.Choices!); - Assert.Contains("Treat as comment replied", action.Choices!); + Assert.Contains("Treat as replied externally", action.Choices!); } [Fact] From 4de93931efcdc333d15fc2b95572cbaf49ddef29 Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 23:49:03 -0700 Subject: [PATCH 19/20] Distinguish HEAD-refresh failure from no-push in recover_from_ready (PR #51 review) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../StateMachine/MonitorTransitions.cs | 20 ++- .../src/PrCopilot/Tools/MonitorFlowTools.cs | 6 +- ...RecoverFromReadyHeadRefreshFailureTests.cs | 129 ++++++++++++++++++ 3 files changed, 150 insertions(+), 5 deletions(-) create mode 100644 PrCopilot/tests/PrCopilot.Tests/RecoverFromReadyHeadRefreshFailureTests.cs diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index d699aaa..826be49 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -529,7 +529,10 @@ private static MonitorAction BuildRecoverFromReadyAction(MonitorState state) /// - headAdvanced + no flow: resume polling. /// - !headAdvanced: build the no-push recovery prompt. /// - internal static MonitorAction BuildRecoverFromReadyResolution(MonitorState state, bool headAdvanced) + internal static MonitorAction BuildRecoverFromReadyResolution( + MonitorState state, + bool headAdvanced, + bool headRefreshFailed = false) { if (headAdvanced && state.CommentFlow != CommentFlowState.None) { @@ -578,14 +581,23 @@ internal static MonitorAction BuildRecoverFromReadyResolution(MonitorState state return ProcessEvent(state, "user_chose", "resume", null); } - // No push detected — flow-aware no-push recovery prompt. - DebugLogger.Log("AutoExec", "recover_from_ready: HEAD unchanged — building flow-aware no-push recovery prompt"); + // No push detected — flow-aware no-push recovery prompt. If we couldn't + // refresh HEAD from GitHub (or never captured a snapshot), we can't actually + // know whether a push happened — surface an uncertainty prompt so the user + // can pick the correct completion path explicitly instead of being told + // "finished without a new commit" (which may be wrong). + var cannotDetermineHeadAdvance = headRefreshFailed + || string.IsNullOrWhiteSpace(state.HeadShaAtTaskStart); + var noPushReason = cannotDetermineHeadAdvance + ? "I couldn't verify whether you pushed (HEAD refresh from GitHub failed or no baseline was captured)" + : "Looks like the task finished without a new commit"; + DebugLogger.Log("AutoExec", $"recover_from_ready: HEAD unchanged — building flow-aware recovery prompt (cannotDetermine={cannotDetermineHeadAdvance})"); var noPushPriorCommentFlow = state.CommentFlow; var noPushPriorCiFailureFlow = state.CiFailureFlow; state.CurrentState = MonitorStateId.AwaitingUser; return BuildExecutingTaskRecoveryPrompt( state, - reasonText: "Looks like the task finished without a new commit", + reasonText: noPushReason, noPushPriorCommentFlow, noPushPriorCiFailureFlow); } diff --git a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs index 7a73fa7..fb629ee 100644 --- a/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs +++ b/PrCopilot/src/PrCopilot/Tools/MonitorFlowTools.cs @@ -1611,15 +1611,19 @@ private static async Task ExecuteAutoAction(MonitorState state, M // 5. HEAD did not advance → flow-aware "no new commit" recovery prompt. var snapshotSha = state.HeadShaAtTaskStart; string? latestSha = state.HeadSha; + bool headRefreshFailed = false; try { var freshPr = await PrStatusFetcher.FetchPrInfoAsync(state.Owner, state.Repo, state.PrNumber); if (!string.IsNullOrWhiteSpace(freshPr.HeadSha)) latestSha = freshPr.HeadSha; + else + headRefreshFailed = true; } catch (Exception ex) { DebugLogger.Error("AutoExec", $"recover_from_ready: HEAD refresh failed: {ex.Message}"); + headRefreshFailed = true; } if (!string.IsNullOrWhiteSpace(latestSha) && latestSha != state.HeadSha) @@ -1632,7 +1636,7 @@ private static async Task ExecuteAutoAction(MonitorState state, M && !string.IsNullOrWhiteSpace(latestSha) && !string.Equals(snapshotSha, latestSha, StringComparison.Ordinal); - return MonitorTransitions.BuildRecoverFromReadyResolution(state, headAdvanced); + return MonitorTransitions.BuildRecoverFromReadyResolution(state, headAdvanced, headRefreshFailed); } case "resolve_thread": { diff --git a/PrCopilot/tests/PrCopilot.Tests/RecoverFromReadyHeadRefreshFailureTests.cs b/PrCopilot/tests/PrCopilot.Tests/RecoverFromReadyHeadRefreshFailureTests.cs new file mode 100644 index 0000000..f5df5fb --- /dev/null +++ b/PrCopilot/tests/PrCopilot.Tests/RecoverFromReadyHeadRefreshFailureTests.cs @@ -0,0 +1,129 @@ +// Licensed under the MIT License. + +using PrCopilot.StateMachine; + +namespace PrCopilot.Tests; + +/// +/// Tests for the recover_from_ready auto-exec path when HEAD refresh fails +/// (FetchPrInfoAsync throws) or when HeadShaAtTaskStart is missing. In those +/// cases we cannot determine whether the task pushed a new commit, and surfacing +/// the standard "finished without a new commit" prompt is misleading — the user +/// might have actually pushed but we just couldn't verify. +/// +/// See: BuildRecoverFromReadyResolution(state, headAdvanced, headRefreshFailed). +/// +public class RecoverFromReadyHeadRefreshFailureTests +{ + private static MonitorState CreateState() => new() + { + Owner = "test-owner", + Repo = "test-repo", + PrNumber = 42, + HeadSha = "abc123", + HeadBranch = "feature/test", + SessionFolder = Path.GetTempPath() + }; + + private static CommentInfo MakeComment(string id = "c1") => new() + { + Id = id, + Author = "reviewer1", + FilePath = "src/File.cs", + Line = 10, + Body = "Please fix this" + }; + + [Fact] + public void HeadRefreshFailed_NoFlow_ShowsUncertaintyPrompt_NotNoCommitPrompt() + { + // Generic recovery (no comment/CI flow, no waiting comment). When refresh + // failed we don't know whether HEAD advanced — must NOT claim "finished + // without a new commit". + var state = CreateState(); + state.EnterExecutingTask(); + + var action = MonitorTransitions.BuildRecoverFromReadyResolution( + state, headAdvanced: false, headRefreshFailed: true); + + Assert.Equal("ask_user", action.Action); + Assert.NotNull(action.Question); + Assert.DoesNotContain("finished without a new commit", action.Question!, + StringComparison.OrdinalIgnoreCase); + Assert.Contains("couldn't", action.Question!, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public void HeadRefreshFailed_CommentFlow_ShowsUncertaintyPrompt_WithCommentChoices() + { + var state = CreateState(); + state.UnresolvedComments.Add(MakeComment()); + state.CurrentCommentIndex = 0; + state.CommentFlow = CommentFlowState.SingleCommentPrompt; + state.EnterExecutingTask(); + + var action = MonitorTransitions.BuildRecoverFromReadyResolution( + state, headAdvanced: false, headRefreshFailed: true); + + Assert.Equal("ask_user", action.Action); + Assert.DoesNotContain("finished without a new commit", action.Question!, + StringComparison.OrdinalIgnoreCase); + Assert.Contains("couldn't", action.Question!, StringComparison.OrdinalIgnoreCase); + // Still surfaces the comment-flow choices so user can mark addressed/skip/etc. + Assert.NotNull(action.Choices); + Assert.Contains("Treat as comment addressed", action.Choices!); + Assert.Contains("Treat as replied externally", action.Choices!); + } + + [Fact] + public void HeadRefreshFailed_CiFailureFlow_ShowsUncertaintyPrompt_WithCiChoices() + { + var state = CreateState(); + state.CiFailureFlow = CiFailureFlowState.InvestigationResults; + state.EnterExecutingTask(); + + var action = MonitorTransitions.BuildRecoverFromReadyResolution( + state, headAdvanced: false, headRefreshFailed: true); + + Assert.Equal("ask_user", action.Action); + Assert.DoesNotContain("finished without a new commit", action.Question!, + StringComparison.OrdinalIgnoreCase); + Assert.Contains("couldn't", action.Question!, StringComparison.OrdinalIgnoreCase); + Assert.NotNull(action.Choices); + Assert.Contains("Treat as push completed", action.Choices!); + } + + [Fact] + public void HeadShaAtTaskStartMissing_TreatedAsRefreshFailure() + { + // If we never captured a snapshot (HeadShaAtTaskStart is null/empty) we + // also cannot determine headAdvanced — same uncertainty prompt should apply. + var state = CreateState(); + state.CurrentState = MonitorStateId.ExecutingTask; + state.HeadShaAtTaskStart = null; + + var action = MonitorTransitions.BuildRecoverFromReadyResolution( + state, headAdvanced: false, headRefreshFailed: false); + + Assert.Equal("ask_user", action.Action); + Assert.DoesNotContain("finished without a new commit", action.Question!, + StringComparison.OrdinalIgnoreCase); + Assert.Contains("couldn't", action.Question!, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public void HeadRefreshSucceeded_NoAdvance_StillShowsNoCommitPrompt_Regression() + { + // The success path (refresh worked, HEAD didn't advance) must still show + // the existing "finished without a new commit" prompt. Don't regress. + var state = CreateState(); + state.EnterExecutingTask(); + + var action = MonitorTransitions.BuildRecoverFromReadyResolution( + state, headAdvanced: false, headRefreshFailed: false); + + Assert.Equal("ask_user", action.Action); + Assert.Contains("finished without a new commit", action.Question!, + StringComparison.OrdinalIgnoreCase); + } +} From 9274c74d1d03cf06fc7b7143eccbbb0328118dcd Mon Sep 17 00:00:00 2001 From: Michael Nash Date: Tue, 5 May 2026 23:56:57 -0700 Subject: [PATCH 20/20] Use actual state in BuildRecoverFromReadyAction debug log (PR #51 review) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs index 826be49..48722de 100644 --- a/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs +++ b/PrCopilot/src/PrCopilot/StateMachine/MonitorTransitions.cs @@ -498,7 +498,7 @@ internal static MonitorAction BuildExecutingTaskRecoveryPrompt( /// private static MonitorAction BuildRecoverFromReadyAction(MonitorState state) { - DebugLogger.Log("StateMachine", $"ExecutingTask/ready: dispatching auto_execute recover_from_ready_in_executing_task (snapshot HEAD={ShortSha(state.HeadShaAtTaskStart)})"); + DebugLogger.Log("StateMachine", $"{state.CurrentState}/ready: dispatching auto_execute recover_from_ready_in_executing_task (snapshot HEAD={ShortSha(state.HeadShaAtTaskStart)})"); return new MonitorAction { Action = "auto_execute",