From 6d82129e95cec7667de6ab68acef55a1250f4128 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 26 Jun 2026 01:37:43 +0000 Subject: [PATCH 1/6] Initial plan From 39a48c0f57ef87a66ee37a531ffee1b6879c37a5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 26 Jun 2026 01:51:56 +0000 Subject: [PATCH 2/6] fix(copilot-sdk): add post-completion idle watchdog to bound SDK hang after final tool result When the Copilot SDK's sendAndWait never resolves after the agent's final tool call returns success, the process would hang until the full step timeout fires (burning the entire remaining step budget). This adds a post-completion idle watchdog (GH_AW_SDK_IDLE_MS, default 5 min) to copilot_sdk_session.cjs: - The watchdog is armed whenever the session enters the "work complete" state: output has been collected AND no tool calls are still pending. - It is disarmed when a new tool call starts (the agent is still mid-turn). - If no new SDK events arrive within the idle window, the watchdog force-disconnects the session; the catch block treats that as a successful completion (same semantics as the existing idle-timeout path). This bounds the post-completion hang from full-step-timeout (~20 min) to at most the watchdog idle period (~5 min), without affecting normal completions where sendAndWait resolves on its own. Closes #41551 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/js/copilot_sdk_driver.test.cjs | 281 +++++++++++++++++++ actions/setup/js/copilot_sdk_session.cjs | 62 +++- 2 files changed, 342 insertions(+), 1 deletion(-) diff --git a/actions/setup/js/copilot_sdk_driver.test.cjs b/actions/setup/js/copilot_sdk_driver.test.cjs index ff1f3ade9d4..8dc23e3d490 100644 --- a/actions/setup/js/copilot_sdk_driver.test.cjs +++ b/actions/setup/js/copilot_sdk_driver.test.cjs @@ -332,6 +332,287 @@ describe("copilot_sdk_driver.cjs", () => { expect(result.hasOutput).toBe(false); }); + it("post-completion idle watchdog fires and treats session as completed", async () => { + // Regression test: when sendAndWait hangs after the agent's final tool result + // (the SDK post-completion hang), the watchdog must force-disconnect and the + // driver must return exitCode 0 with the collected output. + const disconnect = vi.fn().mockResolvedValue(undefined); + const stop = vi.fn().mockResolvedValue(undefined); + let onEvent = () => {}; + // disconnectCalled resolves when the watchdog calls session.disconnect() + let resolveDisconnect; + const disconnectCalled = new Promise(resolve => { + resolveDisconnect = resolve; + }); + const disconnectWithSignal = vi.fn().mockImplementation(() => { + resolveDisconnect(); + return Promise.resolve(undefined); + }); + + const session = { + sessionId: "session-watchdog-fires", + on: handler => { + onEvent = handler; + }, + // sendAndWait emits events that satisfy completion conditions, then hangs + // until the watchdog forces a disconnect. + sendAndWait: vi.fn().mockImplementation(async () => { + onEvent({ + type: "tool.execution_start", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { toolName: "create_issue", mcpServerName: "safeoutputs", toolCallId: "call-watchdog" }, + }); + onEvent({ + type: "assistant.message", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { content: "Issue filed successfully" }, + }); + onEvent({ + type: "tool.execution_complete", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { toolCallId: "call-watchdog", success: true }, + }); + // Simulate sendAndWait hanging — wait until the watchdog disconnects. + await disconnectCalled; + throw new Error("transport disconnected"); + }), + disconnect: disconnectWithSignal, + }; + class FakeCopilotClient { + start = vi.fn().mockResolvedValue(undefined); + createSession = vi.fn().mockResolvedValue(session); + stop = stop; + } + + const prevIdleMs = process.env.GH_AW_SDK_IDLE_MS; + // Use a very short idle timeout so the watchdog fires quickly in tests. + process.env.GH_AW_SDK_IDLE_MS = "20"; + try { + const result = await runWithCopilotSDK({ + sdkUri: "http://127.0.0.1:3002", + prompt: "test prompt", + logger: () => {}, + sdkModule: { + CopilotClient: FakeCopilotClient, + RuntimeConnection: { forUri: vi.fn(() => ({})) }, + approveAll: () => "allow", + }, + }); + + expect(result.exitCode).toBe(0); + expect(result.hasOutput).toBe(true); + expect(result.output).toContain("Issue filed successfully"); + // disconnect is called twice: once by the watchdog and once in finally. + expect(disconnectWithSignal).toHaveBeenCalled(); + expect(stop).toHaveBeenCalledTimes(1); + } finally { + if (prevIdleMs === undefined) delete process.env.GH_AW_SDK_IDLE_MS; + else process.env.GH_AW_SDK_IDLE_MS = prevIdleMs; + } + }); + + it("post-completion watchdog does not fire when tool calls are still pending", async () => { + // When a new tool call starts after the watchdog would have been armed, + // the watchdog must be disarmed so it does not fire while work is in progress. + const disconnect = vi.fn().mockResolvedValue(undefined); + const stop = vi.fn().mockResolvedValue(undefined); + let onEvent = () => {}; + + const session = { + sessionId: "session-watchdog-disarmed", + on: handler => { + onEvent = handler; + }, + sendAndWait: vi.fn().mockImplementation(async () => { + // First, reach the "post-completion" state that would arm the watchdog. + onEvent({ + type: "assistant.message", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { content: "still working" }, + }); + // Then start a new tool call — this must disarm the watchdog. + onEvent({ + type: "tool.execution_start", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { toolName: "bash", mcpServerName: "terminal", toolCallId: "call-new" }, + }); + // Complete the new tool call and produce more output. + onEvent({ + type: "tool.execution_complete", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { toolCallId: "call-new", success: true }, + }); + onEvent({ + type: "assistant.message", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { content: "done now" }, + }); + // sendAndWait resolves normally — no disconnect needed. + return { data: { content: "done now" } }; + }), + disconnect, + }; + class FakeCopilotClient { + start = vi.fn().mockResolvedValue(undefined); + createSession = vi.fn().mockResolvedValue(session); + stop = stop; + } + + const prevIdleMs = process.env.GH_AW_SDK_IDLE_MS; + process.env.GH_AW_SDK_IDLE_MS = "20"; + try { + const result = await runWithCopilotSDK({ + sdkUri: "http://127.0.0.1:3002", + prompt: "test prompt", + logger: () => {}, + sdkModule: { + CopilotClient: FakeCopilotClient, + RuntimeConnection: { forUri: vi.fn(() => ({})) }, + approveAll: () => "allow", + }, + }); + + expect(result.exitCode).toBe(0); + expect(result.hasOutput).toBe(true); + expect(result.output).toContain("done now"); + // Only one disconnect: from the finally block (normal completion path). + expect(disconnect).toHaveBeenCalledTimes(1); + } finally { + if (prevIdleMs === undefined) delete process.env.GH_AW_SDK_IDLE_MS; + else process.env.GH_AW_SDK_IDLE_MS = prevIdleMs; + } + }); + + it("post-completion watchdog does not trigger when output not yet collected", async () => { + // The watchdog must not arm when no output has been collected — only + // after the agent has produced real work product. + const disconnect = vi.fn().mockResolvedValue(undefined); + const stop = vi.fn().mockResolvedValue(undefined); + let onEvent = () => {}; + + const session = { + sessionId: "session-watchdog-no-output", + on: handler => { + onEvent = handler; + }, + sendAndWait: vi.fn().mockImplementation(async () => { + // Tool completes but no assistant.message yet — watchdog must not arm. + onEvent({ + type: "tool.execution_start", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { toolName: "bash", mcpServerName: "terminal", toolCallId: "call-early" }, + }); + onEvent({ + type: "tool.execution_complete", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { toolCallId: "call-early", success: true }, + }); + // Now session produces output and completes normally. + onEvent({ + type: "assistant.message", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { content: "here is the result" }, + }); + return { data: { content: "here is the result" } }; + }), + disconnect, + }; + class FakeCopilotClient { + start = vi.fn().mockResolvedValue(undefined); + createSession = vi.fn().mockResolvedValue(session); + stop = stop; + } + + const prevIdleMs = process.env.GH_AW_SDK_IDLE_MS; + process.env.GH_AW_SDK_IDLE_MS = "20"; + try { + const result = await runWithCopilotSDK({ + sdkUri: "http://127.0.0.1:3002", + prompt: "test prompt", + logger: () => {}, + sdkModule: { + CopilotClient: FakeCopilotClient, + RuntimeConnection: { forUri: vi.fn(() => ({})) }, + approveAll: () => "allow", + }, + }); + + expect(result.exitCode).toBe(0); + expect(result.hasOutput).toBe(true); + expect(result.output).toContain("here is the result"); + // Disconnect only once: from the normal finally-block cleanup. + expect(disconnect).toHaveBeenCalledTimes(1); + } finally { + if (prevIdleMs === undefined) delete process.env.GH_AW_SDK_IDLE_MS; + else process.env.GH_AW_SDK_IDLE_MS = prevIdleMs; + } + }); + + it("post-completion watchdog does not treat success as failure when sendAndWait resolves before timer fires", async () => { + // When sendAndWait resolves normally before the watchdog fires, the session + // should complete with exitCode 0 and no disconnect from the watchdog. + const disconnect = vi.fn().mockResolvedValue(undefined); + const stop = vi.fn().mockResolvedValue(undefined); + let onEvent = () => {}; + + const session = { + sessionId: "session-watchdog-not-needed", + on: handler => { + onEvent = handler; + }, + sendAndWait: vi.fn().mockImplementation(async () => { + onEvent({ + type: "assistant.message", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { content: "completed normally" }, + }); + // sendAndWait resolves before watchdog fires (watchdog idle = 20ms in test). + return { data: { content: "completed normally" } }; + }), + disconnect, + }; + class FakeCopilotClient { + start = vi.fn().mockResolvedValue(undefined); + createSession = vi.fn().mockResolvedValue(session); + stop = stop; + } + + const prevIdleMs = process.env.GH_AW_SDK_IDLE_MS; + process.env.GH_AW_SDK_IDLE_MS = "20"; + try { + const result = await runWithCopilotSDK({ + sdkUri: "http://127.0.0.1:3002", + prompt: "test prompt", + logger: () => {}, + sdkModule: { + CopilotClient: FakeCopilotClient, + RuntimeConnection: { forUri: vi.fn(() => ({})) }, + approveAll: () => "allow", + }, + }); + + expect(result.exitCode).toBe(0); + expect(result.hasOutput).toBe(true); + expect(result.output).toContain("completed normally"); + // Disconnect called once (finally), not twice. + expect(disconnect).toHaveBeenCalledTimes(1); + } finally { + if (prevIdleMs === undefined) delete process.env.GH_AW_SDK_IDLE_MS; + else process.env.GH_AW_SDK_IDLE_MS = prevIdleMs; + } + }); + it("passes custom provider and model through to SDK createSession", async () => { const disconnect = vi.fn().mockResolvedValue(undefined); const stop = vi.fn().mockResolvedValue(undefined); diff --git a/actions/setup/js/copilot_sdk_session.cjs b/actions/setup/js/copilot_sdk_session.cjs index 9f3129c43f0..fb4e7112dd2 100644 --- a/actions/setup/js/copilot_sdk_session.cjs +++ b/actions/setup/js/copilot_sdk_session.cjs @@ -43,6 +43,15 @@ const SDK_SEND_TIMEOUT_MS_DEFAULT = 10 * 60 * 1000; // Keep in sync with SDK_SESSION_IDLE_TIMEOUT_PATTERN in copilot_harness.cjs. const SDK_IDLE_TIMEOUT_PATTERN = /Timeout after \d+ms waiting for session\.idle/; +// Default idle period for the post-completion watchdog: 5 minutes. +// When the agent has produced output and all tracked tool calls have completed, +// the driver arms a watchdog timer. If no new SDK events arrive within this +// window, the driver force-disconnects the session and treats it as a successful +// completion — covering the SDK driver bug where sendAndWait never resolves after +// the final tool result is returned. +// Override via the GH_AW_SDK_IDLE_MS environment variable. +const SDK_POST_COMPLETION_IDLE_MS_DEFAULT = 5 * 60 * 1000; + /** * Extract the prompt text from a resolved args array. * Looks for the first occurrence of "-p " or "--prompt ". @@ -156,6 +165,18 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c */ const pendingToolCalls = new Map(); + // Post-completion idle watchdog. + // When the agent has produced output and all tracked tool calls have completed, + // this timer is armed. If no new SDK events arrive within GH_AW_SDK_IDLE_MS + // (default 5 minutes), the watchdog force-disconnects the session and the catch + // block treats the result as a successful completion. This bounds the damage + // from the SDK driver bug where sendAndWait never resolves after the final + // tool result is returned. + const postCompletionIdleMs = getEnvPositiveIntOrDefault("GH_AW_SDK_IDLE_MS", SDK_POST_COMPLETION_IDLE_MS_DEFAULT); + let postCompletionWatchdogTriggered = false; + /** @type {ReturnType | null} */ + let postCompletionWatchdog = null; + /** * Best-effort write of a driver-level event to events.jsonl and stderr. * @param {string} type @@ -297,6 +318,31 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c // Other event types are not consumed by unified_timeline.cjs; skip them. break; } + + // After processing each event, update the post-completion watchdog: + // - Arm (or rearm) the watchdog when the session looks complete: output + // collected and no tool calls still in flight. + // - Disarm the watchdog whenever the session is still mid-turn (a new + // tool call was just started, or no output yet). + // The watchdog fires only if sendAndWait never resolves on its own after + // the final tool result is returned — the common SDK post-completion hang. + if (hasOutput && pendingToolCalls.size === 0) { + if (postCompletionWatchdog) clearTimeout(postCompletionWatchdog); + postCompletionWatchdog = setTimeout(() => { + postCompletionWatchdog = null; + if (!hasOutput || pendingToolCalls.size !== 0 || !session) return; + log(`warning: post-completion idle watchdog fired after ${postCompletionIdleMs}ms — force-disconnecting session`); + postCompletionWatchdogTriggered = true; + void session.disconnect().catch(() => { + // best-effort disconnect + }); + }, postCompletionIdleMs); + } else { + if (postCompletionWatchdog) { + clearTimeout(postCompletionWatchdog); + postCompletionWatchdog = null; + } + } }); log("sending prompt..."); @@ -326,6 +372,15 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c const failure = catastrophicToolDenialsError ?? (err instanceof Error ? err : new Error(String(err))); log(`error: ${failure.message}`); + // When the post-completion idle watchdog force-disconnected the session, the + // agent's work is done — the SDK simply failed to resolve sendAndWait after + // the final tool result was returned. Treat it as a successful completion. + if (postCompletionWatchdogTriggered && !catastrophicToolDenialsError && hasOutput && pendingToolCalls.size === 0) { + log(`warning: post-completion watchdog triggered disconnect — treating as completed`); + log(`session completed: hasOutput=${hasOutput} durationMs=${durationMs}`); + return { exitCode: 0, output, hasOutput, durationMs }; + } + // When sendAndWait times out waiting for session.idle but the agent produced // output and all tracked tool calls have already completed, the session work is // done — the SDK simply failed to emit the idle signal. Treat it as a successful @@ -346,6 +401,11 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c durationMs, }; } finally { + // Clear the post-completion watchdog if it has not already fired. + if (postCompletionWatchdog) { + clearTimeout(postCompletionWatchdog); + postCompletionWatchdog = null; + } // Snapshot for null-safe cleanup in this scope. const stream = eventsStream; if (stream) { @@ -368,4 +428,4 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c } } -module.exports = { SDK_SEND_TIMEOUT_MS_DEFAULT, SDK_IDLE_TIMEOUT_PATTERN, extractPromptFromArgs, runWithCopilotSDK }; +module.exports = { SDK_SEND_TIMEOUT_MS_DEFAULT, SDK_POST_COMPLETION_IDLE_MS_DEFAULT, SDK_IDLE_TIMEOUT_PATTERN, extractPromptFromArgs, runWithCopilotSDK }; From 7045e88e3575c5eed23504ca624122f2e6f9c8d7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 26 Jun 2026 01:52:57 +0000 Subject: [PATCH 3/6] =?UTF-8?q?fix:=20address=20code=20review=20=E2=80=94?= =?UTF-8?q?=20log=20watchdog=20disconnect=20errors,=20comment=20race-condi?= =?UTF-8?q?tion=20guard?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/js/copilot_sdk_session.cjs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/actions/setup/js/copilot_sdk_session.cjs b/actions/setup/js/copilot_sdk_session.cjs index fb4e7112dd2..9de85461df5 100644 --- a/actions/setup/js/copilot_sdk_session.cjs +++ b/actions/setup/js/copilot_sdk_session.cjs @@ -330,11 +330,13 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c if (postCompletionWatchdog) clearTimeout(postCompletionWatchdog); postCompletionWatchdog = setTimeout(() => { postCompletionWatchdog = null; + // Re-check conditions at fire time: a new tool call could have started + // between arming the watchdog and the timer firing (race condition guard). if (!hasOutput || pendingToolCalls.size !== 0 || !session) return; log(`warning: post-completion idle watchdog fired after ${postCompletionIdleMs}ms — force-disconnecting session`); postCompletionWatchdogTriggered = true; - void session.disconnect().catch(() => { - // best-effort disconnect + void session.disconnect().catch(err => { + log(`warning: post-completion watchdog disconnect failed: ${err instanceof Error ? err.message : String(err)}`); }); }, postCompletionIdleMs); } else { From c302ba6ff72654a65f6a46821817b869f9fd7335 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 26 Jun 2026 03:02:00 +0000 Subject: [PATCH 4/6] fix(tests): remove unused disconnect mock and increase watchdog interval in flaky test Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/js/copilot_sdk_driver.test.cjs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/actions/setup/js/copilot_sdk_driver.test.cjs b/actions/setup/js/copilot_sdk_driver.test.cjs index 8dc23e3d490..dc068411f27 100644 --- a/actions/setup/js/copilot_sdk_driver.test.cjs +++ b/actions/setup/js/copilot_sdk_driver.test.cjs @@ -336,7 +336,6 @@ describe("copilot_sdk_driver.cjs", () => { // Regression test: when sendAndWait hangs after the agent's final tool result // (the SDK post-completion hang), the watchdog must force-disconnect and the // driver must return exitCode 0 with the collected output. - const disconnect = vi.fn().mockResolvedValue(undefined); const stop = vi.fn().mockResolvedValue(undefined); let onEvent = () => {}; // disconnectCalled resolves when the watchdog calls session.disconnect() @@ -577,7 +576,8 @@ describe("copilot_sdk_driver.cjs", () => { timestamp: new Date().toISOString(), data: { content: "completed normally" }, }); - // sendAndWait resolves before watchdog fires (watchdog idle = 20ms in test). + // sendAndWait resolves before watchdog fires (watchdog idle = 500ms in test — + // large enough that normal completion always wins the race on any CI runner). return { data: { content: "completed normally" } }; }), disconnect, @@ -589,7 +589,7 @@ describe("copilot_sdk_driver.cjs", () => { } const prevIdleMs = process.env.GH_AW_SDK_IDLE_MS; - process.env.GH_AW_SDK_IDLE_MS = "20"; + process.env.GH_AW_SDK_IDLE_MS = "500"; try { const result = await runWithCopilotSDK({ sdkUri: "http://127.0.0.1:3002", From bd7268aac514928c23c7c61322cf5036d384ae89 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 26 Jun 2026 03:33:14 +0000 Subject: [PATCH 5/6] fix(copilot-sdk): handle assistant.turn_start/end, session.task_complete, subagent events from SDK doc review Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/js/copilot_sdk_driver.test.cjs | 141 +++++++++++++++++++ actions/setup/js/copilot_sdk_session.cjs | 71 +++++++++- 2 files changed, 207 insertions(+), 5 deletions(-) diff --git a/actions/setup/js/copilot_sdk_driver.test.cjs b/actions/setup/js/copilot_sdk_driver.test.cjs index dc068411f27..800520dbfa1 100644 --- a/actions/setup/js/copilot_sdk_driver.test.cjs +++ b/actions/setup/js/copilot_sdk_driver.test.cjs @@ -613,6 +613,147 @@ describe("copilot_sdk_driver.cjs", () => { } }); + it("post-completion watchdog is disarmed during assistant.turn_start → turn_end cycle", async () => { + // When a new turn starts (assistant.turn_start) after the first output is + // produced and all tool calls complete, the watchdog must not fire until + // the turn ends (assistant.turn_end). This prevents a premature + // force-disconnect while the LLM is still doing inference. + const disconnect = vi.fn().mockResolvedValue(undefined); + const stop = vi.fn().mockResolvedValue(undefined); + let onEvent = () => {}; + + const session = { + sessionId: "session-watchdog-turn-disarm", + on: handler => { + onEvent = handler; + }, + sendAndWait: vi.fn().mockImplementation(async () => { + // Produce output and complete all tool calls — watchdog would arm here. + onEvent({ + type: "assistant.message", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { content: "first result" }, + }); + // New turn starts — must disarm watchdog immediately. + onEvent({ + type: "assistant.turn_start", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { turnId: "turn-2" }, + }); + // Turn ends — watchdog may re-arm now. + onEvent({ + type: "assistant.turn_end", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { turnId: "turn-2" }, + }); + // Additional output and normal resolution. + onEvent({ + type: "assistant.message", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { content: " second result" }, + }); + return { data: { content: " second result" } }; + }), + disconnect, + }; + class FakeCopilotClient { + start = vi.fn().mockResolvedValue(undefined); + createSession = vi.fn().mockResolvedValue(session); + stop = stop; + } + + const prevIdleMs = process.env.GH_AW_SDK_IDLE_MS; + // Very short watchdog — if the turn_start guard is missing, the watchdog + // would fire before sendAndWait resolves. + process.env.GH_AW_SDK_IDLE_MS = "500"; + try { + const result = await runWithCopilotSDK({ + sdkUri: "http://127.0.0.1:3002", + prompt: "test prompt", + logger: () => {}, + sdkModule: { + CopilotClient: FakeCopilotClient, + RuntimeConnection: { forUri: vi.fn(() => ({})) }, + approveAll: () => "allow", + }, + }); + + expect(result.exitCode).toBe(0); + expect(result.hasOutput).toBe(true); + // Disconnect called once (finally), not twice. + expect(disconnect).toHaveBeenCalledTimes(1); + } finally { + if (prevIdleMs === undefined) delete process.env.GH_AW_SDK_IDLE_MS; + else process.env.GH_AW_SDK_IDLE_MS = prevIdleMs; + } + }); + + it("session.task_complete is written to events.jsonl", async () => { + // session.task_complete must be serialized to the JSONL log so that + // unified_timeline.cjs can surface the agent's task summary. + const disconnect = vi.fn().mockResolvedValue(undefined); + const stop = vi.fn().mockResolvedValue(undefined); + let onEvent = () => {}; + const sessionId = "session-task-complete-jsonl"; + + const session = { + sessionId, + on: handler => { + onEvent = handler; + }, + sendAndWait: vi.fn().mockImplementation(async () => { + onEvent({ + type: "assistant.message", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { content: "work done" }, + }); + onEvent({ + type: "session.task_complete", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { success: true, summary: "Created 3 issues successfully" }, + }); + return { data: { content: "work done" } }; + }), + disconnect, + }; + class FakeCopilotClient { + start = vi.fn().mockResolvedValue(undefined); + createSession = vi.fn().mockResolvedValue(session); + stop = stop; + } + + const result = await runWithCopilotSDK({ + sdkUri: "http://127.0.0.1:3002", + prompt: "test prompt", + logger: () => {}, + sdkModule: { + CopilotClient: FakeCopilotClient, + RuntimeConnection: { forUri: vi.fn(() => ({})) }, + approveAll: () => "allow", + }, + }); + + expect(result.exitCode).toBe(0); + + // Read the JSONL and verify the task_complete entry is present. + const eventsPath = path.join(testSessionStateDir, sessionId, "events.jsonl"); + const lines = fs + .readFileSync(eventsPath, "utf8") + .trim() + .split("\n") + .map(l => JSON.parse(l)); + const taskCompleteEvent = lines.find(e => e.type === "session.task_complete"); + expect(taskCompleteEvent).toBeDefined(); + expect(taskCompleteEvent.data.success).toBe(true); + expect(taskCompleteEvent.data.summary).toBe("Created 3 issues successfully"); + }); + it("passes custom provider and model through to SDK createSession", async () => { const disconnect = vi.fn().mockResolvedValue(undefined); const stop = vi.fn().mockResolvedValue(undefined); diff --git a/actions/setup/js/copilot_sdk_session.cjs b/actions/setup/js/copilot_sdk_session.cjs index 9de85461df5..a11264eb6b2 100644 --- a/actions/setup/js/copilot_sdk_session.cjs +++ b/actions/setup/js/copilot_sdk_session.cjs @@ -12,6 +12,12 @@ * SDK "tool.execution_start" → JSONL "tool.execution_start" (toolName, mcpServerName, command?) * SDK "tool.execution_complete" → JSONL "tool.execution_complete" (toolName, mcpServerName, success, result) * SDK "assistant.message" → JSONL "assistant.message" (content) + * SDK "assistant.turn_start" → watchdog disarmed (inAssistantTurn = true) + * SDK "assistant.turn_end" → watchdog re-enabled (inAssistantTurn = false) + * SDK "session.task_complete" → JSONL "session.task_complete" (success, summary) + * SDK "subagent.started" → JSONL "subagent.started" (agentName, agentDisplayName, toolCallId) + * SDK "subagent.completed" → JSONL "subagent.completed" (agentName, toolCallId) + * SDK "subagent.failed" → JSONL "subagent.failed" (agentName, toolCallId, error) * * The JSONL file is written to: * /tmp/gh-aw/sandbox/agent/logs/copilot-session-state/{sessionId}/events.jsonl @@ -176,6 +182,11 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c let postCompletionWatchdogTriggered = false; /** @type {ReturnType | null} */ let postCompletionWatchdog = null; + // Tracks whether the LLM is mid-inference (between assistant.turn_start and + // assistant.turn_end). The watchdog must not fire during this window because + // pendingToolCalls may legitimately be empty before the model dispatches its + // first tool call of the new turn. + let inAssistantTurn = false; /** * Best-effort write of a driver-level event to events.jsonl and stderr. @@ -314,6 +325,55 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c break; } + case "assistant.turn_start": + // LLM inference started for a new turn. Disarm the watchdog — the model + // may not have dispatched any tool calls yet, so pendingToolCalls can be + // empty while real work is still in progress. + inAssistantTurn = true; + if (postCompletionWatchdog) { + clearTimeout(postCompletionWatchdog); + postCompletionWatchdog = null; + } + break; + + case "assistant.turn_end": + // LLM inference finished. Allow the watchdog to re-arm on the next event + // that satisfies the completion conditions. + inAssistantTurn = false; + break; + + case "session.task_complete": + writeEvent("session.task_complete", { success: event.data?.success, summary: event.data?.summary ?? "" }, event.timestamp); + break; + + case "subagent.started": + writeEvent( + "subagent.started", + { + agentName: event.data?.agentName, + agentDisplayName: event.data?.agentDisplayName, + toolCallId: event.data?.toolCallId, + }, + event.timestamp + ); + break; + + case "subagent.completed": + writeEvent("subagent.completed", { agentName: event.data?.agentName, toolCallId: event.data?.toolCallId }, event.timestamp); + break; + + case "subagent.failed": + writeEvent( + "subagent.failed", + { + agentName: event.data?.agentName, + toolCallId: event.data?.toolCallId, + error: event.data?.error, + }, + event.timestamp + ); + break; + default: // Other event types are not consumed by unified_timeline.cjs; skip them. break; @@ -321,18 +381,19 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c // After processing each event, update the post-completion watchdog: // - Arm (or rearm) the watchdog when the session looks complete: output - // collected and no tool calls still in flight. + // collected, no tool calls still in flight, and not mid-LLM-inference. // - Disarm the watchdog whenever the session is still mid-turn (a new - // tool call was just started, or no output yet). + // tool call was just started, LLM inference is in progress, or no output yet). // The watchdog fires only if sendAndWait never resolves on its own after // the final tool result is returned — the common SDK post-completion hang. - if (hasOutput && pendingToolCalls.size === 0) { + if (hasOutput && pendingToolCalls.size === 0 && !inAssistantTurn) { if (postCompletionWatchdog) clearTimeout(postCompletionWatchdog); postCompletionWatchdog = setTimeout(() => { postCompletionWatchdog = null; // Re-check conditions at fire time: a new tool call could have started - // between arming the watchdog and the timer firing (race condition guard). - if (!hasOutput || pendingToolCalls.size !== 0 || !session) return; + // or a new turn could have begun between arming the watchdog and the + // timer firing (race condition guard). + if (!hasOutput || pendingToolCalls.size !== 0 || inAssistantTurn || !session) return; log(`warning: post-completion idle watchdog fired after ${postCompletionIdleMs}ms — force-disconnecting session`); postCompletionWatchdogTriggered = true; void session.disconnect().catch(err => { From f990bd378102ae5a95d6ba28346a800dc5f93cfe Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 26 Jun 2026 03:35:28 +0000 Subject: [PATCH 6/6] =?UTF-8?q?fix:=20address=20code=20review=20=E2=80=94?= =?UTF-8?q?=20omit=20summary=20field=20when=20absent,=20increase=20watchdo?= =?UTF-8?q?g=20interval=20to=201000ms?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/js/copilot_sdk_driver.test.cjs | 7 ++++--- actions/setup/js/copilot_sdk_session.cjs | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/actions/setup/js/copilot_sdk_driver.test.cjs b/actions/setup/js/copilot_sdk_driver.test.cjs index 800520dbfa1..f4ac95197c9 100644 --- a/actions/setup/js/copilot_sdk_driver.test.cjs +++ b/actions/setup/js/copilot_sdk_driver.test.cjs @@ -667,9 +667,10 @@ describe("copilot_sdk_driver.cjs", () => { } const prevIdleMs = process.env.GH_AW_SDK_IDLE_MS; - // Very short watchdog — if the turn_start guard is missing, the watchdog - // would fire before sendAndWait resolves. - process.env.GH_AW_SDK_IDLE_MS = "500"; + // Short watchdog — if the turn_start guard is missing, the watchdog + // would fire before sendAndWait resolves. 1000ms gives enough headroom + // on slow CI while still detecting a missing guard reliably. + process.env.GH_AW_SDK_IDLE_MS = "1000"; try { const result = await runWithCopilotSDK({ sdkUri: "http://127.0.0.1:3002", diff --git a/actions/setup/js/copilot_sdk_session.cjs b/actions/setup/js/copilot_sdk_session.cjs index a11264eb6b2..be2e6aa5586 100644 --- a/actions/setup/js/copilot_sdk_session.cjs +++ b/actions/setup/js/copilot_sdk_session.cjs @@ -343,7 +343,7 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c break; case "session.task_complete": - writeEvent("session.task_complete", { success: event.data?.success, summary: event.data?.summary ?? "" }, event.timestamp); + writeEvent("session.task_complete", { success: event.data?.success, summary: event.data?.summary }, event.timestamp); break; case "subagent.started":