Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 27 additions & 24 deletions actions/setup/js/log_parser_bootstrap.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,14 @@ describe("log_parser_bootstrap.cjs", () => {
fs.unlinkSync(logFile),
fs.rmdirSync(tmpDir));
}),
it("should fail Claude runs when no structured log entries are parsed", () => {
it("should fail Claude runs when no structured log entries are parsed", async () => {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/diagnosing-bugs] The root cause of all 12 failing tests was that runLogParser is async but the test callbacks were synchronous. This PR correctly fixes that, but a comment or a regression-guarding lint rule would prevent future callers from making the same mistake.

💡 Consider a JSDoc comment on runLogParser

Adding a JSDoc @returns {Promise<void>} annotation to runLogParser in log_parser_bootstrap.cjs makes it immediately obvious to the IDE and future test authors that the function must be awaited:

/**
 * `@param` {{ parseLog: Function, parserName: string, supportsDirectories?: boolean }} opts
 * `@returns` {Promise<void>}
 */
async function runLogParser(opts) { ... }

This is a low-cost, permanent documentation of the async contract — exactly the kind of instrumentation /diagnosing-bugs recommends to prevent regression.

@copilot please address this.

const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-"));
const logFile = path.join(tmpDir, "test.log");
try {
fs.writeFileSync(logFile, "unstructured log output");
process.env.GH_AW_AGENT_OUTPUT = logFile;
const mockParseLog = vi.fn().mockReturnValue({ markdown: "## Result\n", mcpFailures: [], maxTurnsHit: false, logEntries: [] });
runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
await runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
expect(mockCore.setFailed).toHaveBeenCalledWith(`${ERR_CONFIG}: Claude execution failed: no structured log entries were produced. This usually indicates a startup or configuration error before tool execution.`);
} finally {
fs.unlinkSync(logFile);
Expand Down Expand Up @@ -105,14 +105,17 @@ describe("log_parser_bootstrap.cjs", () => {
fs.unlinkSync(logFile),
fs.rmdirSync(tmpDir));
}),
it("should handle MCP failures", () => {
it("should handle MCP failures", async () => {
const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-")),
logFile = path.join(tmpDir, "test.log");
(fs.writeFileSync(logFile, "content"), (process.env.GH_AW_AGENT_OUTPUT = logFile));
const mockParseLog = vi.fn().mockReturnValue({ markdown: "## Result\n", mcpFailures: ["server1", "server2"], maxTurnsHit: !1 });
(runLogParser({ parseLog: mockParseLog, parserName: "TestParser" }), expect(mockCore.setFailed).toHaveBeenCalledWith(`${ERR_API}: MCP server(s) failed to launch: server1, server2`), fs.unlinkSync(logFile), fs.rmdirSync(tmpDir));
(await runLogParser({ parseLog: mockParseLog, parserName: "TestParser" }),

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/tdd] Comma-chain async test skips cleanup on failure — if the expect assertion throws, fs.unlinkSync and fs.rmdirSync never run, leaving temp dirs behind.

💡 Suggested refactor (try/finally)

Other tests in this same file (e.g. lines 137–162) already use the safe pattern:

it('should handle MCP failures', async () => {
  const tmpDir = fs.mkdtempSync(path.join(__dirname, 'test-'));
  const logFile = path.join(tmpDir, 'test.log');
  try {
    fs.writeFileSync(logFile, 'content');
    process.env.GH_AW_AGENT_OUTPUT = logFile;
    const mockParseLog = vi.fn().mockReturnValue({ markdown: '## Result\n', mcpFailures: ['server1', 'server2'], maxTurnsHit: false });
    await runLogParser({ parseLog: mockParseLog, parserName: 'TestParser' });
    expect(mockCore.setFailed).toHaveBeenCalledWith(`${ERR_API}: MCP server(s) failed to launch: server1, server2`);
  } finally {
    fs.unlinkSync(logFile);
    fs.rmdirSync(tmpDir);
  }
});

The same pattern applies to the tests at lines 130–136 and 305–314.

@copilot please address this.

expect(mockCore.setFailed).toHaveBeenCalledWith(`${ERR_API}: MCP server(s) failed to launch: server1, server2`),
fs.unlinkSync(logFile),
fs.rmdirSync(tmpDir));

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Temp-dir leak if assertion throws: cleanup runs in a comma expression with no try/finally, so if expect(mockCore.setFailed) throws, fs.unlinkSync and fs.rmdirSync are never reached.

💡 Suggested fix

Refactor to use try/finally for cleanup, consistent with the majority of tests in this file:

it("should handle MCP failures", async () => {
  const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-"));
  const logFile = path.join(tmpDir, "test.log");
  try {
    fs.writeFileSync(logFile, "content");
    process.env.GH_AW_AGENT_OUTPUT = logFile;
    const mockParseLog = vi.fn().mockReturnValue({ markdown: "## Result\n", mcpFailures: ["server1", "server2"], maxTurnsHit: false });
    await runLogParser({ parseLog: mockParseLog, parserName: "TestParser" });
    expect(mockCore.setFailed).toHaveBeenCalledWith(`${ERR_API}: MCP server(s) failed to launch: server1, server2`);
  } finally {
    fs.unlinkSync(logFile);
    fs.rmdirSync(tmpDir);
  }
});

The same issue exists in the "should warn instead of failing MCP failures when safe outputs exist" test (~line 128) and "should handle max-turns limit reached" test (~line 308). All three were touched by this PR and are outliers — every other updated test already uses try/finally.

}),
it("should warn instead of failing MCP failures when safe outputs exist", () => {
it("should warn instead of failing MCP failures when safe outputs exist", async () => {
const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-"));
const logFile = path.join(tmpDir, "test.log");
const safeOutputsFile = path.join(tmpDir, "safe-outputs.jsonl");
Expand All @@ -124,14 +127,14 @@ describe("log_parser_bootstrap.cjs", () => {

const mockParseLog = vi.fn().mockReturnValue({ markdown: "## Result\n", mcpFailures: ["server1"], maxTurnsHit: !1 });

(runLogParser({ parseLog: mockParseLog, parserName: "TestParser" }),
(await runLogParser({ parseLog: mockParseLog, parserName: "TestParser" }),

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/tdd] Same comma-chain cleanup hazard as line 113 — env vars and temp files aren't cleaned up if expect throws mid-chain.

💡 Suggested refactor (try/finally)

Use try/finally as the surrounding tests already do (e.g. lines 118–136 already exist cleanly — but the comma-chain equivalent here leaves GH_AW_SAFE_OUTPUTS set on failure, polluting later tests):

try {
  // ... setup ...
  await runLogParser({ parseLog: mockParseLog, parserName: 'TestParser' });
  expect(mockCore.warning).toHaveBeenCalledWith(...);
  expect(mockCore.setFailed).not.toHaveBeenCalled();
} finally {
  fs.unlinkSync(logFile);
  fs.unlinkSync(safeOutputsFile);
  delete process.env.GH_AW_SAFE_OUTPUTS;
  fs.rmdirSync(tmpDir);
}

@copilot please address this.

expect(mockCore.warning).toHaveBeenCalledWith("MCP server(s) failed to launch (server1), but agent completed with 1 safe output entry"),
expect(mockCore.setFailed).not.toHaveBeenCalled(),
fs.unlinkSync(logFile),
fs.unlinkSync(safeOutputsFile),
fs.rmdirSync(tmpDir));
}),
it("should warn (non-fatal) when MCP fails but agent ran turns (legacy result entry)", () => {
it("should warn (non-fatal) when MCP fails but agent ran turns (legacy result entry)", async () => {
const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-"));
const logFile = path.join(tmpDir, "test.log");
try {
Expand All @@ -149,15 +152,15 @@ describe("log_parser_bootstrap.cjs", () => {
{ type: "result", num_turns: 34, duration_ms: 60000 },
],
});
runLogParser({ parseLog: mockParseLog, parserName: "TestParser" });
await runLogParser({ parseLog: mockParseLog, parserName: "TestParser" });
expect(mockCore.warning).toHaveBeenCalledWith("MCP server(s) failed to launch (github), but agent completed turns — treating as non-fatal post-completion relaunch");
expect(mockCore.setFailed).not.toHaveBeenCalled();
} finally {
fs.unlinkSync(logFile);
fs.rmdirSync(tmpDir);
}
}),
it("should warn (non-fatal) when MCP fails but agent ran turns (Copilot event session.result)", () => {
it("should warn (non-fatal) when MCP fails but agent ran turns (Copilot event session.result)", async () => {
const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-"));
const logFile = path.join(tmpDir, "test.log");
try {
Expand All @@ -175,15 +178,15 @@ describe("log_parser_bootstrap.cjs", () => {
{ type: "session.result", data: { numTurns: 34, durationMs: 60000 } },
],
});
runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
await runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
expect(mockCore.warning).toHaveBeenCalledWith("MCP server(s) failed to launch (github), but agent completed turns — treating as non-fatal post-completion relaunch");
expect(mockCore.setFailed).not.toHaveBeenCalled();
} finally {
fs.unlinkSync(logFile);
fs.rmdirSync(tmpDir);
}
}),
it("should still fail when MCP fails and agent ran no turns", () => {
it("should still fail when MCP fails and agent ran no turns", async () => {
const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-"));
const logFile = path.join(tmpDir, "test.log");
try {
Expand All @@ -196,14 +199,14 @@ describe("log_parser_bootstrap.cjs", () => {
maxTurnsHit: false,
logEntries: [{ type: "system", subtype: "init" }],
});
runLogParser({ parseLog: mockParseLog, parserName: "TestParser" });
await runLogParser({ parseLog: mockParseLog, parserName: "TestParser" });
expect(mockCore.setFailed).toHaveBeenCalledWith(`${ERR_API}: MCP server(s) failed to launch: github`);
} finally {
fs.unlinkSync(logFile);
fs.rmdirSync(tmpDir);
}
}),
it("should warn (non-fatal) when Claude has empty logEntries but safe outputs exist", () => {
it("should warn (non-fatal) when Claude has empty logEntries but safe outputs exist", async () => {
const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-"));
const logFile = path.join(tmpDir, "test.log");
const safeOutputsFile = path.join(tmpDir, "safe-outputs.jsonl");
Expand All @@ -215,7 +218,7 @@ describe("log_parser_bootstrap.cjs", () => {
// Parser returns markdown but no structured logEntries — simulates sandbox teardown
// race leaving agent-stdio.log unreadable after the agent completed successfully.
const mockParseLog = vi.fn().mockReturnValue({ markdown: "## Result\n", mcpFailures: [], maxTurnsHit: false, logEntries: [] });
runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
await runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
expect(mockCore.setFailed).not.toHaveBeenCalled();
expect(mockCore.warning).toHaveBeenCalledWith("Claude produced no structured log entries, but agent completed with 1 safe output entry — treating as non-fatal post-completion infrastructure failure");
} finally {
Expand All @@ -225,22 +228,22 @@ describe("log_parser_bootstrap.cjs", () => {
fs.rmdirSync(tmpDir);
}
}),
it("should fail when Claude has empty logEntries and no safe outputs (startup failure)", () => {
it("should fail when Claude has empty logEntries and no safe outputs (startup failure)", async () => {
const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-"));
const logFile = path.join(tmpDir, "test.log");
try {
fs.writeFileSync(logFile, "some raw content");
process.env.GH_AW_AGENT_OUTPUT = logFile;
delete process.env.GH_AW_SAFE_OUTPUTS;
const mockParseLog = vi.fn().mockReturnValue({ markdown: "## Result\n", mcpFailures: [], maxTurnsHit: false, logEntries: [] });
runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
await runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
expect(mockCore.setFailed).toHaveBeenCalledWith(`${ERR_CONFIG}: Claude execution failed: no structured log entries were produced. This usually indicates a startup or configuration error before tool execution.`);
} finally {
fs.unlinkSync(logFile);
fs.rmdirSync(tmpDir);
}
}),
it("should not print 'parsed successfully' for Claude when logEntries is empty", () => {
it("should not print 'parsed successfully' for Claude when logEntries is empty", async () => {
const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-"));
const logFile = path.join(tmpDir, "test.log");
const safeOutputsFile = path.join(tmpDir, "safe-outputs.jsonl");
Expand All @@ -250,7 +253,7 @@ describe("log_parser_bootstrap.cjs", () => {
process.env.GH_AW_AGENT_OUTPUT = logFile;
process.env.GH_AW_SAFE_OUTPUTS = safeOutputsFile;
const mockParseLog = vi.fn().mockReturnValue({ markdown: "## Result\n", mcpFailures: [], maxTurnsHit: false, logEntries: [] });
runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
await runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
const infoCalls = mockCore.info.mock.calls.map(c => c[0]);
expect(infoCalls.some(msg => msg.includes("Claude log parsed successfully"))).toBe(false);
expect(mockCore.setFailed).not.toHaveBeenCalled();
Expand All @@ -262,23 +265,23 @@ describe("log_parser_bootstrap.cjs", () => {
fs.rmdirSync(tmpDir);
}
}),
it("should treat logEntries: null as missing entries for Claude guardrail (no safe outputs → setFailed)", () => {
it("should treat logEntries: null as missing entries for Claude guardrail (no safe outputs → setFailed)", async () => {
const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-"));
const logFile = path.join(tmpDir, "test.log");
try {
fs.writeFileSync(logFile, "some raw content");
process.env.GH_AW_AGENT_OUTPUT = logFile;
delete process.env.GH_AW_SAFE_OUTPUTS;
const mockParseLog = vi.fn().mockReturnValue({ markdown: "## Result\n", mcpFailures: [], maxTurnsHit: false, logEntries: null });
runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
await runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
expect(mockCore.setFailed).toHaveBeenCalledWith(`${ERR_CONFIG}: Claude execution failed: no structured log entries were produced. This usually indicates a startup or configuration error before tool execution.`);
} finally {
fs.unlinkSync(logFile);
delete process.env.GH_AW_AGENT_OUTPUT;
fs.rmdirSync(tmpDir);
}
}),
it("should treat logEntries: null as missing entries for Claude guardrail (safe outputs → warning)", () => {
it("should treat logEntries: null as missing entries for Claude guardrail (safe outputs → warning)", async () => {
const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-"));
const logFile = path.join(tmpDir, "test.log");
const safeOutputsFile = path.join(tmpDir, "safe-outputs.jsonl");
Expand All @@ -288,7 +291,7 @@ describe("log_parser_bootstrap.cjs", () => {
process.env.GH_AW_AGENT_OUTPUT = logFile;
process.env.GH_AW_SAFE_OUTPUTS = safeOutputsFile;
const mockParseLog = vi.fn().mockReturnValue({ markdown: "## Result\n", mcpFailures: [], maxTurnsHit: false, logEntries: null });
runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
await runLogParser({ parseLog: mockParseLog, parserName: "Claude" });
expect(mockCore.setFailed).not.toHaveBeenCalled();
expect(mockCore.warning).toHaveBeenCalledWith("Claude produced no structured log entries, but agent completed with 1 safe output entry — treating as non-fatal post-completion infrastructure failure");
} finally {
Expand All @@ -299,12 +302,12 @@ describe("log_parser_bootstrap.cjs", () => {
fs.rmdirSync(tmpDir);
}
}),
it("should handle max-turns limit reached", () => {
it("should handle max-turns limit reached", async () => {
const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-")),
logFile = path.join(tmpDir, "test.log");
(fs.writeFileSync(logFile, "content"), (process.env.GH_AW_AGENT_OUTPUT = logFile));
const mockParseLog = vi.fn().mockReturnValue({ markdown: "## Result\n", mcpFailures: [], maxTurnsHit: !0 });
(runLogParser({ parseLog: mockParseLog, parserName: "TestParser" }),
(await runLogParser({ parseLog: mockParseLog, parserName: "TestParser" }),

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/tdd] Comma-chain async test: cleanup won't execute if expect throws — same structural issue as lines 113 and 130. The GH_AW_AGENT_OUTPUT env var is also never cleared, which may bleed state into subsequent tests.

💡 Suggested refactor (try/finally)
it('should handle max-turns limit reached', async () => {
  const tmpDir = fs.mkdtempSync(path.join(__dirname, 'test-'));
  const logFile = path.join(tmpDir, 'test.log');
  try {
    fs.writeFileSync(logFile, 'content');
    process.env.GH_AW_AGENT_OUTPUT = logFile;
    const mockParseLog = vi.fn().mockReturnValue({ markdown: '## Result\n', mcpFailures: [], maxTurnsHit: true });
    await runLogParser({ parseLog: mockParseLog, parserName: 'TestParser' });
    expect(mockCore.setFailed).toHaveBeenCalledWith(`${ERR_VALIDATION}: Agent execution stopped: max-turns limit reached. The agent did not complete its task successfully.`);
  } finally {
    fs.unlinkSync(logFile);
    fs.rmdirSync(tmpDir);
  }
});

@copilot please address this.

expect(mockCore.setFailed).toHaveBeenCalledWith(`${ERR_VALIDATION}: Agent execution stopped: max-turns limit reached. The agent did not complete its task successfully.`),
fs.unlinkSync(logFile),
fs.rmdirSync(tmpDir));
Expand Down
4 changes: 2 additions & 2 deletions pkg/workflow/arc_dind_artifacts.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ func rewriteTmpGhAwPathsForArcDind(paths []string) []string {
result := make([]string, len(paths))
rewritten := 0
for i, p := range paths {
if strings.HasPrefix(p, constants.TmpGhAwDirSlash) {
if after, ok := strings.CutPrefix(p, constants.TmpGhAwDirSlash); ok {
// /tmp/gh-aw/foo → ${{ runner.temp }}/gh-aw/foo
result[i] = constants.GhAwRootDir + "/" + strings.TrimPrefix(p, constants.TmpGhAwDirSlash)
result[i] = constants.GhAwRootDir + "/" + after
rewritten++
} else if p == constants.TmpGhAwDir {
result[i] = constants.GhAwRootDir
Expand Down
Loading