Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 43 additions & 5 deletions actions/setup/js/codex_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,23 @@ const BACKOFF_MULTIPLIER = 2;
// Maximum delay cap in milliseconds
const MAX_DELAY_MS = 60000;

// Pattern to detect OpenAI rate-limit errors (HTTP 429).
// Matches "rate_limit_exceeded" from the OpenAI error type field and the "429" status code
// that Codex emits when the API rate limit is hit.
const RATE_LIMIT_ERROR_PATTERN = /rate_limit_exceeded|429 Too Many Requests|RateLimitError/i;
// Pattern to detect OpenAI rate-limit errors.
// Matches the JSON error type field ("rate_limit_exceeded"), the HTTP status code
// ("429 Too Many Requests"), the client-side exception class ("RateLimitError"), and
// the human-readable message Codex emits inside "Reconnecting..." / error lines:
// "Rate limit reached for <model> in organization <org> on tokens per min (TPM): ..."
const RATE_LIMIT_ERROR_PATTERN = /rate_limit_exceeded|429 Too Many Requests|RateLimitError|Rate limit reached for [^\s]+(?: in organization [^\s]+)? on tokens per min/i;

// Pattern to detect when Codex's internal stream-reconnect budget is fully spent.
// Codex emits "Reconnecting... N/N (reason)" where both numbers are the same when
// the reconnect is the last allowed attempt. Seeing this pattern together with a
// rate-limit error means the session cannot make forward progress: every reconnect
// attempt immediately fails with the same rate-limit, and a fresh harness run will
// re-encounter the same limit since the same work pattern consumes the same TPM budget.
//
// The backreference \1 requires the two numeric parts of "N/N" to be identical —
// "5/5" matches (exhausted) but "1/5", "3/5", "4/5" do not (still retrying).
const RECONNECT_EXHAUSTED_PATTERN = /Reconnecting\.\.\.\s+(\d+)\/\1\b/;
const AUTHENTICATION_FAILED_PATTERN = /Authentication failed(?:\s*\(Request ID:[^)]+\))?/i;

// Pattern to detect a missing API key at startup — Codex emits this before making any API
Expand Down Expand Up @@ -130,6 +143,20 @@ function isInvalidModelError(output) {
return INVALID_MODEL_ERROR_PATTERN.test(output);
}

/**
 * Reports whether the collected output contains Codex's final stream-reconnect
 * marker, i.e. a "Reconnecting... N/N" line whose two numbers are identical.
 *
 * The check is delegated to RECONNECT_EXHAUSTED_PATTERN. Callers pair this result
 * with a rate-limit check: when both hold, a fresh run would hit the same rate
 * limit again and only drain the token budget further.
 * @param {string} output - Collected stdout+stderr from the process
 * @returns {boolean} true when an exhausted-reconnect marker is present
 */
function isReconnectExhaustedError(output) {
  const reconnectBudgetSpent = RECONNECT_EXHAUSTED_PATTERN.test(output);
  return reconnectBudgetSpent;
}

/**
* Resolve --prompt-file arguments for the Codex run.
* Strips the --prompt-file <path> pair from args and appends the file content
Expand Down Expand Up @@ -439,11 +466,12 @@ async function main() {
}

const nonRetryableGuard = detectNonRetryableHarnessGuard(result.output);
if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests || nonRetryableGuard.goalAlreadyActive) {
if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests || nonRetryableGuard.goalAlreadyActive || nonRetryableGuard.maxRunsExceeded) {
const reasons = [];
if (nonRetryableGuard.aiCreditsExceeded) reasons.push("AI credits budget exceeded");
if (nonRetryableGuard.awfAPIProxyBlockingRequests) reasons.push("AWF API proxy is blocking requests");
if (nonRetryableGuard.goalAlreadyActive) reasons.push("goal is already active for this thread (use update_goal when the current goal is complete)");
if (nonRetryableGuard.maxRunsExceeded) reasons.push("maximum LLM invocations exceeded");
log(`attempt ${attempt + 1}: ${reasons.join(" and ")} — not retrying (non-retryable guard condition)`);
break;
}
Expand All @@ -470,6 +498,15 @@ async function main() {
break;
}

// Codex's internal stream-reconnect retries are exhausted and the root cause is a
// rate-limit error. Each reconnect attempt immediately failed with the same limit,
// so a fresh harness run will encounter the same rate-limit at the same point in the
// session and drain the token budget further without making progress.
if (isRateLimit && isReconnectExhaustedError(result.output)) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hard-stopping on exhausted reconnects is permanent — it discards the backoff delay that could have allowed the TPM window to roll over.

💡 Details

The harness's exponential backoff between attempts (INITIAL_DELAY_MS = 5000, capped at MAX_DELAY_MS = 60000) was designed precisely to let transient limits expire before a fresh run. OpenAI TPM windows are 1-minute rolling windows. By the time the first codex attempt has finished (~15 min per the PR description), the harness has waited out 60 s of backoff, and attempt 2 begins, the earliest tokens consumed are well outside the window — the token budget has substantially rolled over.

Attempt 1 — t=0:  runs 15 min, hits exhausted reconnect at t=15 min
            break → harness exits entirely
            ← NEW BEHAVIOR: all backoff opportunity is discarded

Compare this with the old behavior (wrongly retrying three times) and with the better behavior (retrying once after the maximum backoff):

Attempt 1 — t=0:  runs 15 min, hits rate-limit with exhausted reconnect
Backoff:    t=15: wait MAX_DELAY_MS (60 s) — TPM window rolls over
Attempt 2 — t=16: fresh run, window is partially cleared, succeeds (or fails cleanly)

Suggested alternative: instead of break, apply a fixed long-delay sleep (e.g., 2× the TPM window = 120 s) then allow exactly one more attempt, rather than zero. If that also exhausts reconnects, then break.

At minimum, document the tradeoff: if the failure is truly non-retryable (rate limit exceeds session budget regardless of timing), the current change is correct. If the failure is timing-based (instantaneous limit with rolling window), the current change silently discards the recovery path.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kept current behavior intentionally. This PR targets the token-drain failure mode where exhausted reconnects repeatedly consume budget without recovery. We now stop only when both conditions are present (rate-limit + exhausted N/N reconnects), while keeping normal transient retries in place for non-exhausted reconnects and other transient failures.

log(`attempt ${attempt + 1}: rate-limit with exhausted reconnects — not retrying (fresh run would hit the same rate limit)`);
break;
}

// Retry when the session was partially executed (has output) or on well-known
// transient errors (rate limit, server error) even without output.
const isTransient = isRateLimit || isServer;
Expand Down Expand Up @@ -504,6 +541,7 @@ if (typeof module !== "undefined" && module.exports) {
isMissingApiKeyError,
isServerError,
isInvalidModelError,
isReconnectExhaustedError,
countPermissionDeniedIssues,
hasNumerousPermissionDeniedIssues,
extractDeniedCommands,
Expand Down
80 changes: 76 additions & 4 deletions actions/setup/js/codex_harness.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const {
isMissingApiKeyError,
isServerError,
isInvalidModelError,
isReconnectExhaustedError,
countPermissionDeniedIssues,
hasNumerousPermissionDeniedIssues,
extractDeniedCommands,
Expand Down Expand Up @@ -97,6 +98,10 @@ describe("codex_harness.cjs", () => {
expect(isRateLimitError("RateLimitError: You exceeded your current quota")).toBe(true);
});

it("returns true for 'Rate limit reached for' human-readable message", () => {
expect(isRateLimitError("Rate limit reached for gpt-4o-mini in organization org-xxx on tokens per min (TPM): " + "Limit 200000, Used 166655, Requested 35398. Please try again in 615ms.")).toBe(true);
});

it("returns false for unrelated errors", () => {
expect(isRateLimitError("Error: ENOENT: no such file")).toBe(false);
expect(isRateLimitError("Fatal: out of memory")).toBe(false);
Expand Down Expand Up @@ -408,14 +413,14 @@ env_key = "OPENAI_API_KEY"
*/
function shouldRetry(result, attempt) {
if (result.exitCode === 0) return false;
const RATE_LIMIT_ERROR_PATTERN = /rate_limit_exceeded|429 Too Many Requests|RateLimitError/i;
const SERVER_ERROR_PATTERN = /InternalServerError|ServiceUnavailableError|500 Internal Server Error|503 Service Unavailable/i;
if (attempt === 0 && isAuthenticationFailedError(result.output)) return false;
if (isMissingApiKeyError(result.output)) return false;
if (hasNumerousPermissionDeniedIssues(result.output)) return false;
const nonRetryableGuard = detectNonRetryableHarnessGuard(result.output);
if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests || nonRetryableGuard.goalAlreadyActive) return false;
const isTransient = RATE_LIMIT_ERROR_PATTERN.test(result.output) || SERVER_ERROR_PATTERN.test(result.output);
if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests || nonRetryableGuard.goalAlreadyActive || nonRetryableGuard.maxRunsExceeded) return false;
const isRateLimit = isRateLimitError(result.output);
if (isRateLimit && isReconnectExhaustedError(result.output)) return false;
const isTransient = isRateLimit || isServerError(result.output);
return attempt < MAX_RETRIES && (result.hasOutput || isTransient);
}

Expand Down Expand Up @@ -473,6 +478,73 @@ env_key = "OPENAI_API_KEY"
};
expect(shouldRetry(result, 0)).toBe(false);
});

it("does not retry when maximum LLM invocations are exceeded", () => {
const result = {
exitCode: 1,
hasOutput: true,
output: '{"error":{"type":"max_runs_exceeded","message":"Maximum LLM invocations exceeded (20 / 20).","invocation_count":20,"max_runs":20}}',
};
expect(shouldRetry(result, 0)).toBe(false);
});

it("retries on rate limit with format 'Rate limit reached for' without exhausted reconnects", () => {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/tdd] The new "Rate limit reached for ..." format is tested here through the shouldRetry integration helper, but the dedicated isRateLimitError unit-test block (lines 88–110) was not extended to cover this variant. That block already has cases for each of the other three alternatives.

💡 Suggested addition to the isRateLimitError describe block
it("returns true for 'Rate limit reached for' human-readable message", () => {
  expect(
    isRateLimitError(
      "Rate limit reached for gpt-4o-mini in organization org-xxx on tokens per min (TPM): " +
      "Limit 200000, Used 166655, Requested 35398. Please try again in 615ms."
    )
  ).toBe(true);
});

Keeping each alternative in the unit test makes the contract of isRateLimitError self-documenting and ensures regression detection stays close to the function under test.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in commit 8f2536b: the isRateLimitError unit-test block now includes a dedicated case for the human-readable Rate limit reached for ... on tokens per min message.

const result = {
exitCode: 1,
hasOutput: false,
output: '{"type":"error","message":"Rate limit reached for gpt-4o-mini in organization org-xxx on tokens per min (TPM): Limit 200000, Used 50000, Requested 35000. Please try again in 615ms."}',
};
expect(shouldRetry(result, 0)).toBe(true);
});

it("does not retry when rate-limit reconnects are exhausted (N/N pattern)", () => {
// Simulates the real log format: multiple Reconnecting... lines appear in
// the output as codex retries the stream. The final "5/5" line is what
// triggers the exhausted-reconnect detection; intermediate lines (1/5, 2/5)
// confirm that the function ignores non-final attempts.
const output =
'{"type":"error","message":"Reconnecting... 1/5 (stream disconnected before completion: Rate limit reached for gpt-4o-mini on tokens per min (TPM): Limit 200000, Used 166655, Requested 35398. Please try again in 615ms.)"}\n' +
'{"type":"error","message":"Reconnecting... 2/5 (stream disconnected before completion: Rate limit reached for gpt-4o-mini on tokens per min (TPM): Limit 200000, Used 166655, Requested 35398. Please try again in 615ms.)"}\n' +
'{"type":"error","message":"Reconnecting... 5/5 (stream disconnected before completion: Rate limit reached for gpt-4o-mini on tokens per min (TPM): Limit 200000, Used 166655, Requested 35398. Please try again in 615ms.)"}';
const result = { exitCode: 1, hasOutput: true, output };
expect(shouldRetry(result, 0)).toBe(false);
});

it("retries when reconnects are exhausted but no rate-limit error is present", () => {
const output =
'{"type":"error","message":"Reconnecting... 1/5 (stream disconnected before completion: Connection timed out)"}\n' + '{"type":"error","message":"Reconnecting... 5/5 (stream disconnected before completion: Connection timed out)"}';
const result = { exitCode: 1, hasOutput: true, output };
expect(shouldRetry(result, 0)).toBe(true);
});
});

describe("isReconnectExhaustedError", () => {
it("returns true when output contains Reconnecting N/N pattern (same numbers)", () => {
expect(isReconnectExhaustedError("Reconnecting... 5/5 (some error)")).toBe(true);
});

it("returns true for last reconnect embedded in JSON output", () => {
const output = '{"type":"error","message":"Reconnecting... 5/5 (stream disconnected before completion: Rate limit reached for gpt-4o-mini...)"}';
expect(isReconnectExhaustedError(output)).toBe(true);
});

it("returns false when reconnect attempt is not the last (different numbers)", () => {
expect(isReconnectExhaustedError("Reconnecting... 1/5 (some error)")).toBe(false);
expect(isReconnectExhaustedError("Reconnecting... 3/5 (some error)")).toBe(false);
});

it("returns false when output has no reconnect messages", () => {
expect(isReconnectExhaustedError("rate_limit_exceeded")).toBe(false);
expect(isReconnectExhaustedError("")).toBe(false);
});

it("returns true for multi-digit N/N", () => {
expect(isReconnectExhaustedError("Reconnecting... 10/10 (error)")).toBe(true);
});

it("returns false for N/M where N !== M", () => {
expect(isReconnectExhaustedError("Reconnecting... 10/15 (error)")).toBe(false);
});
});

describe("noop pre-flight and retry guard", () => {
Expand Down
Loading