-
Notifications
You must be signed in to change notification settings - Fork 433
fix: stop codex harness retry loop draining tokens on exhausted rate-limit reconnects #41385
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cc38c94
dcb9c48
1c7a311
8f2536b
8facdf3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ const { | |
| isMissingApiKeyError, | ||
| isServerError, | ||
| isInvalidModelError, | ||
| isReconnectExhaustedError, | ||
| countPermissionDeniedIssues, | ||
| hasNumerousPermissionDeniedIssues, | ||
| extractDeniedCommands, | ||
|
|
@@ -97,6 +98,10 @@ describe("codex_harness.cjs", () => { | |
| expect(isRateLimitError("RateLimitError: You exceeded your current quota")).toBe(true); | ||
| }); | ||
|
|
||
| it("returns true for 'Rate limit reached for' human-readable message", () => { | ||
| expect(isRateLimitError("Rate limit reached for gpt-4o-mini in organization org-xxx on tokens per min (TPM): " + "Limit 200000, Used 166655, Requested 35398. Please try again in 615ms.")).toBe(true); | ||
| }); | ||
|
|
||
| it("returns false for unrelated errors", () => { | ||
| expect(isRateLimitError("Error: ENOENT: no such file")).toBe(false); | ||
| expect(isRateLimitError("Fatal: out of memory")).toBe(false); | ||
|
|
@@ -408,14 +413,14 @@ env_key = "OPENAI_API_KEY" | |
| */ | ||
| function shouldRetry(result, attempt) { | ||
| if (result.exitCode === 0) return false; | ||
| const RATE_LIMIT_ERROR_PATTERN = /rate_limit_exceeded|429 Too Many Requests|RateLimitError/i; | ||
| const SERVER_ERROR_PATTERN = /InternalServerError|ServiceUnavailableError|500 Internal Server Error|503 Service Unavailable/i; | ||
| if (attempt === 0 && isAuthenticationFailedError(result.output)) return false; | ||
| if (isMissingApiKeyError(result.output)) return false; | ||
| if (hasNumerousPermissionDeniedIssues(result.output)) return false; | ||
| const nonRetryableGuard = detectNonRetryableHarnessGuard(result.output); | ||
| if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests || nonRetryableGuard.goalAlreadyActive) return false; | ||
| const isTransient = RATE_LIMIT_ERROR_PATTERN.test(result.output) || SERVER_ERROR_PATTERN.test(result.output); | ||
| if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests || nonRetryableGuard.goalAlreadyActive || nonRetryableGuard.maxRunsExceeded) return false; | ||
| const isRateLimit = isRateLimitError(result.output); | ||
| if (isRateLimit && isReconnectExhaustedError(result.output)) return false; | ||
| const isTransient = isRateLimit || isServerError(result.output); | ||
| return attempt < MAX_RETRIES && (result.hasOutput || isTransient); | ||
| } | ||
|
|
||
|
|
@@ -473,6 +478,73 @@ env_key = "OPENAI_API_KEY" | |
| }; | ||
| expect(shouldRetry(result, 0)).toBe(false); | ||
| }); | ||
|
|
||
| it("does not retry when maximum LLM invocations are exceeded", () => { | ||
| const result = { | ||
| exitCode: 1, | ||
| hasOutput: true, | ||
| output: '{"error":{"type":"max_runs_exceeded","message":"Maximum LLM invocations exceeded (20 / 20).","invocation_count":20,"max_runs":20}}', | ||
| }; | ||
| expect(shouldRetry(result, 0)).toBe(false); | ||
| }); | ||
|
|
||
| it("retries on rate limit with format 'Rate limit reached for' without exhausted reconnects", () => { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] The new 💡 Suggested addition to the isRateLimitError describe blockit("returns true for 'Rate limit reached for' human-readable message", () => {
expect(
isRateLimitError(
"Rate limit reached for gpt-4o-mini in organization org-xxx on tokens per min (TPM): " +
"Limit 200000, Used 166655, Requested 35398. Please try again in 615ms."
)
).toBe(true);
});Keeping each alternative in the unit test makes the contract of
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added in commit 8f2536b: the |
||
| const result = { | ||
| exitCode: 1, | ||
| hasOutput: false, | ||
| output: '{"type":"error","message":"Rate limit reached for gpt-4o-mini in organization org-xxx on tokens per min (TPM): Limit 200000, Used 50000, Requested 35000. Please try again in 615ms."}', | ||
| }; | ||
| expect(shouldRetry(result, 0)).toBe(true); | ||
| }); | ||
|
|
||
| it("does not retry when rate-limit reconnects are exhausted (N/N pattern)", () => { | ||
| // Simulates the real log format: multiple Reconnecting... lines appear in | ||
| // the output as codex retries the stream. The final "5/5" line is what | ||
| // triggers the exhausted-reconnect detection; intermediate lines (1/5, 2/5) | ||
| // confirm that the function ignores non-final attempts. | ||
| const output = | ||
| '{"type":"error","message":"Reconnecting... 1/5 (stream disconnected before completion: Rate limit reached for gpt-4o-mini on tokens per min (TPM): Limit 200000, Used 166655, Requested 35398. Please try again in 615ms.)"}\n' + | ||
| '{"type":"error","message":"Reconnecting... 2/5 (stream disconnected before completion: Rate limit reached for gpt-4o-mini on tokens per min (TPM): Limit 200000, Used 166655, Requested 35398. Please try again in 615ms.)"}\n' + | ||
| '{"type":"error","message":"Reconnecting... 5/5 (stream disconnected before completion: Rate limit reached for gpt-4o-mini on tokens per min (TPM): Limit 200000, Used 166655, Requested 35398. Please try again in 615ms.)"}'; | ||
| const result = { exitCode: 1, hasOutput: true, output }; | ||
| expect(shouldRetry(result, 0)).toBe(false); | ||
| }); | ||
|
|
||
| it("retries when reconnects are exhausted but no rate-limit error is present", () => { | ||
| const output = | ||
| '{"type":"error","message":"Reconnecting... 1/5 (stream disconnected before completion: Connection timed out)"}\n' + '{"type":"error","message":"Reconnecting... 5/5 (stream disconnected before completion: Connection timed out)"}'; | ||
| const result = { exitCode: 1, hasOutput: true, output }; | ||
| expect(shouldRetry(result, 0)).toBe(true); | ||
| }); | ||
| }); | ||
|
|
||
| describe("isReconnectExhaustedError", () => { | ||
| it("returns true when output contains Reconnecting N/N pattern (same numbers)", () => { | ||
| expect(isReconnectExhaustedError("Reconnecting... 5/5 (some error)")).toBe(true); | ||
| }); | ||
|
|
||
| it("returns true for last reconnect embedded in JSON output", () => { | ||
| const output = '{"type":"error","message":"Reconnecting... 5/5 (stream disconnected before completion: Rate limit reached for gpt-4o-mini...)"}'; | ||
| expect(isReconnectExhaustedError(output)).toBe(true); | ||
| }); | ||
|
|
||
| it("returns false when reconnect attempt is not the last (different numbers)", () => { | ||
| expect(isReconnectExhaustedError("Reconnecting... 1/5 (some error)")).toBe(false); | ||
| expect(isReconnectExhaustedError("Reconnecting... 3/5 (some error)")).toBe(false); | ||
| }); | ||
|
|
||
| it("returns false when output has no reconnect messages", () => { | ||
| expect(isReconnectExhaustedError("rate_limit_exceeded")).toBe(false); | ||
| expect(isReconnectExhaustedError("")).toBe(false); | ||
| }); | ||
|
|
||
| it("returns true for multi-digit N/N", () => { | ||
| expect(isReconnectExhaustedError("Reconnecting... 10/10 (error)")).toBe(true); | ||
| }); | ||
|
|
||
| it("returns false for N/M where N !== M", () => { | ||
| expect(isReconnectExhaustedError("Reconnecting... 10/15 (error)")).toBe(false); | ||
| }); | ||
| }); | ||
|
|
||
| describe("noop pre-flight and retry guard", () => { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hard-stopping on exhausted reconnects is permanent — it discards the backoff delay that could have allowed the TPM window to roll over.
💡 Details
The harness's exponential backoff (
INITIAL_DELAY_MS = 5000, maxMAX_DELAY_MS = 60000) between attempts was designed precisely to let transient limits expire before a fresh run. OpenAI TPM windows are 1-minute rolling windows. By the time a first codex attempt finishes (~15 min per the PR description), runs 60 s of backoff, and reaches attempt 2, the earliest tokens consumed are well outside the window — the token budget has substantially rolled over.vs the old behavior (wrongly retrying three times) vs the better behaviour (retry once after max backoff):
Suggested alternative: instead of
break, apply a fixed long-delay sleep (e.g., 2× the TPM window = 120 s) then allow exactly one more attempt, rather than zero. If that also exhausts reconnects, then break.At minimum, document the tradeoff: if the failure is truly non-retryable (rate limit exceeds session budget regardless of timing), the current change is correct. If the failure is timing-based (instantaneous limit with rolling window), the current change silently discards the recovery path.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Kept current behavior intentionally. This PR targets the token-drain failure mode where exhausted reconnects repeatedly consume budget without recovery. We now stop only when both conditions are present (
rate-limit+ exhaustedN/Nreconnects), while keeping normal transient retries in place for non-exhausted reconnects and other transient failures.