github · pelikhan · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026
diff --git a/actions/setup/js/codex_harness.cjs b/actions/setup/js/codex_harness.cjs
@@ -58,10 +58,23 @@ const BACKOFF_MULTIPLIER = 2;
 // Maximum delay cap in milliseconds
 const MAX_DELAY_MS = 60000;
 
-// Pattern to detect OpenAI rate-limit errors (HTTP 429).
-// Matches "rate_limit_exceeded" from the OpenAI error type field and the "429" status code
-// that Codex emits when the API rate limit is hit.
-const RATE_LIMIT_ERROR_PATTERN = /rate_limit_exceeded|429 Too Many Requests|RateLimitError/i;
+// Pattern to detect OpenAI rate-limit errors.
+// Matches the JSON error type field ("rate_limit_exceeded"), the HTTP status code
+// ("429 Too Many Requests"), the client-side exception class ("RateLimitError"), and
+// the human-readable message Codex emits inside "Reconnecting..." / error lines:
+// "Rate limit reached for <model> [in organization <org>] on <tokens|requests> per <min|day> ..."
+const RATE_LIMIT_ERROR_PATTERN = /rate_limit_exceeded|429 Too Many Requests|RateLimitError|Rate limit reached for \S+(?: in organization \S+)? on (?:tokens|requests) per (?:min|day)/i;
+
+// Pattern to detect when Codex's internal stream-reconnect budget is fully spent.
+// Codex emits "Reconnecting... N/N (reason)" where both numbers are the same when
+// the reconnect is the last allowed attempt. The pattern does not inspect (reason);
+// callers pair it with a separate rate-limit check. Seeing both means the session
+// cannot make forward progress: reconnects keep failing on the rate limit, and a
+// fresh harness run would hit it again since the same work consumes the same TPM.
+//
+// The backreference \1 requires the two numeric parts of "N/N" to be identical —
+// "5/5" matches (exhausted) but "1/5", "3/5", "4/5" do not (still retrying).
+const RECONNECT_EXHAUSTED_PATTERN = /Reconnecting\.\.\.\s+(\d+)\/\1\b/;
 const AUTHENTICATION_FAILED_PATTERN = /Authentication failed(?:\s*\(Request ID:[^)]+\))?/i;
 
 // Pattern to detect a missing API key at startup — Codex emits this before making any API
@@ -130,6 +143,20 @@ function isInvalidModelError(output) {
   return INVALID_MODEL_ERROR_PATTERN.test(output);
 }
 
+/**
+ * Determines if the collected output shows that Codex's internal stream-reconnect
+ * retries are exhausted (i.e., the output contains "Reconnecting... N/N" where both
+ * numbers are the same, indicating the last reconnect attempt).
+ *
+ * When this is true together with a rate-limit error, retrying from scratch would
+ * immediately encounter the same rate limit and drain the token budget further.
+ * @param {string} output - Collected stdout+stderr from the process
+ * @returns {boolean} true if a "Reconnecting... N/N" marker with equal numbers is found (the reason text is not checked)
+ */
+function isReconnectExhaustedError(output) {
+  return RECONNECT_EXHAUSTED_PATTERN.test(output);
+}
+
 /**
  * Resolve --prompt-file arguments for the Codex run.
  * Strips the --prompt-file <path> pair from args and appends the file content
@@ -439,11 +466,12 @@ async function main() {
     }
 
     const nonRetryableGuard = detectNonRetryableHarnessGuard(result.output);
-    if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests || nonRetryableGuard.goalAlreadyActive) {
+    if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests || nonRetryableGuard.goalAlreadyActive || nonRetryableGuard.maxRunsExceeded) {
       const reasons = [];
       if (nonRetryableGuard.aiCreditsExceeded) reasons.push("AI credits budget exceeded");
       if (nonRetryableGuard.awfAPIProxyBlockingRequests) reasons.push("AWF API proxy is blocking requests");
       if (nonRetryableGuard.goalAlreadyActive) reasons.push("goal is already active for this thread (use update_goal when the current goal is complete)");
+      if (nonRetryableGuard.maxRunsExceeded) reasons.push("maximum LLM invocations exceeded");
       log(`attempt ${attempt + 1}: ${reasons.join(" and ")} — not retrying (non-retryable guard condition)`);
       break;
     }
@@ -470,6 +498,15 @@ async function main() {
       break;
     }
 
+    // Codex's internal stream-reconnect retries are exhausted and the root cause is a
+    // rate-limit error.  Each reconnect attempt immediately failed with the same limit,
+    // so a fresh harness run will encounter the same rate-limit at the same point in the
+    // session and drain the token budget further without making progress.
+    if (isRateLimit && isReconnectExhaustedError(result.output)) {
+      log(`attempt ${attempt + 1}: rate-limit with exhausted reconnects — not retrying (fresh run would hit the same rate limit)`);
+      break;
+    }
+
     // Retry when the session was partially executed (has output) or on well-known
     // transient errors (rate limit, server error) even without output.
     const isTransient = isRateLimit || isServer;
@@ -504,6 +541,7 @@ if (typeof module !== "undefined" && module.exports) {
     isMissingApiKeyError,
     isServerError,
     isInvalidModelError,
+    isReconnectExhaustedError,
     countPermissionDeniedIssues,
     hasNumerousPermissionDeniedIssues,
     extractDeniedCommands,

diff --git a/actions/setup/js/codex_harness.test.cjs b/actions/setup/js/codex_harness.test.cjs
@@ -14,6 +14,7 @@ const {
   isMissingApiKeyError,
   isServerError,
   isInvalidModelError,
+  isReconnectExhaustedError,
   countPermissionDeniedIssues,
   hasNumerousPermissionDeniedIssues,
   extractDeniedCommands,
@@ -97,6 +98,10 @@ describe("codex_harness.cjs", () => {
       expect(isRateLimitError("RateLimitError: You exceeded your current quota")).toBe(true);
     });
 
+    it("returns true for 'Rate limit reached for' human-readable message", () => {
+      expect(isRateLimitError("Rate limit reached for gpt-4o-mini in organization org-xxx on tokens per min (TPM): " + "Limit 200000, Used 166655, Requested 35398. Please try again in 615ms.")).toBe(true);
+    });
+
     it("returns false for unrelated errors", () => {
       expect(isRateLimitError("Error: ENOENT: no such file")).toBe(false);
       expect(isRateLimitError("Fatal: out of memory")).toBe(false);
@@ -408,14 +413,14 @@ env_key = "OPENAI_API_KEY"
      */
     function shouldRetry(result, attempt) {
       if (result.exitCode === 0) return false;
-      const RATE_LIMIT_ERROR_PATTERN = /rate_limit_exceeded|429 Too Many Requests|RateLimitError/i;
-      const SERVER_ERROR_PATTERN = /InternalServerError|ServiceUnavailableError|500 Internal Server Error|503 Service Unavailable/i;
       if (attempt === 0 && isAuthenticationFailedError(result.output)) return false;
       if (isMissingApiKeyError(result.output)) return false;
       if (hasNumerousPermissionDeniedIssues(result.output)) return false;
       const nonRetryableGuard = detectNonRetryableHarnessGuard(result.output);
-      if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests || nonRetryableGuard.goalAlreadyActive) return false;
-      const isTransient = RATE_LIMIT_ERROR_PATTERN.test(result.output) || SERVER_ERROR_PATTERN.test(result.output);
+      if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests || nonRetryableGuard.goalAlreadyActive || nonRetryableGuard.maxRunsExceeded) return false;
+      const isRateLimit = isRateLimitError(result.output);
+      if (isRateLimit && isReconnectExhaustedError(result.output)) return false;
+      const isTransient = isRateLimit || isServerError(result.output);
       return attempt < MAX_RETRIES && (result.hasOutput || isTransient);
     }
 
@@ -473,6 +478,73 @@ env_key = "OPENAI_API_KEY"
       };
       expect(shouldRetry(result, 0)).toBe(false);
     });
+
+    it("does not retry when maximum LLM invocations are exceeded", () => {
+      const result = {
+        exitCode: 1,
+        hasOutput: true,
+        output: '{"error":{"type":"max_runs_exceeded","message":"Maximum LLM invocations exceeded (20 / 20).","invocation_count":20,"max_runs":20}}',
+      };
+      expect(shouldRetry(result, 0)).toBe(false);
+    });
+
+    it("retries on rate limit with format 'Rate limit reached for' without exhausted reconnects", () => {
+      const result = {
+        exitCode: 1,
+        hasOutput: false,
+        output: '{"type":"error","message":"Rate limit reached for gpt-4o-mini in organization org-xxx on tokens per min (TPM): Limit 200000, Used 50000, Requested 35000. Please try again in 615ms."}',
+      };
+      expect(shouldRetry(result, 0)).toBe(true);
+    });
+
+    it("does not retry when rate-limit reconnects are exhausted (N/N pattern)", () => {
+      // Simulates the real log format: multiple Reconnecting... lines appear in
+      // the output as codex retries the stream. The final "5/5" line is what
+      // triggers the exhausted-reconnect detection; the intermediate lines (1/5,
+      // 2/5) are realistic context only and do not match the pattern themselves.
+      const output =
+        '{"type":"error","message":"Reconnecting... 1/5 (stream disconnected before completion: Rate limit reached for gpt-4o-mini on tokens per min (TPM): Limit 200000, Used 166655, Requested 35398. Please try again in 615ms.)"}\n' +
+        '{"type":"error","message":"Reconnecting... 2/5 (stream disconnected before completion: Rate limit reached for gpt-4o-mini on tokens per min (TPM): Limit 200000, Used 166655, Requested 35398. Please try again in 615ms.)"}\n' +
+        '{"type":"error","message":"Reconnecting... 5/5 (stream disconnected before completion: Rate limit reached for gpt-4o-mini on tokens per min (TPM): Limit 200000, Used 166655, Requested 35398. Please try again in 615ms.)"}';
+      const result = { exitCode: 1, hasOutput: true, output };
+      expect(shouldRetry(result, 0)).toBe(false);
+    });
+
+    it("retries when reconnects are exhausted but no rate-limit error is present", () => {
+      const output =
+        '{"type":"error","message":"Reconnecting... 1/5 (stream disconnected before completion: Connection timed out)"}\n' + '{"type":"error","message":"Reconnecting... 5/5 (stream disconnected before completion: Connection timed out)"}';
+      const result = { exitCode: 1, hasOutput: true, output };
+      expect(shouldRetry(result, 0)).toBe(true);
+    });
+  });
+
+  describe("isReconnectExhaustedError", () => {
+    it("returns true when output contains Reconnecting N/N pattern (same numbers)", () => {
+      expect(isReconnectExhaustedError("Reconnecting... 5/5 (some error)")).toBe(true);
+    });
+
+    it("returns true for last reconnect embedded in JSON output", () => {
+      const output = '{"type":"error","message":"Reconnecting... 5/5 (stream disconnected before completion: Rate limit reached for gpt-4o-mini...)"}';
+      expect(isReconnectExhaustedError(output)).toBe(true);
+    });
+
+    it("returns false when reconnect attempt is not the last (different numbers)", () => {
+      expect(isReconnectExhaustedError("Reconnecting... 1/5 (some error)")).toBe(false);
+      expect(isReconnectExhaustedError("Reconnecting... 3/5 (some error)")).toBe(false);
+    });
+
+    it("returns false when output has no reconnect messages", () => {
+      expect(isReconnectExhaustedError("rate_limit_exceeded")).toBe(false);
+      expect(isReconnectExhaustedError("")).toBe(false);
+    });
+
+    it("returns true for multi-digit N/N", () => {
+      expect(isReconnectExhaustedError("Reconnecting... 10/10 (error)")).toBe(true);
+    });
+
+    it("returns false for N/M where N !== M", () => {
+      expect(isReconnectExhaustedError("Reconnecting... 10/15 (error)")).toBe(false);
+    });
   });
 
   describe("noop pre-flight and retry guard", () => {