diff --git a/internal/config/config.go b/internal/config/config.go index 15847f57e0..9788a2c548 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -365,6 +365,16 @@ type ClaudeKey struct { // ExcludedModels lists model IDs that should be excluded for this provider. ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"` + // ResponseHeaderTimeout limits how long to wait for the upstream to start + // responding (in seconds). Once the first response byte arrives, this timeout + // no longer applies — streaming responses are not affected. 0 means no timeout. + ResponseHeaderTimeout int `yaml:"response-header-timeout,omitempty" json:"response-header-timeout,omitempty"` + + // TransientErrorCooldown overrides the default 1-minute cooldown applied when + // a transient error (408/500/502/503/504/524) is received from this upstream + // (in seconds). 0 means use the default (60s). + TransientErrorCooldown int `yaml:"transient-error-cooldown,omitempty" json:"transient-error-cooldown,omitempty"` + // Cloak configures request cloaking for non-Claude-Code clients. 
Cloak *CloakConfig `yaml:"cloak,omitempty" json:"cloak,omitempty"` diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 6f218f9653..e39dca2582 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -10,6 +10,7 @@ import ( "encoding/hex" "fmt" "io" + "net" "net/http" "net/textproto" "strings" @@ -192,6 +193,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r httpResp, err := httpClient.Do(httpReq) if err != nil { helps.RecordAPIResponseError(ctx, e.cfg, err) + if netErr, ok := err.(net.Error); ok && netErr.Timeout() { + return resp, statusErr{code: http.StatusGatewayTimeout, msg: fmt.Sprintf("upstream timeout: %v", err)} + } return resp, err } helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) @@ -360,6 +364,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A httpResp, err := httpClient.Do(httpReq) if err != nil { helps.RecordAPIResponseError(ctx, e.cfg, err) + if netErr, ok := err.(net.Error); ok && netErr.Timeout() { + return nil, statusErr{code: http.StatusGatewayTimeout, msg: fmt.Sprintf("upstream timeout: %v", err)} + } return nil, err } helps.RecordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) diff --git a/internal/runtime/executor/helps/proxy_helpers.go b/internal/runtime/executor/helps/proxy_helpers.go index 022bc65c17..731798c059 100644 --- a/internal/runtime/executor/helps/proxy_helpers.go +++ b/internal/runtime/executor/helps/proxy_helpers.go @@ -3,6 +3,7 @@ package helps import ( "context" "net/http" + "strconv" "strings" "time" @@ -31,6 +32,8 @@ func NewProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip httpClient.Timeout = timeout } + respHeaderTimeout := responseHeaderTimeoutFromAuth(auth) + // Priority 1: Use auth.ProxyURL if configured var proxyURL string if auth != nil { @@ 
-46,7 +49,7 @@ func NewProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip
 	if proxyURL != "" {
 		transport := buildProxyTransport(proxyURL)
 		if transport != nil {
-			httpClient.Transport = transport
+			httpClient.Transport = applyResponseHeaderTimeout(transport, respHeaderTimeout)
 			return httpClient
 		}
 		// If proxy setup failed, log and fall through to context RoundTripper
@@ -58,9 +61,52 @@ func NewProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip
 		httpClient.Transport = rt
 	}
 
+	httpClient.Transport = applyResponseHeaderTimeout(httpClient.Transport, respHeaderTimeout)
+
 	return httpClient
 }
 
+// responseHeaderTimeoutFromAuth reads the "response_header_timeout" auth
+// attribute (whole seconds) and converts it to a time.Duration. It returns 0,
+// meaning "no timeout", when auth or its attributes are nil, or when the value
+// is missing, not an integer, or not positive.
+func responseHeaderTimeoutFromAuth(auth *cliproxyauth.Auth) time.Duration {
+	if auth == nil || auth.Attributes == nil {
+		return 0
+	}
+	secs, err := strconv.Atoi(auth.Attributes["response_header_timeout"])
+	if err != nil || secs <= 0 {
+		return 0
+	}
+	return time.Duration(secs) * time.Second
+}
+
+// applyResponseHeaderTimeout returns a RoundTripper whose
+// http.Transport.ResponseHeaderTimeout is set to timeout. The input is never
+// mutated: an *http.Transport is cloned, and a nil rt is replaced by a clone of
+// http.DefaultTransport. A non-positive timeout returns rt unchanged.
+//
+// RoundTrippers of any other type are returned as-is, because the timeout can
+// only be configured on *http.Transport; in that case it is not enforced.
+func applyResponseHeaderTimeout(rt http.RoundTripper, timeout time.Duration) http.RoundTripper {
+	if timeout <= 0 {
+		return rt
+	}
+	base, ok := rt.(*http.Transport)
+	if rt == nil || (ok && base == nil) {
+		// Nothing usable was supplied (nil interface or typed-nil pointer, on
+		// which Clone would dereference nil): start from the default transport.
+		// The assertion is checked because http.DefaultTransport may be replaced.
+		base, ok = http.DefaultTransport.(*http.Transport)
+	}
+	if !ok || base == nil {
+		return rt
+	}
+	cloned := base.Clone()
+	cloned.ResponseHeaderTimeout = timeout
+	return cloned
+}
+
 // buildProxyTransport creates an HTTP transport configured for the given proxy URL.
 // It supports SOCKS5, HTTP, and HTTPS proxy protocols.
// diff --git a/internal/runtime/executor/helps/utls_client.go b/internal/runtime/executor/helps/utls_client.go index 39512a58de..24cbbff058 100644 --- a/internal/runtime/executor/helps/utls_client.go +++ b/internal/runtime/executor/helps/utls_client.go @@ -174,6 +174,7 @@ func NewUtlsHTTPClient(cfg *config.Config, auth *cliproxyauth.Auth, timeout time standardTransport = transport } } + standardTransport = applyResponseHeaderTimeout(standardTransport, responseHeaderTimeoutFromAuth(auth)) client := &http.Client{ Transport: &fallbackRoundTripper{ diff --git a/internal/watcher/synthesizer/config.go b/internal/watcher/synthesizer/config.go index 52ae9a4808..ad36edf724 100644 --- a/internal/watcher/synthesizer/config.go +++ b/internal/watcher/synthesizer/config.go @@ -116,6 +116,12 @@ func (s *ConfigSynthesizer) synthesizeClaudeKeys(ctx *SynthesisContext) []*corea if hash := diff.ComputeClaudeModelsHash(ck.Models); hash != "" { attrs["models_hash"] = hash } + if ck.ResponseHeaderTimeout > 0 { + attrs["response_header_timeout"] = strconv.Itoa(ck.ResponseHeaderTimeout) + } + if ck.TransientErrorCooldown > 0 { + attrs["transient_error_cooldown"] = strconv.Itoa(ck.TransientErrorCooldown) + } addConfigHeadersToAttrs(ck.Headers, attrs) proxyURL := strings.TrimSpace(ck.ProxyURL) a := &coreauth.Auth{ diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 478c7921ff..00a8f831a3 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1894,11 +1894,19 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) { suspendReason = "quota" shouldSuspendModel = true setModelQuota = true - case 408, 500, 502, 503, 504: + case 408, 500, 502, 503, 504, 524: if quotaCooldownDisabledForAuth(auth) { state.NextRetryAfter = time.Time{} } else { - next := now.Add(1 * time.Minute) + cooldown := 1 * time.Minute + if auth.Attributes != nil { + if v, ok := auth.Attributes["transient_error_cooldown"]; ok { + if secs, err := 
strconv.Atoi(v); err == nil && secs > 0 { + cooldown = time.Duration(secs) * time.Second + } + } + } + next := now.Add(cooldown) state.NextRetryAfter = next } default: @@ -2247,12 +2255,20 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati } auth.Quota.NextRecoverAt = next auth.NextRetryAfter = next - case 408, 500, 502, 503, 504: + case 408, 500, 502, 503, 504, 524: auth.StatusMessage = "transient upstream error" if quotaCooldownDisabledForAuth(auth) { auth.NextRetryAfter = time.Time{} } else { - auth.NextRetryAfter = now.Add(1 * time.Minute) + cooldown := 1 * time.Minute + if auth.Attributes != nil { + if v, ok := auth.Attributes["transient_error_cooldown"]; ok { + if secs, err := strconv.Atoi(v); err == nil && secs > 0 { + cooldown = time.Duration(secs) * time.Second + } + } + } + auth.NextRetryAfter = now.Add(cooldown) } default: if auth.StatusMessage == "" {