diff --git a/internal/config/config.go b/internal/config/config.go index 5a6595f778..d7b10afec2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -339,6 +339,16 @@ type ClaudeKey struct { // ExcludedModels lists model IDs that should be excluded for this provider. ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"` + // ResponseHeaderTimeout limits how long to wait for the upstream to start + // responding (in seconds). Once the first response byte arrives, this timeout + // no longer applies — streaming responses are not affected. 0 means no timeout. + ResponseHeaderTimeout int `yaml:"response-header-timeout,omitempty" json:"response-header-timeout,omitempty"` + + // TransientErrorCooldown overrides the default 1-minute cooldown applied when + // a transient error (408/500/502/503/504/524) is received from this upstream + // (in seconds). 0 means use the default (60s). + TransientErrorCooldown int `yaml:"transient-error-cooldown,omitempty" json:"transient-error-cooldown,omitempty"` + // Cloak configures request cloaking for non-Claude-Code clients. Cloak *CloakConfig `yaml:"cloak,omitempty" json:"cloak,omitempty"` } diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 82b12a2f80..9baeec28cb 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -12,6 +12,7 @@ import ( "encoding/json" "fmt" "io" + "net" "net/http" "net/textproto" "runtime" @@ -183,6 +184,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) + if netErr, ok := err.(net.Error); ok && netErr.Timeout() { + return resp, statusErr{code: http.StatusGatewayTimeout, msg: fmt.Sprintf("upstream timeout: %v", err)} + } return resp, err } recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) @@ -346,6 +350,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) + if netErr, ok := err.(net.Error); ok && netErr.Timeout() { + return nil, statusErr{code: http.StatusGatewayTimeout, msg: fmt.Sprintf("upstream timeout: %v", err)} + } return nil, err } recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) diff --git a/internal/runtime/executor/proxy_helpers.go b/internal/runtime/executor/proxy_helpers.go index ab0f626acc..7bc8b9575c 100644 --- a/internal/runtime/executor/proxy_helpers.go +++ b/internal/runtime/executor/proxy_helpers.go @@ -5,6 +5,7 @@ import ( "net" "net/http" "net/url" + "strconv" "strings" "time" @@ -33,6 +34,16 @@ func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip httpClient.Timeout = timeout } + // Read per-auth response header timeout from attributes. + var respHeaderTimeout time.Duration + if auth != nil && auth.Attributes != nil { + if v, ok := auth.Attributes["response_header_timeout"]; ok { + if secs, err := strconv.Atoi(v); err == nil && secs > 0 { + respHeaderTimeout = time.Duration(secs) * time.Second + } + } + } + // Priority 1: Use auth.ProxyURL if configured var proxyURL string if auth != nil { @@ -48,6 +59,9 @@ func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip if proxyURL != "" { transport := buildProxyTransport(proxyURL) if transport != nil { + if respHeaderTimeout > 0 { + transport.ResponseHeaderTimeout = respHeaderTimeout + } httpClient.Transport = transport return httpClient } @@ -60,6 +74,20 @@ func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip httpClient.Transport = rt } + // Apply ResponseHeaderTimeout if set. + if respHeaderTimeout > 0 { + if transport, ok := httpClient.Transport.(*http.Transport); ok { + // Clone the transport to avoid modifying a shared instance. + clonedTransport := transport.Clone() + clonedTransport.ResponseHeaderTimeout = respHeaderTimeout + httpClient.Transport = clonedTransport + } else if httpClient.Transport == nil { + transport := http.DefaultTransport.(*http.Transport).Clone() + transport.ResponseHeaderTimeout = respHeaderTimeout + httpClient.Transport = transport + } + } + return httpClient } diff --git a/internal/watcher/synthesizer/config.go b/internal/watcher/synthesizer/config.go index 52ae9a4808..ad36edf724 100644 --- a/internal/watcher/synthesizer/config.go +++ b/internal/watcher/synthesizer/config.go @@ -116,6 +116,12 @@ func (s *ConfigSynthesizer) synthesizeClaudeKeys(ctx *SynthesisContext) []*corea if hash := diff.ComputeClaudeModelsHash(ck.Models); hash != "" { attrs["models_hash"] = hash } + if ck.ResponseHeaderTimeout > 0 { + attrs["response_header_timeout"] = strconv.Itoa(ck.ResponseHeaderTimeout) + } + if ck.TransientErrorCooldown > 0 { + attrs["transient_error_cooldown"] = strconv.Itoa(ck.TransientErrorCooldown) + } addConfigHeadersToAttrs(ck.Headers, attrs) proxyURL := strings.TrimSpace(ck.ProxyURL) a := &coreauth.Auth{ diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index b29e04db8c..c087a9937b 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1665,11 +1665,19 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) { suspendReason = "quota" shouldSuspendModel = true setModelQuota = true - case 408, 500, 502, 503, 504: + case 408, 500, 502, 503, 504, 524: if quotaCooldownDisabledForAuth(auth) { state.NextRetryAfter = time.Time{} } else { - next := now.Add(1 * time.Minute) + cooldown := 1 * time.Minute + if auth.Attributes != nil { + if v, ok := auth.Attributes["transient_error_cooldown"]; ok { + if secs, err := strconv.Atoi(v); err == nil && secs > 0 { + cooldown = time.Duration(secs) * time.Second + } + } + } + next := now.Add(cooldown) state.NextRetryAfter = next } default: @@ -1942,12 +1950,20 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati } auth.Quota.NextRecoverAt = next auth.NextRetryAfter = next - case 408, 500, 502, 503, 504: + case 408, 500, 502, 503, 504, 524: auth.StatusMessage = "transient upstream error" if quotaCooldownDisabledForAuth(auth) { auth.NextRetryAfter = time.Time{} } else { - auth.NextRetryAfter = now.Add(1 * time.Minute) + cooldown := 1 * time.Minute + if auth.Attributes != nil { + if v, ok := auth.Attributes["transient_error_cooldown"]; ok { + if secs, err := strconv.Atoi(v); err == nil && secs > 0 { + cooldown = time.Duration(secs) * time.Second + } + } + } + auth.NextRetryAfter = now.Add(cooldown) } default: if auth.StatusMessage == "" {