Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,16 @@ type ClaudeKey struct {
// ExcludedModels lists model IDs that should be excluded for this provider.
ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`

// ResponseHeaderTimeout limits how long to wait for the upstream to start
// responding (in seconds). Once the first response byte arrives, this timeout
// no longer applies — streaming responses are not affected. 0 means no timeout.
ResponseHeaderTimeout int `yaml:"response-header-timeout,omitempty" json:"response-header-timeout,omitempty"`

// TransientErrorCooldown overrides the default 1-minute cooldown applied when
// a transient error (408/500/502/503/504/524) is received from this upstream
// (in seconds). 0 means use the default (60s).
TransientErrorCooldown int `yaml:"transient-error-cooldown,omitempty" json:"transient-error-cooldown,omitempty"`

// Cloak configures request cloaking for non-Claude-Code clients.
Cloak *CloakConfig `yaml:"cloak,omitempty" json:"cloak,omitempty"`
}
Expand Down
7 changes: 7 additions & 0 deletions internal/runtime/executor/claude_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"net/textproto"
"runtime"
Expand Down Expand Up @@ -183,6 +184,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
httpResp, err := httpClient.Do(httpReq)
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
return resp, statusErr{code: http.StatusGatewayTimeout, msg: fmt.Sprintf("upstream timeout: %v", err)}
}
return resp, err
}
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
Expand Down Expand Up @@ -346,6 +350,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
httpResp, err := httpClient.Do(httpReq)
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
return nil, statusErr{code: http.StatusGatewayTimeout, msg: fmt.Sprintf("upstream timeout: %v", err)}
}
return nil, err
}
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
Expand Down
28 changes: 28 additions & 0 deletions internal/runtime/executor/proxy_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"net"
"net/http"
"net/url"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -33,6 +34,16 @@ func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip
httpClient.Timeout = timeout
}

// Read per-auth response header timeout from attributes.
var respHeaderTimeout time.Duration
if auth != nil && auth.Attributes != nil {
if v, ok := auth.Attributes["response_header_timeout"]; ok {
if secs, err := strconv.Atoi(v); err == nil && secs > 0 {
respHeaderTimeout = time.Duration(secs) * time.Second
}
}
}

// Priority 1: Use auth.ProxyURL if configured
var proxyURL string
if auth != nil {
Expand All @@ -48,6 +59,9 @@ func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip
if proxyURL != "" {
transport := buildProxyTransport(proxyURL)
if transport != nil {
if respHeaderTimeout > 0 {
transport.ResponseHeaderTimeout = respHeaderTimeout
}
httpClient.Transport = transport
return httpClient
}
Expand All @@ -60,6 +74,20 @@ func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip
httpClient.Transport = rt
}

// Apply ResponseHeaderTimeout if set.
if respHeaderTimeout > 0 {
if transport, ok := httpClient.Transport.(*http.Transport); ok {
// Clone the transport to avoid modifying a shared instance.
clonedTransport := transport.Clone()
clonedTransport.ResponseHeaderTimeout = respHeaderTimeout
httpClient.Transport = clonedTransport
} else if httpClient.Transport == nil {
transport := http.DefaultTransport.(*http.Transport).Clone()
transport.ResponseHeaderTimeout = respHeaderTimeout
httpClient.Transport = transport
}
}

return httpClient
}

Expand Down
6 changes: 6 additions & 0 deletions internal/watcher/synthesizer/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ func (s *ConfigSynthesizer) synthesizeClaudeKeys(ctx *SynthesisContext) []*corea
if hash := diff.ComputeClaudeModelsHash(ck.Models); hash != "" {
attrs["models_hash"] = hash
}
if ck.ResponseHeaderTimeout > 0 {
attrs["response_header_timeout"] = strconv.Itoa(ck.ResponseHeaderTimeout)
}
if ck.TransientErrorCooldown > 0 {
attrs["transient_error_cooldown"] = strconv.Itoa(ck.TransientErrorCooldown)
}
addConfigHeadersToAttrs(ck.Headers, attrs)
proxyURL := strings.TrimSpace(ck.ProxyURL)
a := &coreauth.Auth{
Expand Down
24 changes: 20 additions & 4 deletions sdk/cliproxy/auth/conductor.go
Original file line number Diff line number Diff line change
Expand Up @@ -1665,11 +1665,19 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
suspendReason = "quota"
shouldSuspendModel = true
setModelQuota = true
case 408, 500, 502, 503, 504:
case 408, 500, 502, 503, 504, 524:
if quotaCooldownDisabledForAuth(auth) {
state.NextRetryAfter = time.Time{}
} else {
next := now.Add(1 * time.Minute)
cooldown := 1 * time.Minute
if auth.Attributes != nil {
if v, ok := auth.Attributes["transient_error_cooldown"]; ok {
if secs, err := strconv.Atoi(v); err == nil && secs > 0 {
cooldown = time.Duration(secs) * time.Second
}
}
}
next := now.Add(cooldown)
state.NextRetryAfter = next
}
default:
Expand Down Expand Up @@ -1942,12 +1950,20 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
}
auth.Quota.NextRecoverAt = next
auth.NextRetryAfter = next
case 408, 500, 502, 503, 504:
case 408, 500, 502, 503, 504, 524:
auth.StatusMessage = "transient upstream error"
if quotaCooldownDisabledForAuth(auth) {
auth.NextRetryAfter = time.Time{}
} else {
auth.NextRetryAfter = now.Add(1 * time.Minute)
cooldown := 1 * time.Minute
if auth.Attributes != nil {
if v, ok := auth.Attributes["transient_error_cooldown"]; ok {
if secs, err := strconv.Atoi(v); err == nil && secs > 0 {
cooldown = time.Duration(secs) * time.Second
}
}
}
auth.NextRetryAfter = now.Add(cooldown)
}
default:
if auth.StatusMessage == "" {
Expand Down
Loading