Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ import (
)

var (
Version = "dev"
Commit = "none"
BuildDate = "unknown"
Version = "6.9.0-alpha.1"
Commit = "Failover-MVP"
BuildDate = "2026-03-20T14:45:00Z"
DefaultConfigPath = ""
)

Expand Down
6 changes: 4 additions & 2 deletions internal/api/handlers/management/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,10 @@ func (h *Handler) Middleware() gin.HandlerFunc {
h.attemptsMu.Unlock()
}
}
if secretHash == "" && envSecret == "" {
c.AbortWithStatusJSON(http.StatusForbidden, gin.H{"error": "remote management key not set"})
// Require at least one management credential source overall.
// Local TUI mode provides h.localPassword for localhost-only access.
if secretHash == "" && envSecret == "" && h.localPassword == "" {
c.AbortWithStatusJSON(http.StatusForbidden, gin.H{"error": "management key not set"})
return
}

Expand Down
37 changes: 31 additions & 6 deletions internal/runtime/executor/gemini_cli_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,10 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
return nil, err
}

if attemptModel != baseModel {
reporter.setActualModel(attemptModel)
}
Comment on lines +383 to +385

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Report Gemini CLI failovers for non-stream responses too

This only updates actualModel for ExecuteStream. The non-stream Execute path uses the same 429 fallback loop but still publishes usage with RequestedModel == ActualModel and returns upstream headers unchanged, so regular chat/completions requests that fall back from gemini-2.5-* to 2.0/1.5 are invisible to both clients and the new total_failovers accounting. The reporting now depends on whether the caller set stream=true.

Useful? React with 👍 / 👎.


out := make(chan cliproxyexecutor.StreamChunk)
go func(resp *http.Response, reqBody []byte, attemptModel string) {
defer close(out)
Expand Down Expand Up @@ -439,7 +443,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
}
}(httpResp, append([]byte(nil), payload...), attemptModel)

return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
headers := httpResp.Header.Clone()
if attemptModel != baseModel {
headers.Set("x-cliproxy-model-fallback", fmt.Sprintf("requested=%s,actual=%s", baseModel, attemptModel))
}
return &cliproxyexecutor.StreamResult{Headers: headers, Chunks: out}, nil
}

if len(lastBody) > 0 {
Expand Down Expand Up @@ -747,19 +755,36 @@ func applyGeminiCLIHeaders(r *http.Request, model string) {
// cliPreviewFallbackOrder returns preview model candidates for a base model.
func cliPreviewFallbackOrder(model string) []string {
switch model {
case "gemini-2.0-pro-exp-02-05":
return []string{
"gemini-2.0-flash",
"gemini-1.5-pro",
"gemini-1.5-flash",
}
case "gemini-2.0-flash":
return []string{
"gemini-1.5-pro",
"gemini-1.5-flash",
}
case "gemini-1.5-pro":
return []string{
"gemini-1.5-flash",
}
case "gemini-2.5-pro":
return []string{
// "gemini-2.5-pro-preview-05-06",
// "gemini-2.5-pro-preview-06-05",
"gemini-2.0-pro-exp-02-05",
"gemini-2.0-flash",
"gemini-1.5-pro",
}
Comment on lines 774 to 778

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Avoid enabling Gemini CLI fallbacks that CountTokens can't use

Adding extra candidates here makes GeminiCLIExecutor.CountTokens start retrying on 429, but that function still loops with for range models and builds every request from baseModel instead of the fallback entry. For throttled gemini-2.5-* count-tokens calls, this will now resend the same upstream request 3-4 times, increasing latency and quota pressure while still failing instead of ever trying gemini-2.0-*/1.5-*.

Useful? React with 👍 / 👎.

case "gemini-2.5-flash":
return []string{
// "gemini-2.5-flash-preview-04-17",
// "gemini-2.5-flash-preview-05-20",
"gemini-2.0-flash",
"gemini-1.5-pro",
"gemini-1.5-flash",
}
case "gemini-2.5-flash-lite":
return []string{
// "gemini-2.5-flash-lite-preview-06-17",
"gemini-1.5-flash",
}
default:
return nil
Expand Down
77 changes: 46 additions & 31 deletions internal/runtime/executor/usage_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,28 @@ import (
)

type usageReporter struct {
provider string
model string
authID string
authIndex string
apiKey string
source string
requestedAt time.Time
once sync.Once
provider string
model string
requestedModel string
actualModel string
authID string
authIndex string
apiKey string
source string
requestedAt time.Time
once sync.Once
}

func newUsageReporter(ctx context.Context, provider, model string, auth *cliproxyauth.Auth) *usageReporter {
apiKey := apiKeyFromContext(ctx)
reporter := &usageReporter{
provider: provider,
model: model,
requestedAt: time.Now(),
apiKey: apiKey,
source: resolveUsageSource(auth, apiKey),
provider: provider,
model: model,
requestedModel: model,
actualModel: model,
requestedAt: time.Now(),
apiKey: apiKey,
source: resolveUsageSource(auth, apiKey),
}
if auth != nil {
reporter.authID = auth.ID
Expand All @@ -42,6 +46,13 @@ func newUsageReporter(ctx context.Context, provider, model string, auth *cliprox
return reporter
}

// setActualModel stores model in the reporter's actualModel field, which is
// copied into usage.Record.ActualModel when the reporter publishes.
// Calling it on a nil reporter is a no-op.
func (r *usageReporter) setActualModel(model string) {
	if r != nil {
		r.actualModel = model
	}
}

// publish forwards detail to publishWithOutcome with the outcome flag set to
// false, i.e. it reports the request as not failed.
func (r *usageReporter) publish(ctx context.Context, detail usage.Detail) {
	const failed = false
	r.publishWithOutcome(ctx, detail, failed)
}
Expand Down Expand Up @@ -74,15 +85,17 @@ func (r *usageReporter) publishWithOutcome(ctx context.Context, detail usage.Det
}
r.once.Do(func() {
usage.PublishRecord(ctx, usage.Record{
Provider: r.provider,
Model: r.model,
Source: r.source,
APIKey: r.apiKey,
AuthID: r.authID,
AuthIndex: r.authIndex,
RequestedAt: r.requestedAt,
Failed: failed,
Detail: detail,
Provider: r.provider,
Model: r.model,
RequestedModel: r.requestedModel,
ActualModel: r.actualModel,
Source: r.source,
APIKey: r.apiKey,
AuthID: r.authID,
AuthIndex: r.authIndex,
RequestedAt: r.requestedAt,
Failed: failed,
Detail: detail,
})
})
}
Expand All @@ -97,15 +110,17 @@ func (r *usageReporter) ensurePublished(ctx context.Context) {
}
r.once.Do(func() {
usage.PublishRecord(ctx, usage.Record{
Provider: r.provider,
Model: r.model,
Source: r.source,
APIKey: r.apiKey,
AuthID: r.authID,
AuthIndex: r.authIndex,
RequestedAt: r.requestedAt,
Failed: false,
Detail: usage.Detail{},
Provider: r.provider,
Model: r.model,
RequestedModel: r.requestedModel,
ActualModel: r.actualModel,
Source: r.source,
APIKey: r.apiKey,
AuthID: r.authID,
AuthIndex: r.authIndex,
RequestedAt: r.requestedAt,
Failed: false,
Detail: usage.Detail{},
})
})
}
Expand Down
87 changes: 56 additions & 31 deletions internal/usage/logger_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,11 @@ func StatisticsEnabled() bool { return statisticsEnabled.Load() }
type RequestStatistics struct {
mu sync.RWMutex

totalRequests int64
successCount int64
failureCount int64
totalTokens int64
totalRequests int64
successCount int64
failureCount int64
totalTokens int64
totalFailovers int64

apis map[string]*apiStats

Expand All @@ -75,9 +76,10 @@ type RequestStatistics struct {

// apiStats holds aggregated metrics for a single API key.
type apiStats struct {
TotalRequests int64
TotalTokens int64
Models map[string]*modelStats
TotalRequests int64
TotalTokens int64
TotalFailovers int64
Models map[string]*modelStats
}

// modelStats holds aggregated metrics for a specific model within an API.
Expand All @@ -89,11 +91,13 @@ type modelStats struct {

// RequestDetail stores the timestamp and token usage for a single request.
type RequestDetail struct {
Timestamp time.Time `json:"timestamp"`
Source string `json:"source"`
AuthIndex string `json:"auth_index"`
Tokens TokenStats `json:"tokens"`
Failed bool `json:"failed"`
Timestamp time.Time `json:"timestamp"`
RequestedModel string `json:"requested_model,omitempty"`
ActualModel string `json:"actual_model,omitempty"`
Source string `json:"source"`
AuthIndex string `json:"auth_index"`
Tokens TokenStats `json:"tokens"`
Failed bool `json:"failed"`
}

// TokenStats captures the token usage breakdown for a request.
Expand All @@ -107,10 +111,11 @@ type TokenStats struct {

// StatisticsSnapshot represents an immutable view of the aggregated metrics.
type StatisticsSnapshot struct {
TotalRequests int64 `json:"total_requests"`
SuccessCount int64 `json:"success_count"`
FailureCount int64 `json:"failure_count"`
TotalTokens int64 `json:"total_tokens"`
TotalRequests int64 `json:"total_requests"`
SuccessCount int64 `json:"success_count"`
FailureCount int64 `json:"failure_count"`
TotalTokens int64 `json:"total_tokens"`
TotalFailovers int64 `json:"total_failovers"`

APIs map[string]APISnapshot `json:"apis"`

Expand All @@ -122,9 +127,10 @@ type StatisticsSnapshot struct {

// APISnapshot summarises metrics for a single API key.
type APISnapshot struct {
TotalRequests int64 `json:"total_requests"`
TotalTokens int64 `json:"total_tokens"`
Models map[string]ModelSnapshot `json:"models"`
TotalRequests int64 `json:"total_requests"`
TotalTokens int64 `json:"total_tokens"`
TotalFailovers int64 `json:"total_failovers"`
Models map[string]ModelSnapshot `json:"models"`
}

// ModelSnapshot summarises metrics for a specific model.
Expand Down Expand Up @@ -180,6 +186,8 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record)
dayKey := timestamp.Format("2006-01-02")
hourKey := timestamp.Hour()

failover := record.ActualModel != "" && record.RequestedModel != "" && record.ActualModel != record.RequestedModel

s.mu.Lock()
defer s.mu.Unlock()

Expand All @@ -190,29 +198,37 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record)
s.failureCount++
}
s.totalTokens += totalTokens
if failover {
s.totalFailovers++
}

stats, ok := s.apis[statsKey]
if !ok {
stats = &apiStats{Models: make(map[string]*modelStats)}
s.apis[statsKey] = stats
}
s.updateAPIStats(stats, modelName, RequestDetail{
Timestamp: timestamp,
Source: record.Source,
AuthIndex: record.AuthIndex,
Tokens: detail,
Failed: failed,
})
Timestamp: timestamp,
RequestedModel: record.RequestedModel,
ActualModel: record.ActualModel,
Source: record.Source,
AuthIndex: record.AuthIndex,
Tokens: detail,
Failed: failed,
}, failover)

s.requestsByDay[dayKey]++
s.requestsByHour[hourKey]++
s.tokensByDay[dayKey] += totalTokens
s.tokensByHour[hourKey] += totalTokens
}

func (s *RequestStatistics) updateAPIStats(stats *apiStats, model string, detail RequestDetail) {
func (s *RequestStatistics) updateAPIStats(stats *apiStats, model string, detail RequestDetail, failover bool) {
stats.TotalRequests++
stats.TotalTokens += detail.Tokens.TotalTokens
if failover {
stats.TotalFailovers++
}
modelStatsValue, ok := stats.Models[model]
if !ok {
modelStatsValue = &modelStats{}
Expand All @@ -237,13 +253,15 @@ func (s *RequestStatistics) Snapshot() StatisticsSnapshot {
result.SuccessCount = s.successCount
result.FailureCount = s.failureCount
result.TotalTokens = s.totalTokens
result.TotalFailovers = s.totalFailovers

result.APIs = make(map[string]APISnapshot, len(s.apis))
for apiName, stats := range s.apis {
apiSnapshot := APISnapshot{
TotalRequests: stats.TotalRequests,
TotalTokens: stats.TotalTokens,
Models: make(map[string]ModelSnapshot, len(stats.Models)),
TotalRequests: stats.TotalRequests,
TotalTokens: stats.TotalTokens,
TotalFailovers: stats.TotalFailovers,
Models: make(map[string]ModelSnapshot, len(stats.Models)),
}
for modelName, modelStatsValue := range stats.Models {
requestDetails := make([]RequestDetail, len(modelStatsValue.Details))
Expand Down Expand Up @@ -356,15 +374,20 @@ func (s *RequestStatistics) recordImported(apiName, modelName string, stats *api
totalTokens = 0
}

failover := detail.ActualModel != "" && detail.RequestedModel != "" && detail.ActualModel != detail.RequestedModel

s.totalRequests++
if detail.Failed {
s.failureCount++
} else {
s.successCount++
}
s.totalTokens += totalTokens
if failover {
s.totalFailovers++
}

s.updateAPIStats(stats, modelName, detail)
s.updateAPIStats(stats, modelName, detail, failover)

dayKey := detail.Timestamp.Format("2006-01-02")
hourKey := detail.Timestamp.Hour()
Expand All @@ -379,9 +402,11 @@ func dedupKey(apiName, modelName string, detail RequestDetail) string {
timestamp := detail.Timestamp.UTC().Format(time.RFC3339Nano)
tokens := normaliseTokenStats(detail.Tokens)
return fmt.Sprintf(
"%s|%s|%s|%s|%s|%t|%d|%d|%d|%d|%d",
"%s|%s|%s|%s|%s|%s|%s|%t|%d|%d|%d|%d|%d",
apiName,
modelName,
detail.RequestedModel,
detail.ActualModel,
timestamp,
detail.Source,
detail.AuthIndex,
Expand Down
Loading
Loading