diff --git a/cmd/server/main.go b/cmd/server/main.go index 3d9ee6cf99..0c6eb20e01 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -36,9 +36,9 @@ import ( ) var ( - Version = "dev" - Commit = "none" - BuildDate = "unknown" + Version = "6.9.0-alpha.1" + Commit = "Failover-MVP" + BuildDate = "2026-03-20T14:45:00Z" DefaultConfigPath = "" ) diff --git a/internal/api/handlers/management/handler.go b/internal/api/handlers/management/handler.go index 45786b9d3e..9acb567c5f 100644 --- a/internal/api/handlers/management/handler.go +++ b/internal/api/handlers/management/handler.go @@ -202,8 +202,10 @@ func (h *Handler) Middleware() gin.HandlerFunc { h.attemptsMu.Unlock() } } - if secretHash == "" && envSecret == "" { - c.AbortWithStatusJSON(http.StatusForbidden, gin.H{"error": "remote management key not set"}) + // Require at least one management credential source overall. + // Local TUI mode provides h.localPassword for localhost-only access. + if secretHash == "" && envSecret == "" && h.localPassword == "" { + c.AbortWithStatusJSON(http.StatusForbidden, gin.H{"error": "management key not set"}) return } diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 1be245b702..dcf9c1b47d 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -380,6 +380,10 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut return nil, err } + if attemptModel != baseModel { + reporter.setActualModel(attemptModel) + } + out := make(chan cliproxyexecutor.StreamChunk) go func(resp *http.Response, reqBody []byte, attemptModel string) { defer close(out) @@ -439,7 +443,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut } }(httpResp, append([]byte(nil), payload...), attemptModel) - return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil + headers := httpResp.Header.Clone() + if attemptModel != baseModel { + headers.Set("x-cliproxy-model-fallback", fmt.Sprintf("requested=%s,actual=%s", baseModel, attemptModel)) + } + return &cliproxyexecutor.StreamResult{Headers: headers, Chunks: out}, nil } if len(lastBody) > 0 { @@ -747,19 +755,36 @@ func applyGeminiCLIHeaders(r *http.Request, model string) { // cliPreviewFallbackOrder returns preview model candidates for a base model. func cliPreviewFallbackOrder(model string) []string { switch model { + case "gemini-2.0-pro-exp-02-05": + return []string{ + "gemini-2.0-flash", + "gemini-1.5-pro", + "gemini-1.5-flash", + } + case "gemini-2.0-flash": + return []string{ + "gemini-1.5-pro", + "gemini-1.5-flash", + } + case "gemini-1.5-pro": + return []string{ + "gemini-1.5-flash", + } case "gemini-2.5-pro": return []string{ - // "gemini-2.5-pro-preview-05-06", - // "gemini-2.5-pro-preview-06-05", + "gemini-2.0-pro-exp-02-05", + "gemini-2.0-flash", + "gemini-1.5-pro", } case "gemini-2.5-flash": return []string{ - // "gemini-2.5-flash-preview-04-17", - // "gemini-2.5-flash-preview-05-20", + "gemini-2.0-flash", + "gemini-1.5-pro", + "gemini-1.5-flash", } case "gemini-2.5-flash-lite": return []string{ - // "gemini-2.5-flash-lite-preview-06-17", + "gemini-1.5-flash", } default: return nil diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go index 00f547df22..aeaba2d6e5 100644 --- a/internal/runtime/executor/usage_helpers.go +++ b/internal/runtime/executor/usage_helpers.go @@ -16,24 +16,28 @@ import ( ) type usageReporter struct { - provider string - model string - authID string - authIndex string - apiKey string - source string - requestedAt time.Time - once sync.Once + provider string + model string + requestedModel string + actualModel string + authID string + authIndex string + apiKey string + source string + requestedAt time.Time + once sync.Once } func newUsageReporter(ctx context.Context, provider, model string, auth *cliproxyauth.Auth) *usageReporter { apiKey := apiKeyFromContext(ctx) reporter := &usageReporter{ - provider: provider, - model: model, - requestedAt: time.Now(), - apiKey: apiKey, - source: resolveUsageSource(auth, apiKey), + provider: provider, + model: model, + requestedModel: model, + actualModel: model, + requestedAt: time.Now(), + apiKey: apiKey, + source: resolveUsageSource(auth, apiKey), } if auth != nil { reporter.authID = auth.ID @@ -42,6 +46,13 @@ func newUsageReporter(ctx context.Context, provider, model string, auth *cliprox return reporter } +func (r *usageReporter) setActualModel(model string) { + if r == nil { + return + } + r.actualModel = model +} + func (r *usageReporter) publish(ctx context.Context, detail usage.Detail) { r.publishWithOutcome(ctx, detail, false) } @@ -74,15 +85,17 @@ func (r *usageReporter) publishWithOutcome(ctx context.Context, detail usage.Det } r.once.Do(func() { usage.PublishRecord(ctx, usage.Record{ - Provider: r.provider, - Model: r.model, - Source: r.source, - APIKey: r.apiKey, - AuthID: r.authID, - AuthIndex: r.authIndex, - RequestedAt: r.requestedAt, - Failed: failed, - Detail: detail, + Provider: r.provider, + Model: r.model, + RequestedModel: r.requestedModel, + ActualModel: r.actualModel, + Source: r.source, + APIKey: r.apiKey, + AuthID: r.authID, + AuthIndex: r.authIndex, + RequestedAt: r.requestedAt, + Failed: failed, + Detail: detail, }) }) } @@ -97,15 +110,17 @@ func (r *usageReporter) ensurePublished(ctx context.Context) { } r.once.Do(func() { usage.PublishRecord(ctx, usage.Record{ - Provider: r.provider, - Model: r.model, - Source: r.source, - APIKey: r.apiKey, - AuthID: r.authID, - AuthIndex: r.authIndex, - RequestedAt: r.requestedAt, - Failed: false, - Detail: usage.Detail{}, + Provider: r.provider, + Model: r.model, + RequestedModel: r.requestedModel, + ActualModel: r.actualModel, + Source: r.source, + APIKey: r.apiKey, + AuthID: r.authID, + AuthIndex: r.authIndex, + RequestedAt: r.requestedAt, + Failed: false, + Detail: usage.Detail{}, }) }) } diff --git a/internal/usage/logger_plugin.go b/internal/usage/logger_plugin.go index e4371e8d39..8de3e9c0d4 100644 --- a/internal/usage/logger_plugin.go +++ b/internal/usage/logger_plugin.go @@ -60,10 +60,11 @@ func StatisticsEnabled() bool { return statisticsEnabled.Load() } type RequestStatistics struct { mu sync.RWMutex - totalRequests int64 - successCount int64 - failureCount int64 - totalTokens int64 + totalRequests int64 + successCount int64 + failureCount int64 + totalTokens int64 + totalFailovers int64 apis map[string]*apiStats @@ -75,9 +76,10 @@ type RequestStatistics struct { // apiStats holds aggregated metrics for a single API key. type apiStats struct { - TotalRequests int64 - TotalTokens int64 - Models map[string]*modelStats + TotalRequests int64 + TotalTokens int64 + TotalFailovers int64 + Models map[string]*modelStats } // modelStats holds aggregated metrics for a specific model within an API. @@ -89,11 +91,13 @@ type modelStats struct { // RequestDetail stores the timestamp and token usage for a single request. type RequestDetail struct { - Timestamp time.Time `json:"timestamp"` - Source string `json:"source"` - AuthIndex string `json:"auth_index"` - Tokens TokenStats `json:"tokens"` - Failed bool `json:"failed"` + Timestamp time.Time `json:"timestamp"` + RequestedModel string `json:"requested_model,omitempty"` + ActualModel string `json:"actual_model,omitempty"` + Source string `json:"source"` + AuthIndex string `json:"auth_index"` + Tokens TokenStats `json:"tokens"` + Failed bool `json:"failed"` } // TokenStats captures the token usage breakdown for a request. @@ -107,10 +111,11 @@ type TokenStats struct { // StatisticsSnapshot represents an immutable view of the aggregated metrics. type StatisticsSnapshot struct { - TotalRequests int64 `json:"total_requests"` - SuccessCount int64 `json:"success_count"` - FailureCount int64 `json:"failure_count"` - TotalTokens int64 `json:"total_tokens"` + TotalRequests int64 `json:"total_requests"` + SuccessCount int64 `json:"success_count"` + FailureCount int64 `json:"failure_count"` + TotalTokens int64 `json:"total_tokens"` + TotalFailovers int64 `json:"total_failovers"` APIs map[string]APISnapshot `json:"apis"` @@ -122,9 +127,10 @@ type StatisticsSnapshot struct { // APISnapshot summarises metrics for a single API key. type APISnapshot struct { - TotalRequests int64 `json:"total_requests"` - TotalTokens int64 `json:"total_tokens"` - Models map[string]ModelSnapshot `json:"models"` + TotalRequests int64 `json:"total_requests"` + TotalTokens int64 `json:"total_tokens"` + TotalFailovers int64 `json:"total_failovers"` + Models map[string]ModelSnapshot `json:"models"` } // ModelSnapshot summarises metrics for a specific model. @@ -180,6 +186,8 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record) dayKey := timestamp.Format("2006-01-02") hourKey := timestamp.Hour() + failover := record.ActualModel != "" && record.RequestedModel != "" && record.ActualModel != record.RequestedModel + s.mu.Lock() defer s.mu.Unlock() @@ -190,6 +198,9 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record) s.failureCount++ } s.totalTokens += totalTokens + if failover { + s.totalFailovers++ + } stats, ok := s.apis[statsKey] if !ok { @@ -197,12 +208,14 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record) s.apis[statsKey] = stats } s.updateAPIStats(stats, modelName, RequestDetail{ - Timestamp: timestamp, - Source: record.Source, - AuthIndex: record.AuthIndex, - Tokens: detail, - Failed: failed, - }) + Timestamp: timestamp, + RequestedModel: record.RequestedModel, + ActualModel: record.ActualModel, + Source: record.Source, + AuthIndex: record.AuthIndex, + Tokens: detail, + Failed: failed, + }, failover) s.requestsByDay[dayKey]++ s.requestsByHour[hourKey]++ @@ -210,9 +223,12 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record) s.tokensByHour[hourKey] += totalTokens } -func (s *RequestStatistics) updateAPIStats(stats *apiStats, model string, detail RequestDetail) { +func (s *RequestStatistics) updateAPIStats(stats *apiStats, model string, detail RequestDetail, failover bool) { stats.TotalRequests++ stats.TotalTokens += detail.Tokens.TotalTokens + if failover { + stats.TotalFailovers++ + } modelStatsValue, ok := stats.Models[model] if !ok { modelStatsValue = &modelStats{} @@ -237,13 +253,15 @@ func (s *RequestStatistics) Snapshot() StatisticsSnapshot { result.SuccessCount = s.successCount result.FailureCount = s.failureCount result.TotalTokens = s.totalTokens + result.TotalFailovers = s.totalFailovers result.APIs = make(map[string]APISnapshot, len(s.apis)) for apiName, stats := range s.apis { apiSnapshot := APISnapshot{ - TotalRequests: stats.TotalRequests, - TotalTokens: stats.TotalTokens, - Models: make(map[string]ModelSnapshot, len(stats.Models)), + TotalRequests: stats.TotalRequests, + TotalTokens: stats.TotalTokens, + TotalFailovers: stats.TotalFailovers, + Models: make(map[string]ModelSnapshot, len(stats.Models)), } for modelName, modelStatsValue := range stats.Models { requestDetails := make([]RequestDetail, len(modelStatsValue.Details)) @@ -356,6 +374,8 @@ func (s *RequestStatistics) recordImported(apiName, modelName string, stats *api totalTokens = 0 } + failover := detail.ActualModel != "" && detail.RequestedModel != "" && detail.ActualModel != detail.RequestedModel + s.totalRequests++ if detail.Failed { s.failureCount++ @@ -363,8 +383,11 @@ func (s *RequestStatistics) recordImported(apiName, modelName string, stats *api s.successCount++ } s.totalTokens += totalTokens + if failover { + s.totalFailovers++ + } - s.updateAPIStats(stats, modelName, detail) + s.updateAPIStats(stats, modelName, detail, failover) dayKey := detail.Timestamp.Format("2006-01-02") hourKey := detail.Timestamp.Hour() @@ -379,9 +402,11 @@ func dedupKey(apiName, modelName string, detail RequestDetail) string { timestamp := detail.Timestamp.UTC().Format(time.RFC3339Nano) tokens := normaliseTokenStats(detail.Tokens) return fmt.Sprintf( - "%s|%s|%s|%s|%s|%t|%d|%d|%d|%d|%d", + "%s|%s|%s|%s|%s|%s|%s|%t|%d|%d|%d|%d|%d", apiName, modelName, + detail.RequestedModel, + detail.ActualModel, timestamp, detail.Source, detail.AuthIndex, diff --git a/sdk/cliproxy/usage/manager.go b/sdk/cliproxy/usage/manager.go index 58b0360761..565eb4b273 100644 --- a/sdk/cliproxy/usage/manager.go +++ b/sdk/cliproxy/usage/manager.go @@ -10,15 +10,17 @@ import ( // Record contains the usage statistics captured for a single provider request. type Record struct { - Provider string - Model string - APIKey string - AuthID string - AuthIndex string - Source string - RequestedAt time.Time - Failed bool - Detail Detail + Provider string + Model string + RequestedModel string + ActualModel string + APIKey string + AuthID string + AuthIndex string + Source string + RequestedAt time.Time + Failed bool + Detail Detail } // Detail holds the token usage breakdown.