From 59614fd1503df6094fe35b3487e1b968dd189dba Mon Sep 17 00:00:00 2001 From: Gemini CLI Date: Fri, 20 Mar 2026 16:44:20 +0800 Subject: [PATCH 1/3] feat(gemini): implement intra-account model failover and usage reporting --- .../runtime/executor/gemini_cli_executor.go | 37 ++++++-- internal/runtime/executor/usage_helpers.go | 77 +++++++++------- internal/usage/logger_plugin.go | 87 ++++++++++++------- sdk/cliproxy/usage/manager.go | 20 +++-- 4 files changed, 144 insertions(+), 77 deletions(-) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 1be245b702..dcf9c1b47d 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -380,6 +380,10 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut return nil, err } + if attemptModel != baseModel { + reporter.setActualModel(attemptModel) + } + out := make(chan cliproxyexecutor.StreamChunk) go func(resp *http.Response, reqBody []byte, attemptModel string) { defer close(out) @@ -439,7 +443,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut } }(httpResp, append([]byte(nil), payload...), attemptModel) - return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil + headers := httpResp.Header.Clone() + if attemptModel != baseModel { + headers.Set("x-cliproxy-model-fallback", fmt.Sprintf("requested=%s,actual=%s", baseModel, attemptModel)) + } + return &cliproxyexecutor.StreamResult{Headers: headers, Chunks: out}, nil } if len(lastBody) > 0 { @@ -747,19 +755,36 @@ func applyGeminiCLIHeaders(r *http.Request, model string) { // cliPreviewFallbackOrder returns preview model candidates for a base model. func cliPreviewFallbackOrder(model string) []string { switch model { + case "gemini-2.0-pro-exp-02-05": + return []string{ + "gemini-2.0-flash", + "gemini-1.5-pro", + "gemini-1.5-flash", + } + case "gemini-2.0-flash": + return []string{ + "gemini-1.5-pro", + "gemini-1.5-flash", + } + case "gemini-1.5-pro": + return []string{ + "gemini-1.5-flash", + } case "gemini-2.5-pro": return []string{ - // "gemini-2.5-pro-preview-05-06", - // "gemini-2.5-pro-preview-06-05", + "gemini-2.0-pro-exp-02-05", + "gemini-2.0-flash", + "gemini-1.5-pro", } case "gemini-2.5-flash": return []string{ - // "gemini-2.5-flash-preview-04-17", - // "gemini-2.5-flash-preview-05-20", + "gemini-2.0-flash", + "gemini-1.5-pro", + "gemini-1.5-flash", } case "gemini-2.5-flash-lite": return []string{ - // "gemini-2.5-flash-lite-preview-06-17", + "gemini-1.5-flash", } default: return nil diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go index 00f547df22..aeaba2d6e5 100644 --- a/internal/runtime/executor/usage_helpers.go +++ b/internal/runtime/executor/usage_helpers.go @@ -16,24 +16,28 @@ import ( ) type usageReporter struct { - provider string - model string - authID string - authIndex string - apiKey string - source string - requestedAt time.Time - once sync.Once + provider string + model string + requestedModel string + actualModel string + authID string + authIndex string + apiKey string + source string + requestedAt time.Time + once sync.Once } func newUsageReporter(ctx context.Context, provider, model string, auth *cliproxyauth.Auth) *usageReporter { apiKey := apiKeyFromContext(ctx) reporter := &usageReporter{ - provider: provider, - model: model, - requestedAt: time.Now(), - apiKey: apiKey, - source: resolveUsageSource(auth, apiKey), + provider: provider, + model: model, + requestedModel: model, + actualModel: model, + requestedAt: time.Now(), + apiKey: apiKey, + source: resolveUsageSource(auth, apiKey), } if auth != nil { reporter.authID = auth.ID @@ -42,6 +46,13 @@ func newUsageReporter(ctx context.Context, provider, model string, auth *cliprox return reporter } +func (r *usageReporter) setActualModel(model string) { + if r == nil { + return + } + r.actualModel = model +} + func (r *usageReporter) publish(ctx context.Context, detail usage.Detail) { r.publishWithOutcome(ctx, detail, false) } @@ -74,15 +85,17 @@ func (r *usageReporter) publishWithOutcome(ctx context.Context, detail usage.Det } r.once.Do(func() { usage.PublishRecord(ctx, usage.Record{ - Provider: r.provider, - Model: r.model, - Source: r.source, - APIKey: r.apiKey, - AuthID: r.authID, - AuthIndex: r.authIndex, - RequestedAt: r.requestedAt, - Failed: failed, - Detail: detail, + Provider: r.provider, + Model: r.model, + RequestedModel: r.requestedModel, + ActualModel: r.actualModel, + Source: r.source, + APIKey: r.apiKey, + AuthID: r.authID, + AuthIndex: r.authIndex, + RequestedAt: r.requestedAt, + Failed: failed, + Detail: detail, }) }) } @@ -97,15 +110,17 @@ func (r *usageReporter) ensurePublished(ctx context.Context) { } r.once.Do(func() { usage.PublishRecord(ctx, usage.Record{ - Provider: r.provider, - Model: r.model, - Source: r.source, - APIKey: r.apiKey, - AuthID: r.authID, - AuthIndex: r.authIndex, - RequestedAt: r.requestedAt, - Failed: false, - Detail: usage.Detail{}, + Provider: r.provider, + Model: r.model, + RequestedModel: r.requestedModel, + ActualModel: r.actualModel, + Source: r.source, + APIKey: r.apiKey, + AuthID: r.authID, + AuthIndex: r.authIndex, + RequestedAt: r.requestedAt, + Failed: false, + Detail: usage.Detail{}, }) }) } diff --git a/internal/usage/logger_plugin.go b/internal/usage/logger_plugin.go index e4371e8d39..8de3e9c0d4 100644 --- a/internal/usage/logger_plugin.go +++ b/internal/usage/logger_plugin.go @@ -60,10 +60,11 @@ func StatisticsEnabled() bool { return statisticsEnabled.Load() } type RequestStatistics struct { mu sync.RWMutex - totalRequests int64 - successCount int64 - failureCount int64 - totalTokens int64 + totalRequests int64 + successCount int64 + failureCount int64 + totalTokens int64 + totalFailovers int64 apis map[string]*apiStats @@ -75,9 +76,10 @@ type RequestStatistics struct { // apiStats holds aggregated metrics for a single API key. type apiStats struct { - TotalRequests int64 - TotalTokens int64 - Models map[string]*modelStats + TotalRequests int64 + TotalTokens int64 + TotalFailovers int64 + Models map[string]*modelStats } // modelStats holds aggregated metrics for a specific model within an API. @@ -89,11 +91,13 @@ type modelStats struct { // RequestDetail stores the timestamp and token usage for a single request. type RequestDetail struct { - Timestamp time.Time `json:"timestamp"` - Source string `json:"source"` - AuthIndex string `json:"auth_index"` - Tokens TokenStats `json:"tokens"` - Failed bool `json:"failed"` + Timestamp time.Time `json:"timestamp"` + RequestedModel string `json:"requested_model,omitempty"` + ActualModel string `json:"actual_model,omitempty"` + Source string `json:"source"` + AuthIndex string `json:"auth_index"` + Tokens TokenStats `json:"tokens"` + Failed bool `json:"failed"` } // TokenStats captures the token usage breakdown for a request. @@ -107,10 +111,11 @@ type TokenStats struct { // StatisticsSnapshot represents an immutable view of the aggregated metrics. type StatisticsSnapshot struct { - TotalRequests int64 `json:"total_requests"` - SuccessCount int64 `json:"success_count"` - FailureCount int64 `json:"failure_count"` - TotalTokens int64 `json:"total_tokens"` + TotalRequests int64 `json:"total_requests"` + SuccessCount int64 `json:"success_count"` + FailureCount int64 `json:"failure_count"` + TotalTokens int64 `json:"total_tokens"` + TotalFailovers int64 `json:"total_failovers"` APIs map[string]APISnapshot `json:"apis"` @@ -122,9 +127,10 @@ type StatisticsSnapshot struct { // APISnapshot summarises metrics for a single API key. type APISnapshot struct { - TotalRequests int64 `json:"total_requests"` - TotalTokens int64 `json:"total_tokens"` - Models map[string]ModelSnapshot `json:"models"` + TotalRequests int64 `json:"total_requests"` + TotalTokens int64 `json:"total_tokens"` + TotalFailovers int64 `json:"total_failovers"` + Models map[string]ModelSnapshot `json:"models"` } // ModelSnapshot summarises metrics for a specific model. @@ -180,6 +186,8 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record) dayKey := timestamp.Format("2006-01-02") hourKey := timestamp.Hour() + failover := record.ActualModel != "" && record.RequestedModel != "" && record.ActualModel != record.RequestedModel + s.mu.Lock() defer s.mu.Unlock() @@ -190,6 +198,9 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record) s.failureCount++ } s.totalTokens += totalTokens + if failover { + s.totalFailovers++ + } stats, ok := s.apis[statsKey] if !ok { @@ -197,12 +208,14 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record) s.apis[statsKey] = stats } s.updateAPIStats(stats, modelName, RequestDetail{ - Timestamp: timestamp, - Source: record.Source, - AuthIndex: record.AuthIndex, - Tokens: detail, - Failed: failed, - }) + Timestamp: timestamp, + RequestedModel: record.RequestedModel, + ActualModel: record.ActualModel, + Source: record.Source, + AuthIndex: record.AuthIndex, + Tokens: detail, + Failed: failed, + }, failover) s.requestsByDay[dayKey]++ s.requestsByHour[hourKey]++ @@ -210,9 +223,12 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record) s.tokensByHour[hourKey] += totalTokens } -func (s *RequestStatistics) updateAPIStats(stats *apiStats, model string, detail RequestDetail) { +func (s *RequestStatistics) updateAPIStats(stats *apiStats, model string, detail RequestDetail, failover bool) { stats.TotalRequests++ stats.TotalTokens += detail.Tokens.TotalTokens + if failover { + stats.TotalFailovers++ + } modelStatsValue, ok := stats.Models[model] if !ok { modelStatsValue = &modelStats{} @@ -237,13 +253,15 @@ func (s *RequestStatistics) Snapshot() StatisticsSnapshot { result.SuccessCount = s.successCount result.FailureCount = s.failureCount result.TotalTokens = s.totalTokens + result.TotalFailovers = s.totalFailovers result.APIs = make(map[string]APISnapshot, len(s.apis)) for apiName, stats := range s.apis { apiSnapshot := APISnapshot{ - TotalRequests: stats.TotalRequests, - TotalTokens: stats.TotalTokens, - Models: make(map[string]ModelSnapshot, len(stats.Models)), + TotalRequests: stats.TotalRequests, + TotalTokens: stats.TotalTokens, + TotalFailovers: stats.TotalFailovers, + Models: make(map[string]ModelSnapshot, len(stats.Models)), } for modelName, modelStatsValue := range stats.Models { requestDetails := make([]RequestDetail, len(modelStatsValue.Details)) @@ -356,6 +374,8 @@ func (s *RequestStatistics) recordImported(apiName, modelName string, stats *api totalTokens = 0 } + failover := detail.ActualModel != "" && detail.RequestedModel != "" && detail.ActualModel != detail.RequestedModel + s.totalRequests++ if detail.Failed { s.failureCount++ @@ -363,8 +383,11 @@ func (s *RequestStatistics) recordImported(apiName, modelName string, stats *api s.successCount++ } s.totalTokens += totalTokens + if failover { + s.totalFailovers++ + } - s.updateAPIStats(stats, modelName, detail) + s.updateAPIStats(stats, modelName, detail, failover) dayKey := detail.Timestamp.Format("2006-01-02") hourKey := detail.Timestamp.Hour() @@ -379,9 +402,11 @@ func dedupKey(apiName, modelName string, detail RequestDetail) string { timestamp := detail.Timestamp.UTC().Format(time.RFC3339Nano) tokens := normaliseTokenStats(detail.Tokens) return fmt.Sprintf( - "%s|%s|%s|%s|%s|%t|%d|%d|%d|%d|%d", + "%s|%s|%s|%s|%s|%s|%s|%t|%d|%d|%d|%d|%d", apiName, modelName, + detail.RequestedModel, + detail.ActualModel, timestamp, detail.Source, detail.AuthIndex, diff --git a/sdk/cliproxy/usage/manager.go b/sdk/cliproxy/usage/manager.go index 58b0360761..565eb4b273 100644 --- a/sdk/cliproxy/usage/manager.go +++ b/sdk/cliproxy/usage/manager.go @@ -10,15 +10,17 @@ import ( // Record contains the usage statistics captured for a single provider request. type Record struct { - Provider string - Model string - APIKey string - AuthID string - AuthIndex string - Source string - RequestedAt time.Time - Failed bool - Detail Detail + Provider string + Model string + RequestedModel string + ActualModel string + APIKey string + AuthID string + AuthIndex string + Source string + RequestedAt time.Time + Failed bool + Detail Detail } // Detail holds the token usage breakdown. From 84eec3d694f652cce2f7f26d953e67320e8584bd Mon Sep 17 00:00:00 2001 From: Gemini CLI Date: Fri, 20 Mar 2026 16:48:23 +0800 Subject: [PATCH 2/3] chore: bump version to 6.9.0-alpha.1 --- cmd/server/main.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/server/main.go b/cmd/server/main.go index 3d9ee6cf99..0c6eb20e01 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -36,9 +36,9 @@ import ( ) var ( - Version = "dev" - Commit = "none" - BuildDate = "unknown" + Version = "6.9.0-alpha.1" + Commit = "Failover-MVP" + BuildDate = "2026-03-20T14:45:00Z" DefaultConfigPath = "" ) From dff3e67aeef90e2c4f71c61d6ef79196583f7cd8 Mon Sep 17 00:00:00 2001 From: Gemini CLI Date: Fri, 20 Mar 2026 18:34:00 +0800 Subject: [PATCH 3/3] fix(tui): allow localPassword-only management auth for standalone mode When no RemoteManagement.SecretKey and no MANAGEMENT_PASSWORD env var are set, the middleware previously rejected all requests before reaching the localPassword check. This caused TUI standalone mode to fail with "embedded server is not ready" after all 30 polling retries received 403. Fix adds `&& h.localPassword == ""` to the forbidden guard so that standalone TUI sessions (which rely solely on the auto-generated localPassword) can authenticate successfully. Co-Authored-By: Claude Sonnet 4.6 --- internal/api/handlers/management/handler.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/api/handlers/management/handler.go b/internal/api/handlers/management/handler.go index 45786b9d3e..9acb567c5f 100644 --- a/internal/api/handlers/management/handler.go +++ b/internal/api/handlers/management/handler.go @@ -202,8 +202,10 @@ func (h *Handler) Middleware() gin.HandlerFunc { h.attemptsMu.Unlock() } } - if secretHash == "" && envSecret == "" { - c.AbortWithStatusJSON(http.StatusForbidden, gin.H{"error": "remote management key not set"}) + // Require at least one management credential source overall. + // Local TUI mode provides h.localPassword for localhost-only access. + if secretHash == "" && envSecret == "" && h.localPassword == "" { + c.AbortWithStatusJSON(http.StatusForbidden, gin.H{"error": "management key not set"}) return }