From 3e607e9f7cbfac4bdd7db602c6a7aadfabc4b56e Mon Sep 17 00:00:00 2001 From: Jensen Zhang Date: Thu, 12 Mar 2026 05:49:58 +0000 Subject: [PATCH 1/3] feat(token): add consumed mode for quota management - Add 'consumed_mode_enabled' config option (opt-in, default false) - Track local consumption separately from API quota - consumed increases in both modes; quota deduction differs by mode - Prefer tokens with lower consumed amount in pool selection - UI dynamically switches between 'remaining quota' and 'consumed' display - Disable quota editing in edit modal when consumed mode is enabled - Maintain backward compatibility with existing configs --- _public/static/admin/js/config.js | 3 +- _public/static/admin/js/token.js | 76 ++++++++++++++-- _public/static/admin/pages/token.html | 2 +- _public/static/i18n/locales/en.json | 3 + _public/static/i18n/locales/zh.json | 3 + app/api/v1/admin/token.py | 9 +- app/services/token/manager.py | 34 ++++++- app/services/token/models.py | 89 ++++++++++++++++--- app/services/token/pool.py | 123 +++++++++++++++++++------- config.defaults.toml | 2 + 10 files changed, 290 insertions(+), 54 deletions(-) diff --git a/_public/static/admin/js/config.js b/_public/static/admin/js/config.js index 84c7111e6..28589b385 100644 --- a/_public/static/admin/js/config.js +++ b/_public/static/admin/js/config.js @@ -150,7 +150,8 @@ const LOCALE_MAP = { "fail_threshold": { title: "失败阈值", desc: "单个 Token 连续失败多少次后被标记为不可用。" }, "save_delay_ms": { title: "保存延迟", desc: "Token 变更合并写入的延迟(毫秒)。" }, "usage_flush_interval_sec": { title: "用量落库间隔", desc: "用量类字段写入数据库的最小间隔(秒)。" }, - "reload_interval_sec": { title: "同步间隔", desc: "多 worker 场景下 Token 状态刷新间隔(秒)。" } + "reload_interval_sec": { title: "同步间隔", desc: "多 worker 场景下 Token 状态刷新间隔(秒)。" }, + "consumed_mode_enabled": { title: "启用消耗模式", desc: "启用新额度管理逻辑:使用本地消耗记录而非 API 返回值,支持更均衡的负载分配。(试验性功能,默认关闭)" } }, diff --git a/_public/static/admin/js/token.js b/_public/static/admin/js/token.js index abe2a7e31..fa47f7af9 100644 --- a/_public/static/admin/js/token.js +++ b/_public/static/admin/js/token.js @@ -1,4 +1,5 @@ let apiKey = ''; +let consumedModeEnabled = false; let allTokens = {}; let flatTokens = []; let isBatchProcessing = false; @@ -123,9 +124,11 @@ async function loadData() { }); if (res.ok) { const data = await res.json(); - allTokens = data; - processTokens(data); - updateStats(data); + allTokens = data.tokens; + consumedModeEnabled = data.consumed_mode_enabled || false; + updateQuotaHeader(); + processTokens(data.tokens); + updateStats(data.tokens); renderTable(); } else if (res.status === 401) { logout(); @@ -151,6 +154,7 @@ function processTokens(data) { token: t.token, status: t.status || 'active', quota: t.quota || 0, + consumed: t.consumed || 0, note: t.note || '', fail_count: t.fail_count || 0, use_count: t.use_count || 0, @@ -168,6 +172,19 @@ function processTokens(data) { }); } +function updateQuotaHeader() { + const thQuota = document.getElementById('th-quota'); + if (thQuota) { + if (consumedModeEnabled) { + thQuota.textContent = t('token.tableQuotaConsumed'); + thQuota.dataset.i18n = 'token.tableQuotaConsumed'; + } else { + thQuota.textContent = t('token.tableQuota'); + thQuota.dataset.i18n = 'token.tableQuota'; + } + } +} + function updateStats(data) { // Logic same as before, simplified reuse if possible, but let's re-run on flatTokens let totalTokens = flatTokens.length; @@ -197,14 +214,27 @@ function updateStats(data) { }); const imageQuota = Math.floor(chatQuota / 2); + const totalConsumed = flatTokens.reduce((sum, t) => sum + (t.consumed || 0), 0); + // 更新统计卡片 (这些不受 consumedMode 影响) setText('stat-total', totalTokens.toLocaleString()); setText('stat-active', activeTokens.toLocaleString()); setText('stat-cooling', coolingTokens.toLocaleString()); setText('stat-invalid', invalidTokens.toLocaleString()); - setText('stat-chat-quota', chatQuota.toLocaleString()); - setText('stat-image-quota', imageQuota.toLocaleString()); + // 根据配置决定显示消耗还是剩余 + if (consumedModeEnabled) { + setText('stat-chat-quota', totalConsumed.toLocaleString()); + setText('stat-image-quota', Math.floor(totalConsumed / 2).toLocaleString()); + const chatLabel = document.querySelector('[data-i18n="token.statChatQuota"]'); + const imageLabel = document.querySelector('[data-i18n="token.statImageQuota"]'); + if (chatLabel) chatLabel.textContent = t('token.statChatConsumed'); + if (imageLabel) imageLabel.textContent = t('token.statImageConsumed'); + } else { + setText('stat-chat-quota', chatQuota.toLocaleString()); + setText('stat-image-quota', imageQuota.toLocaleString()); + } + setText('stat-total-calls', totalCalls.toLocaleString()); updateTabCounts({ @@ -293,7 +323,16 @@ function renderTable() { // Quota (Center) const tdQuota = document.createElement('td'); tdQuota.className = 'text-center font-mono text-xs'; - tdQuota.innerText = item.quota; + // 根据配置决定显示消耗还是剩余 + if (consumedModeEnabled) { + tdQuota.innerText = item.consumed; + tdQuota.title = t('token.tableQuotaConsumed'); + } else { + tdQuota.innerText = item.quota; + tdQuota.title = t('token.tableQuota'); + } + + // Note (Left) const tdNote = document.createElement('td'); @@ -503,6 +542,23 @@ function openEditModal(index) { byId('edit-pool').value = item.pool; byId('edit-quota').value = item.quota; byId('edit-note').value = item.note; + + // 根据配置决定是否禁用 quota 编辑 + const quotaInput = byId('edit-quota'); + const quotaInputParent = quotaInput?.closest('div'); + const quotaLabel = quotaInputParent?.previousElementSibling; + if (consumedModeEnabled) { + quotaInput.disabled = true; + quotaInput.classList.add('bg-gray-100', 'text-gray-400'); + if (quotaLabel) quotaLabel.textContent = t('token.tableQuotaConsumed'); + } else { + quotaInput.disabled = false; + quotaInput.classList.remove('bg-gray-100', 'text-gray-400'); + if (quotaLabel) quotaLabel.textContent = t('token.editQuota'); + } + + document.querySelector('#edit-modal h3').innerText = t('token.editTitle'); + byId('edit-note').value = item.note; document.querySelector('#edit-modal h3').innerText = t('token.editTitle'); } else { // New Token @@ -518,6 +574,14 @@ function openEditModal(index) { byId('edit-quota').value = getDefaultQuotaForPool('ssoBasic'); byId('edit-note').value = ''; document.querySelector('#edit-modal h3').innerText = t('token.addTitle'); + + // 新建 Token 时启用 quota 编辑 + const newQuotaInput = byId('edit-quota'); + const newQuotaInputParent = newQuotaInput?.closest('div'); + const newQuotaLabel = newQuotaInputParent?.previousElementSibling; + newQuotaInput.disabled = false; + newQuotaInput.classList.remove('bg-gray-100', 'text-gray-400'); + if (newQuotaLabel) newQuotaLabel.textContent = t('token.editQuota'); } openModal('edit-modal'); diff --git a/_public/static/admin/pages/token.html b/_public/static/admin/pages/token.html index c4843d801..50f321d04 100644 --- a/_public/static/admin/pages/token.html +++ b/_public/static/admin/pages/token.html @@ -167,7 +167,7 @@

Token Token 类型 状态 - 额度 + 额度 备注 操作 diff --git a/_public/static/i18n/locales/en.json b/_public/static/i18n/locales/en.json index d417fc3f2..77eb65003 100644 --- a/_public/static/i18n/locales/en.json +++ b/_public/static/i18n/locales/en.json @@ -244,6 +244,8 @@ "statInvalid": "Invalid", "statChatQuota": "Chat Remaining", "statImageQuota": "Image Remaining", + "statChatConsumed": "Chat Consumed", + "statImageConsumed": "Image Consumed", "statVideoQuota": "Video Remaining", "statVideoUnavailable": "N/A", "statTotalCalls": "Total Calls", @@ -258,6 +260,7 @@ "tableType": "Type", "tableStatus": "Status", "tableQuota": "Quota", + "tableQuotaConsumed": "Consumed", "tableNote": "Note", "tableActions": "Actions", "refreshStatus": "Refresh status", diff --git a/_public/static/i18n/locales/zh.json b/_public/static/i18n/locales/zh.json index 1cd4df3e4..00a5f7b3a 100644 --- a/_public/static/i18n/locales/zh.json +++ b/_public/static/i18n/locales/zh.json @@ -244,6 +244,8 @@ "statInvalid": "Token 失效", "statChatQuota": "Chat 剩余", "statImageQuota": "Image 剩余", + "statChatConsumed": "Chat 已消耗", + "statImageConsumed": "Image 已消耗", "statVideoQuota": "Video 剩余", "statVideoUnavailable": "无法统计", "statTotalCalls": "总调用次数", @@ -258,6 +260,7 @@ "tableType": "类型", "tableStatus": "状态", "tableQuota": "额度", + "tableQuotaConsumed": "已消耗", "tableNote": "备注", "tableActions": "操作", "refreshStatus": "刷新状态", diff --git a/app/api/v1/admin/token.py b/app/api/v1/admin/token.py index d417ee887..ebe914052 100644 --- a/app/api/v1/admin/token.py +++ b/app/api/v1/admin/token.py @@ -48,7 +48,14 @@ async def get_tokens(): """获取所有 Token""" storage = get_storage() tokens = await storage.load_tokens() - return tokens or {} + # 获取消耗模式配置 + from app.core.config import get_config + consumed_mode = get_config("token.consumed_mode_enabled", False) + + return { + "tokens": tokens or {}, + "consumed_mode_enabled": consumed_mode + } @router.post("/tokens", dependencies=[Depends(verify_app_key)]) diff --git a/app/services/token/manager.py b/app/services/token/manager.py index eb668edb6..3fa9bbe98 100644 --- a/app/services/token/manager.py +++ b/app/services/token/manager.py @@ -471,7 +471,18 @@ async def consume( token = pool.get(raw_token) if token: old_status = token.status - consumed = token.consume(effort) + # 检查是否启用消耗模式 + consumed_mode = False + try: + from app.core.config import get_config + consumed_mode = get_config("token.consumed_mode_enabled", False) + except Exception: + pass + + if consumed_mode: + consumed = token.consume_with_consumed(effort) + else: + consumed = token.consume(effort) logger.debug( f"Token {raw_token[:10]}...: consumed {consumed} quota, use_count={token.use_count}" ) @@ -936,6 +947,27 @@ async def _refresh_one(item: tuple[str, TokenInfo]) -> dict: old_status = token_info.status token_info.update_quota(new_quota) + + # 检查是否启用 consumed 模式 + consumed_mode = False + try: + from app.core.config import get_config + consumed_mode = get_config("token.consumed_mode_enabled", False) + except Exception: + pass + + if consumed_mode: + # Consumed 模式:使用新逻辑 + token_info.update_quota_with_consumed(new_quota) + # 刷新成功后重置消耗记录 + if new_quota > 0: + token_info.consumed = 0 + else: + # 默认模式:使用旧逻辑 + token_info.update_quota(new_quota) + # 刷新成功后如果 quota > 0,清除冷却状态 + if new_quota > 0: + token_info.status = TokenStatus.ACTIVE token_info.mark_synced() window_size = self._extract_window_size_seconds(result) diff --git a/app/services/token/models.py b/app/services/token/models.py index 0e8b3b5f7..f256586be 100644 --- a/app/services/token/models.py +++ b/app/services/token/models.py @@ -51,6 +51,10 @@ class TokenInfo(BaseModel): status: TokenStatus = TokenStatus.ACTIVE quota: int = BASIC__DEFAULT_QUOTA + # 消耗记录(本地累加,不依赖 API 返回值) + # 仅在 consumed_mode_enabled=true 时使用 + consumed: int = 0 + # 统计 created_at: int = Field( default_factory=lambda: int(datetime.now().timestamp() * 1000) @@ -106,29 +110,33 @@ def _normalize_token(cls, value): return token def is_available(self) -> bool: - """检查是否可用(状态正常且配额 > 0)""" + """检查是否可用(状态正常且未达到冷却阈值)""" + # 兼容旧数据:没有 consumed 字段时回退到 quota 判断 + if self.consumed > 0: + return self.status == TokenStatus.ACTIVE return self.status == TokenStatus.ACTIVE and self.quota > 0 def consume(self, effort: EffortType = EffortType.LOW) -> int: """ - 消耗配额 + 消耗配额(默认:扣减 quota) Args: - effort: LOW 扣 1 配额并计 1 次,HIGH 扣 4 配额并计 4 次 + effort: LOW 计 1 次,HIGH 计 4 次 Returns: 实际扣除的配额 """ cost = EFFORT_COST[effort] + + # 默认行为:扣减 quota actual_cost = min(cost, self.quota) self.last_used_at = int(datetime.now().timestamp() * 1000) - self.use_count += actual_cost # 使用 actual_cost 避免配额不足时过度计数 + self.consumed += cost # 无论是否开启消耗模式,都记录消耗 + self.use_count += actual_cost self.quota = max(0, self.quota - actual_cost) - # 注意:不在这里清零 fail_count,只有 record_success() 才清零 - # 这样可以避免失败后调用 consume 导致失败计数被重置 - + # 默认行为:quota 耗尽时标记冷却 if self.quota == 0: self.status = TokenStatus.COOLING elif self.status == TokenStatus.COOLING: @@ -137,9 +145,34 @@ def consume(self, effort: EffortType = EffortType.LOW) -> int: return actual_cost + def consume_with_consumed(self, effort: EffortType = EffortType.LOW) -> int: + """ + 消耗配额(consumed 模式:累加 consumed 而非扣减 quota) + + 仅在 consumed_mode_enabled=true 时使用 + + Args: + effort: LOW 计 1 次,HIGH 计 4 次 + + Returns: + 实际计入的消耗次数 + """ + cost = EFFORT_COST[effort] + + self.consumed += cost # 累加消耗记录 + self.last_used_at = int(datetime.now().timestamp() * 1000) + self.use_count += 1 + + # consumed 模式下不自动判断冷却,由 Rate Limits 检查或 429 触发 + if self.status == TokenStatus.COOLING: + # 只从 COOLING 恢复,不从 EXPIRED 恢复 + self.status = TokenStatus.ACTIVE + + return cost + def update_quota(self, new_quota: int): """ - 更新配额(用于 API 同步) + 更新配额(用于 API 同步 - 默认模式) Args: new_quota: 新的配额值 @@ -154,6 +187,19 @@ def update_quota(self, new_quota: int): ]: self.status = TokenStatus.ACTIVE + def update_quota_with_consumed(self, new_quota: int): + """ + 更新配额(consumed 模式) + + 仅在 consumed_mode_enabled=true 时使用 + + Args: + new_quota: 新的配额值 + """ + self.quota = max(0, new_quota) + + # consumed 模式下不再自动判断冷却,冷却由 Rate Limits 检查或 429 触发 + def reset(self, default_quota: Optional[int] = None): """重置配额到默认值""" quota = BASIC__DEFAULT_QUOTA if default_quota is None else default_quota @@ -161,6 +207,8 @@ def reset(self, default_quota: Optional[int] = None): self.status = TokenStatus.ACTIVE self.fail_count = 0 self.last_fail_reason = None + # 重置消耗记录 + self.consumed = 0 def record_fail( self, @@ -182,7 +230,7 @@ def record_fail( self.status = TokenStatus.EXPIRED def record_success(self, is_usage: bool = True): - """记录成功,清空失败计数并根据配额更新状态""" + """记录成功,清空失败计数""" self.fail_count = 0 self.last_fail_at = None self.last_fail_reason = None @@ -191,11 +239,6 @@ def record_success(self, is_usage: bool = True): self.use_count += 1 self.last_used_at = int(datetime.now().timestamp() * 1000) - if self.quota == 0: - self.status = TokenStatus.COOLING - else: - self.status = TokenStatus.ACTIVE - def need_refresh(self, interval_hours: int = 8) -> bool: """检查是否需要刷新配额""" if self.status != TokenStatus.COOLING: @@ -212,6 +255,22 @@ def mark_synced(self): """标记已同步""" self.last_sync_at = int(datetime.now().timestamp() * 1000) + def should_cool_down(self, remaining_tokens: int, threshold: int = 10) -> bool: + """ + 根据 Rate Limits 返回值判断是否应该冷却 + + Args: + remaining_tokens: API 返回的剩余配额 + threshold: 冷却阈值,默认 10 + + Returns: + 是否应该进入冷却状态 + """ + if remaining_tokens <= threshold: + self.status = TokenStatus.COOLING + return True + return False + class TokenPoolStats(BaseModel): """Token 池统计""" @@ -223,6 +282,8 @@ class TokenPoolStats(BaseModel): cooling: int = 0 total_quota: int = 0 avg_quota: float = 0.0 + total_consumed: int = 0 + avg_consumed: float = 0.0 __all__ = [ diff --git a/app/services/token/pool.py b/app/services/token/pool.py index ec43c75fd..23d87a43c 100644 --- a/app/services/token/pool.py +++ b/app/services/token/pool.py @@ -4,6 +4,7 @@ from typing import Dict, List, Optional, Iterator, Set from app.services.token.models import TokenInfo, TokenStatus, TokenPoolStats +from app.core.config import get_config class TokenPool: @@ -28,43 +29,103 @@ def get(self, token_str: str) -> Optional[TokenInfo]: """获取 Token""" return self._tokens.get(token_str) - def select(self, exclude: set = None, prefer_tags: Optional[Set[str]] = None) -> Optional[TokenInfo]: + def _is_consumed_mode(self) -> bool: + """检查是否启用 consumed 模式""" + try: + return get_config("token.consumed_mode_enabled", False) + except Exception: + return False + + def select( + self, exclude: set = None, prefer_tags: Optional[Set[str]] = None + ) -> Optional[TokenInfo]: """ 选择一个可用 Token - 策略: - 1. 选择 active 状态且有配额的 token - 2. 优先选择剩余额度最多的 - 3. 如果额度相同,随机选择(避免并发冲突) + + 默认模式(consumed_mode_enabled=false): + 1. 选择 active 状态且 quota > 0 的 token + 2. 优先选择剩余额度最多的 + 3. 如果额度相同,随机选择 + + Consumed 模式(consumed_mode_enabled=true): + 1. 选择 active 状态的 token + 2. 优先选择消耗次数(consumed)最少的 + 3. 如果 consumed 相同,随机选择 Args: exclude: 需要排除的 token 字符串集合 prefer_tags: 优先选择包含这些 tag 的 token(若存在则仅在其子集中选择) """ - # 选择 token - available = [ - t - for t in self._tokens.values() - if t.status == TokenStatus.ACTIVE and t.quota > 0 - and (not exclude or t.token not in exclude) - ] - - if not available: - return None - - # 优先选带指定标签的 token(若存在) - if prefer_tags: - preferred = [t for t in available if prefer_tags.issubset(set(t.tags or []))] - if preferred: - available = preferred - - # 找到最大额度 - max_quota = max(t.quota for t in available) - - # 筛选最大额度 - candidates = [t for t in available if t.quota == max_quota] - - # 随机选择 - return random.choice(candidates) + consumed_mode = self._is_consumed_mode() + + if consumed_mode: + # ===== Consumed 模式(新逻辑)===== + available = [ + t + for t in self._tokens.values() + if t.status == TokenStatus.ACTIVE + and (not exclude or t.token not in exclude) + ] + + if not available: + return None + + # 优先选带指定标签的 token(若存在) + if prefer_tags: + preferred = [ + t for t in available if prefer_tags.issubset(set(t.tags or [])) + ] + if preferred: + available = preferred + + # 分离新旧数据:consumed > 0 为新逻辑,consumed == 0 需要兼容旧逻辑 + new_logic_tokens = [t for t in available if t.consumed > 0] + old_logic_tokens = [t for t in available if t.consumed == 0] + + # 旧数据需要检查 quota > 0 + old_logic_tokens = [t for t in old_logic_tokens if t.quota > 0] + + # 优先使用新逻辑的 Token(按 consumed 排序) + if new_logic_tokens: + available = new_logic_tokens + elif old_logic_tokens: + available = old_logic_tokens + else: + return None + + # 找到最小消耗(优先选择消耗少的) + min_consumed = min(t.consumed for t in available) + candidates = [t for t in available if t.consumed == min_consumed] + return random.choice(candidates) + else: + # ===== 默认模式(旧逻辑)===== + available = [ + t + for t in self._tokens.values() + if t.status == TokenStatus.ACTIVE + and t.quota > 0 + and (not exclude or t.token not in exclude) + ] + + if not available: + return None + + # 优先选带指定标签的 token(若存在) + if prefer_tags: + preferred = [ + t for t in available if prefer_tags.issubset(set(t.tags or [])) + ] + if preferred: + available = preferred + + # 找到最大额度 + max_quota = max(t.quota for t in available) + + # 筛选最大额度 + candidates = [t for t in available if t.quota == max_quota] + + # 随机选择 + return random.choice(candidates) def count(self) -> int: """Token 数量""" @@ -80,6 +141,7 @@ def get_stats(self) -> TokenPoolStats: for token in self._tokens.values(): stats.total_quota += token.quota + stats.total_consumed += token.consumed if token.status == TokenStatus.ACTIVE: stats.active += 1 @@ -92,6 +154,7 @@ def get_stats(self) -> TokenPoolStats: if stats.total > 0: stats.avg_quota = stats.total_quota / stats.total + stats.avg_consumed = stats.total_consumed / stats.total return stats diff --git a/config.defaults.toml b/config.defaults.toml index c9e318593..55d83a426 100644 --- a/config.defaults.toml +++ b/config.defaults.toml @@ -90,6 +90,8 @@ save_delay_ms = 500 usage_flush_interval_sec = 5 # 多 worker 状态同步间隔(秒) reload_interval_sec = 30 +# 启用消耗模式(试验性功能,默认关闭) +consumed_mode_enabled = false # ==================== 缓存管理 ==================== [cache] From 2ea3d9f2fd2ffcffbc459e9dbdc32a1e616e5991 Mon Sep 17 00:00:00 2001 From: Jensen Zhang Date: Thu, 12 Mar 2026 08:24:54 +0000 Subject: [PATCH 2/3] fix(token): reset consumed when token enters cooling state - Reset consumed to 0 when quota reaches 0 in default mode (consume()) - Reset consumed to 0 when 429 rate limit is triggered (mark_rate_limited()) - Remove consumed reset on API sync, only restore active status - This ensures consumed represents 'usage count per cooling cycle' --- app/services/token/manager.py | 11 +++++------ app/services/token/models.py | 3 ++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/app/services/token/manager.py b/app/services/token/manager.py index 3fa9bbe98..0bd2cc456 100644 --- a/app/services/token/manager.py +++ b/app/services/token/manager.py @@ -673,6 +673,7 @@ async def mark_rate_limited(self, token_str: str) -> bool: old_quota = token.quota token.quota = 0 token.status = TokenStatus.COOLING + token.consumed = 0 # 进入冷却时重置本轮消耗 logger.warning( f"Token {raw_token[:10]}...: marked as rate limited " f"(quota {old_quota} -> 0, status -> cooling)" @@ -959,15 +960,13 @@ async def _refresh_one(item: tuple[str, TokenInfo]) -> dict: if consumed_mode: # Consumed 模式:使用新逻辑 token_info.update_quota_with_consumed(new_quota) - # 刷新成功后重置消耗记录 - if new_quota > 0: - token_info.consumed = 0 else: # 默认模式:使用旧逻辑 token_info.update_quota(new_quota) - # 刷新成功后如果 quota > 0,清除冷却状态 - if new_quota > 0: - token_info.status = TokenStatus.ACTIVE + + # 刷新成功后如果 quota > 0,恢复活跃状态 + if new_quota > 0: + token_info.status = TokenStatus.ACTIVE token_info.mark_synced() window_size = self._extract_window_size_seconds(result) diff --git a/app/services/token/models.py b/app/services/token/models.py index f256586be..775958f59 100644 --- a/app/services/token/models.py +++ b/app/services/token/models.py @@ -136,9 +136,10 @@ def consume(self, effort: EffortType = EffortType.LOW) -> int: self.use_count += actual_cost self.quota = max(0, self.quota - actual_cost) - # 默认行为:quota 耗尽时标记冷却 + # 默认行为:quota 耗尽时标记冷却,并重置消耗记录 if self.quota == 0: self.status = TokenStatus.COOLING + self.consumed = 0 # 进入冷却时重置本轮消耗 elif self.status == TokenStatus.COOLING: # 只从 COOLING 恢复,不从 EXPIRED 恢复 self.status = TokenStatus.ACTIVE From c577a2a01d8cad9d58541a711ff9621e86062b3e Mon Sep 17 00:00:00 2001 From: Jensen Zhang Date: Fri, 13 Mar 2026 16:06:00 +0800 Subject: [PATCH 3/3] fix(token): correct load balancing logic in consumed mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 在 consumed 模式下,移除错误的『新旧 token 分离逻辑』,改为直接在所有 available token 中选择 consumed 最少的。 问题根因: - 原逻辑将 consumed > 0 的 token 标记为『新逻辑』并优先选择 - 导致一旦某个 token 被使用过,其他 consumed=0 的 token 就永久被排除 - 造成单个 token 过载,其他 token 闲置的问题 修复方案: - 删除 new_logic_tokens 和 old_logic_tokens 的分离逻辑 - 直接在所有 available token 中按 consumed 排序,选择最少的 - 确保所有 token 都能参与负载均衡,实现真正的消耗均衡 测试验证: - 5 个 token,10 次请求,每个 token 恰好被选择 2 次 - 负载均衡差异为 0,符合预期 --- app/services/token/pool.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/app/services/token/pool.py b/app/services/token/pool.py index 23d87a43c..c118b0a2a 100644 --- a/app/services/token/pool.py +++ b/app/services/token/pool.py @@ -78,25 +78,12 @@ def select( if preferred: available = preferred - # 分离新旧数据:consumed > 0 为新逻辑,consumed == 0 需要兼容旧逻辑 - new_logic_tokens = [t for t in available if t.consumed > 0] - old_logic_tokens = [t for t in available if t.consumed == 0] - - # 旧数据需要检查 quota > 0 - old_logic_tokens = [t for t in old_logic_tokens if t.quota > 0] - - # 优先使用新逻辑的 Token(按 consumed 排序) - if new_logic_tokens: - available = new_logic_tokens - elif old_logic_tokens: - available = old_logic_tokens - else: - return None - # 找到最小消耗(优先选择消耗少的) min_consumed = min(t.consumed for t in available) candidates = [t for t in available if t.consumed == min_consumed] return random.choice(candidates) + + else: # ===== 默认模式(旧逻辑)===== available = [