diff --git a/_public/static/admin/js/config.js b/_public/static/admin/js/config.js
index 84c7111e..28589b38 100644
--- a/_public/static/admin/js/config.js
+++ b/_public/static/admin/js/config.js
@@ -150,7 +150,8 @@ const LOCALE_MAP = {
"fail_threshold": { title: "失败阈值", desc: "单个 Token 连续失败多少次后被标记为不可用。" },
"save_delay_ms": { title: "保存延迟", desc: "Token 变更合并写入的延迟(毫秒)。" },
"usage_flush_interval_sec": { title: "用量落库间隔", desc: "用量类字段写入数据库的最小间隔(秒)。" },
- "reload_interval_sec": { title: "同步间隔", desc: "多 worker 场景下 Token 状态刷新间隔(秒)。" }
+ "reload_interval_sec": { title: "同步间隔", desc: "多 worker 场景下 Token 状态刷新间隔(秒)。" },
+ "consumed_mode_enabled": { title: "启用消耗模式", desc: "启用新额度管理逻辑:使用本地消耗记录而非 API 返回值,支持更均衡的负载分配。(试验性功能,默认关闭)" }
},
diff --git a/_public/static/admin/js/token.js b/_public/static/admin/js/token.js
index abe2a7e3..fa47f7af 100644
--- a/_public/static/admin/js/token.js
+++ b/_public/static/admin/js/token.js
@@ -1,4 +1,5 @@
let apiKey = '';
+let consumedModeEnabled = false;
let allTokens = {};
let flatTokens = [];
let isBatchProcessing = false;
@@ -123,9 +124,11 @@ async function loadData() {
});
if (res.ok) {
const data = await res.json();
- allTokens = data;
- processTokens(data);
- updateStats(data);
+ allTokens = data.tokens;
+ consumedModeEnabled = data.consumed_mode_enabled || false;
+ updateQuotaHeader();
+ processTokens(data.tokens);
+ updateStats(data.tokens);
renderTable();
} else if (res.status === 401) {
logout();
@@ -151,6 +154,7 @@ function processTokens(data) {
token: t.token,
status: t.status || 'active',
quota: t.quota || 0,
+ consumed: t.consumed || 0,
note: t.note || '',
fail_count: t.fail_count || 0,
use_count: t.use_count || 0,
@@ -168,6 +172,19 @@ function processTokens(data) {
});
}
+/**
+ * Point the quota column header (#th-quota) at the label for the current display mode.
+ *
+ * Reads the module-level `consumedModeEnabled` flag and writes both the visible
+ * text and the `data-i18n` key of the header. Does nothing when the header
+ * element is not present in the DOM.
+ */
+function updateQuotaHeader() {
+    const header = document.getElementById('th-quota');
+    if (!header) return;
+
+    const labelKey = consumedModeEnabled ? 'token.tableQuotaConsumed' : 'token.tableQuota';
+    header.textContent = t(labelKey);
+    header.dataset.i18n = labelKey;
+}
+
function updateStats(data) {
// Logic same as before, simplified reuse if possible, but let's re-run on flatTokens
let totalTokens = flatTokens.length;
@@ -197,14 +214,27 @@ function updateStats(data) {
});
const imageQuota = Math.floor(chatQuota / 2);
+ const totalConsumed = flatTokens.reduce((sum, t) => sum + (t.consumed || 0), 0);
+ // 更新统计卡片 (这些不受 consumedMode 影响)
setText('stat-total', totalTokens.toLocaleString());
setText('stat-active', activeTokens.toLocaleString());
setText('stat-cooling', coolingTokens.toLocaleString());
setText('stat-invalid', invalidTokens.toLocaleString());
- setText('stat-chat-quota', chatQuota.toLocaleString());
- setText('stat-image-quota', imageQuota.toLocaleString());
+ // Show consumed totals in consumed mode, remaining quota otherwise.
+ // The two captions are rewritten in BOTH modes: updating them only while the mode
+ // is on would leave the "consumed" captions in place after the mode is switched
+ // off and the data is reloaded.
+ const chatLabel = document.querySelector('[data-i18n="token.statChatQuota"]');
+ const imageLabel = document.querySelector('[data-i18n="token.statImageQuota"]');
+ if (consumedModeEnabled) {
+     setText('stat-chat-quota', totalConsumed.toLocaleString());
+     setText('stat-image-quota', Math.floor(totalConsumed / 2).toLocaleString());
+     if (chatLabel) chatLabel.textContent = t('token.statChatConsumed');
+     if (imageLabel) imageLabel.textContent = t('token.statImageConsumed');
+ } else {
+     setText('stat-chat-quota', chatQuota.toLocaleString());
+     setText('stat-image-quota', imageQuota.toLocaleString());
+     if (chatLabel) chatLabel.textContent = t('token.statChatQuota');
+     if (imageLabel) imageLabel.textContent = t('token.statImageQuota');
+ }
+
setText('stat-total-calls', totalCalls.toLocaleString());
updateTabCounts({
@@ -293,7 +323,16 @@ function renderTable() {
// Quota (Center)
const tdQuota = document.createElement('td');
tdQuota.className = 'text-center font-mono text-xs';
- tdQuota.innerText = item.quota;
+ // 根据配置决定显示消耗还是剩余
+ if (consumedModeEnabled) {
+ tdQuota.innerText = item.consumed;
+ tdQuota.title = t('token.tableQuotaConsumed');
+ } else {
+ tdQuota.innerText = item.quota;
+ tdQuota.title = t('token.tableQuota');
+ }
+
+
// Note (Left)
const tdNote = document.createElement('td');
@@ -503,6 +542,23 @@ function openEditModal(index) {
byId('edit-pool').value = item.pool;
byId('edit-quota').value = item.quota;
byId('edit-note').value = item.note;
+
+ // Quota is read-only in consumed mode and editable otherwise.
+ // NOTE(review): the field still holds item.quota while its label reads
+ // "consumed" in consumed mode — confirm which value is meant to be shown.
+ const quotaInput = byId('edit-quota');
+ // The label is taken to be the element just before the input's nearest ancestor <div>.
+ const quotaLabel = quotaInput.closest('div')?.previousElementSibling;
+ quotaInput.disabled = consumedModeEnabled;
+ quotaInput.classList.toggle('bg-gray-100', consumedModeEnabled);
+ quotaInput.classList.toggle('text-gray-400', consumedModeEnabled);
+ if (quotaLabel) {
+     quotaLabel.textContent = t(consumedModeEnabled ? 'token.tableQuotaConsumed' : 'token.editQuota');
+ }
+
document.querySelector('#edit-modal h3').innerText = t('token.editTitle');
} else {
// New Token
@@ -518,6 +574,14 @@ function openEditModal(index) {
byId('edit-quota').value = getDefaultQuotaForPool('ssoBasic');
byId('edit-note').value = '';
document.querySelector('#edit-modal h3').innerText = t('token.addTitle');
+
+ // 新建 Token 时启用 quota 编辑
+ const newQuotaInput = byId('edit-quota');
+ const newQuotaInputParent = newQuotaInput?.closest('div');
+ const newQuotaLabel = newQuotaInputParent?.previousElementSibling;
+ newQuotaInput.disabled = false;
+ newQuotaInput.classList.remove('bg-gray-100', 'text-gray-400');
+ if (newQuotaLabel) newQuotaLabel.textContent = t('token.editQuota');
}
openModal('edit-modal');
diff --git a/_public/static/admin/pages/token.html b/_public/static/admin/pages/token.html
index c4843d80..50f321d0 100644
--- a/_public/static/admin/pages/token.html
+++ b/_public/static/admin/pages/token.html
@@ -167,7 +167,7 @@
Token
| Token |
类型 |
状态 |
- 额度 |
+ 额度 |
备注 |
操作 |
diff --git a/_public/static/i18n/locales/en.json b/_public/static/i18n/locales/en.json
index d417fc3f..77eb6500 100644
--- a/_public/static/i18n/locales/en.json
+++ b/_public/static/i18n/locales/en.json
@@ -244,6 +244,8 @@
"statInvalid": "Invalid",
"statChatQuota": "Chat Remaining",
"statImageQuota": "Image Remaining",
+ "statChatConsumed": "Chat Consumed",
+ "statImageConsumed": "Image Consumed",
"statVideoQuota": "Video Remaining",
"statVideoUnavailable": "N/A",
"statTotalCalls": "Total Calls",
@@ -258,6 +260,7 @@
"tableType": "Type",
"tableStatus": "Status",
"tableQuota": "Quota",
+ "tableQuotaConsumed": "Consumed",
"tableNote": "Note",
"tableActions": "Actions",
"refreshStatus": "Refresh status",
diff --git a/_public/static/i18n/locales/zh.json b/_public/static/i18n/locales/zh.json
index 1cd4df3e..00a5f7b3 100644
--- a/_public/static/i18n/locales/zh.json
+++ b/_public/static/i18n/locales/zh.json
@@ -244,6 +244,8 @@
"statInvalid": "Token 失效",
"statChatQuota": "Chat 剩余",
"statImageQuota": "Image 剩余",
+ "statChatConsumed": "Chat 已消耗",
+ "statImageConsumed": "Image 已消耗",
"statVideoQuota": "Video 剩余",
"statVideoUnavailable": "无法统计",
"statTotalCalls": "总调用次数",
@@ -258,6 +260,7 @@
"tableType": "类型",
"tableStatus": "状态",
"tableQuota": "额度",
+ "tableQuotaConsumed": "已消耗",
"tableNote": "备注",
"tableActions": "操作",
"refreshStatus": "刷新状态",
diff --git a/app/api/v1/admin/token.py b/app/api/v1/admin/token.py
index d417ee88..ebe91405 100644
--- a/app/api/v1/admin/token.py
+++ b/app/api/v1/admin/token.py
@@ -48,7 +48,14 @@ async def get_tokens():
"""获取所有 Token"""
storage = get_storage()
tokens = await storage.load_tokens()
- return tokens or {}
+ # 获取消耗模式配置
+ from app.core.config import get_config
+ consumed_mode = get_config("token.consumed_mode_enabled", False)
+
+ return {
+ "tokens": tokens or {},
+ "consumed_mode_enabled": consumed_mode
+ }
@router.post("/tokens", dependencies=[Depends(verify_app_key)])
diff --git a/app/services/token/manager.py b/app/services/token/manager.py
index eb668edb..0bd2cc45 100644
--- a/app/services/token/manager.py
+++ b/app/services/token/manager.py
@@ -471,7 +471,18 @@ async def consume(
token = pool.get(raw_token)
if token:
old_status = token.status
- consumed = token.consume(effort)
+ # 检查是否启用消耗模式
+ consumed_mode = False
+ try:
+ from app.core.config import get_config
+ consumed_mode = get_config("token.consumed_mode_enabled", False)
+ except Exception:
+ pass
+
+ if consumed_mode:
+ consumed = token.consume_with_consumed(effort)
+ else:
+ consumed = token.consume(effort)
logger.debug(
f"Token {raw_token[:10]}...: consumed {consumed} quota, use_count={token.use_count}"
)
@@ -662,6 +673,7 @@ async def mark_rate_limited(self, token_str: str) -> bool:
old_quota = token.quota
token.quota = 0
token.status = TokenStatus.COOLING
+ token.consumed = 0 # 进入冷却时重置本轮消耗
logger.warning(
f"Token {raw_token[:10]}...: marked as rate limited "
f"(quota {old_quota} -> 0, status -> cooling)"
@@ -936,6 +948,25 @@ async def _refresh_one(item: tuple[str, TokenInfo]) -> dict:
old_status = token_info.status
- token_info.update_quota(new_quota)
+
+ # Pick the quota-sync path for the configured mode. Exactly one update runs:
+ # the unconditional update_quota() call that used to sit here is removed so the
+ # legacy path no longer runs ahead of (or in addition to) the selected one.
+ consumed_mode = False
+ try:
+     from app.core.config import get_config
+     consumed_mode = get_config("token.consumed_mode_enabled", False)
+ except Exception:
+     # Best effort: fall back to the default mode if the setting cannot be read.
+     pass
+
+ if consumed_mode:
+     # Consumed mode: new logic
+     token_info.update_quota_with_consumed(new_quota)
+ else:
+     # Default mode: legacy logic
+     token_info.update_quota(new_quota)
+
+ # After a successful refresh with quota left, mark the token active again.
+ # NOTE(review): this overrides whatever status the token had, not only
+ # COOLING — confirm that other non-active states should be lifted here too.
+ if new_quota > 0:
+     token_info.status = TokenStatus.ACTIVE
token_info.mark_synced()
window_size = self._extract_window_size_seconds(result)
diff --git a/app/services/token/models.py b/app/services/token/models.py
index 0e8b3b5f..775958f5 100644
--- a/app/services/token/models.py
+++ b/app/services/token/models.py
@@ -51,6 +51,10 @@ class TokenInfo(BaseModel):
status: TokenStatus = TokenStatus.ACTIVE
quota: int = BASIC__DEFAULT_QUOTA
+ # 消耗记录(本地累加,不依赖 API 返回值)
+ # 仅在 consumed_mode_enabled=true 时使用
+ consumed: int = 0
+
# 统计
created_at: int = Field(
default_factory=lambda: int(datetime.now().timestamp() * 1000)
@@ -106,40 +110,70 @@ def _normalize_token(cls, value):
return token
def is_available(self) -> bool:
- """检查是否可用(状态正常且配额 > 0)"""
+ """检查是否可用(状态正常且未达到冷却阈值)"""
+ # 兼容旧数据:没有 consumed 字段时回退到 quota 判断
+ if self.consumed > 0:
+ return self.status == TokenStatus.ACTIVE
return self.status == TokenStatus.ACTIVE and self.quota > 0
def consume(self, effort: EffortType = EffortType.LOW) -> int:
"""
- 消耗配额
+ 消耗配额(默认:扣减 quota)
Args:
- effort: LOW 扣 1 配额并计 1 次,HIGH 扣 4 配额并计 4 次
+ effort: LOW 计 1 次,HIGH 计 4 次
Returns:
实际扣除的配额
"""
cost = EFFORT_COST[effort]
+
+ # 默认行为:扣减 quota
actual_cost = min(cost, self.quota)
self.last_used_at = int(datetime.now().timestamp() * 1000)
- self.use_count += actual_cost # 使用 actual_cost 避免配额不足时过度计数
+ self.consumed += cost # 无论是否开启消耗模式,都记录消耗
+ self.use_count += actual_cost
self.quota = max(0, self.quota - actual_cost)
- # 注意:不在这里清零 fail_count,只有 record_success() 才清零
- # 这样可以避免失败后调用 consume 导致失败计数被重置
-
+ # 默认行为:quota 耗尽时标记冷却,并重置消耗记录
if self.quota == 0:
self.status = TokenStatus.COOLING
+ self.consumed = 0 # 进入冷却时重置本轮消耗
elif self.status == TokenStatus.COOLING:
# 只从 COOLING 恢复,不从 EXPIRED 恢复
self.status = TokenStatus.ACTIVE
return actual_cost
+ def consume_with_consumed(self, effort: EffortType = EffortType.LOW) -> int:
+     """
+     Record usage in consumed mode: add to ``consumed`` instead of deducting ``quota``.
+
+     Intended for use only when ``consumed_mode_enabled`` is true.
+
+     Args:
+         effort: Looked up in ``EFFORT_COST`` to get the cost of this call.
+
+     Returns:
+         The cost that was added to ``consumed``.
+     """
+     cost = EFFORT_COST[effort]
+
+     self.consumed += cost
+     self.last_used_at = int(datetime.now().timestamp() * 1000)
+     # Count usage by cost, the same unit consume() uses, so use_count stays
+     # comparable between the two modes (previously this added a flat 1).
+     self.use_count += cost
+
+     # No cooling decision is made here in consumed mode.
+     # NOTE(review): a COOLING token is switched back to ACTIVE on any
+     # consumption, with no quota check — confirm this cannot undo a cooldown
+     # that was set on purpose.
+     if self.status == TokenStatus.COOLING:
+         # Only COOLING is lifted; other statuses are left as they are.
+         self.status = TokenStatus.ACTIVE
+
+     return cost
+
def update_quota(self, new_quota: int):
"""
- 更新配额(用于 API 同步)
+ 更新配额(用于 API 同步 - 默认模式)
Args:
new_quota: 新的配额值
@@ -154,6 +188,19 @@ def update_quota(self, new_quota: int):
]:
self.status = TokenStatus.ACTIVE
+ def update_quota_with_consumed(self, new_quota: int):
+     """
+     Store a freshly synced quota value (consumed mode).
+
+     Intended for use only when ``consumed_mode_enabled`` is true. This writes
+     ``quota`` alone and leaves ``status`` untouched.
+
+     Args:
+         new_quota: Quota value to store; negative values are stored as 0.
+     """
+     # Clamp at zero. No cooling decision is taken here in consumed mode.
+     self.quota = new_quota if new_quota > 0 else 0
+
def reset(self, default_quota: Optional[int] = None):
"""重置配额到默认值"""
quota = BASIC__DEFAULT_QUOTA if default_quota is None else default_quota
@@ -161,6 +208,8 @@ def reset(self, default_quota: Optional[int] = None):
self.status = TokenStatus.ACTIVE
self.fail_count = 0
self.last_fail_reason = None
+ # 重置消耗记录
+ self.consumed = 0
def record_fail(
self,
@@ -182,7 +231,7 @@ def record_fail(
self.status = TokenStatus.EXPIRED
def record_success(self, is_usage: bool = True):
- """记录成功,清空失败计数并根据配额更新状态"""
+ """记录成功,清空失败计数"""
self.fail_count = 0
self.last_fail_at = None
self.last_fail_reason = None
@@ -191,11 +240,6 @@ def record_success(self, is_usage: bool = True):
self.use_count += 1
self.last_used_at = int(datetime.now().timestamp() * 1000)
- if self.quota == 0:
- self.status = TokenStatus.COOLING
- else:
- self.status = TokenStatus.ACTIVE
-
def need_refresh(self, interval_hours: int = 8) -> bool:
"""检查是否需要刷新配额"""
if self.status != TokenStatus.COOLING:
@@ -212,6 +256,22 @@ def mark_synced(self):
"""标记已同步"""
self.last_sync_at = int(datetime.now().timestamp() * 1000)
+ def should_cool_down(self, remaining_tokens: int, threshold: int = 10) -> bool:
+     """
+     Decide from a remaining-quota figure whether this token should cool down.
+
+     Side effect: when the answer is yes, ``status`` is set to COOLING before
+     returning.
+
+     Args:
+         remaining_tokens: Remaining quota to compare against the threshold.
+         threshold: Cool down when ``remaining_tokens`` is at or below this value
+             (default 10).
+
+     Returns:
+         True if the token was moved to COOLING, False otherwise.
+     """
+     must_cool = remaining_tokens <= threshold
+     if must_cool:
+         self.status = TokenStatus.COOLING
+     return must_cool
+
class TokenPoolStats(BaseModel):
"""Token 池统计"""
@@ -223,6 +283,8 @@ class TokenPoolStats(BaseModel):
cooling: int = 0
total_quota: int = 0
avg_quota: float = 0.0
+ total_consumed: int = 0
+ avg_consumed: float = 0.0
__all__ = [
diff --git a/app/services/token/pool.py b/app/services/token/pool.py
index ec43c75f..c118b0a2 100644
--- a/app/services/token/pool.py
+++ b/app/services/token/pool.py
@@ -4,6 +4,7 @@
from typing import Dict, List, Optional, Iterator, Set
from app.services.token.models import TokenInfo, TokenStatus, TokenPoolStats
+from app.core.config import get_config
class TokenPool:
@@ -28,43 +29,90 @@ def get(self, token_str: str) -> Optional[TokenInfo]:
"""获取 Token"""
return self._tokens.get(token_str)
- def select(self, exclude: set = None, prefer_tags: Optional[Set[str]] = None) -> Optional[TokenInfo]:
+ def _is_consumed_mode(self) -> bool:
+     """Return the ``token.consumed_mode_enabled`` setting, or False if reading it fails."""
+     try:
+         enabled = get_config("token.consumed_mode_enabled", False)
+     except Exception:
+         # Best effort: any failure while reading the setting means default mode.
+         return False
+     return enabled
+
+ def select(
+ self, exclude: set = None, prefer_tags: Optional[Set[str]] = None
+ ) -> Optional[TokenInfo]:
"""
选择一个可用 Token
- 策略:
- 1. 选择 active 状态且有配额的 token
- 2. 优先选择剩余额度最多的
- 3. 如果额度相同,随机选择(避免并发冲突)
+
+ 默认模式(consumed_mode_enabled=false):
+ 1. 选择 active 状态且 quota > 0 的 token
+ 2. 优先选择剩余额度最多的
+ 3. 如果额度相同,随机选择
+
+ Consumed 模式(consumed_mode_enabled=true):
+ 1. 选择 active 状态的 token
+ 2. 优先选择消耗次数(consumed)最少的
+ 3. 如果 consumed 相同,随机选择
Args:
exclude: 需要排除的 token 字符串集合
prefer_tags: 优先选择包含这些 tag 的 token(若存在则仅在其子集中选择)
"""
- # 选择 token
- available = [
- t
- for t in self._tokens.values()
- if t.status == TokenStatus.ACTIVE and t.quota > 0
- and (not exclude or t.token not in exclude)
- ]
-
- if not available:
- return None
-
- # 优先选带指定标签的 token(若存在)
- if prefer_tags:
- preferred = [t for t in available if prefer_tags.issubset(set(t.tags or []))]
- if preferred:
- available = preferred
-
- # 找到最大额度
- max_quota = max(t.quota for t in available)
-
- # 筛选最大额度
- candidates = [t for t in available if t.quota == max_quota]
-
- # 随机选择
- return random.choice(candidates)
+ consumed_mode = self._is_consumed_mode()
+
+ # Candidates: active and not excluded; the default mode also requires quota left.
+ available = [
+     t
+     for t in self._tokens.values()
+     if t.status == TokenStatus.ACTIVE
+     and (consumed_mode or t.quota > 0)
+     and (not exclude or t.token not in exclude)
+ ]
+ if not available:
+     return None
+
+ # Narrow to tokens carrying every preferred tag, when at least one does.
+ if prefer_tags:
+     preferred = [
+         t for t in available if prefer_tags.issubset(set(t.tags or []))
+     ]
+     if preferred:
+         available = preferred
+
+ if consumed_mode:
+     # Consumed mode: the least-consumed tokens win.
+     best = min(t.consumed for t in available)
+     candidates = [t for t in available if t.consumed == best]
+ else:
+     # Default mode: the tokens with the most remaining quota win.
+     best = max(t.quota for t in available)
+     candidates = [t for t in available if t.quota == best]
+
+ # Ties are broken at random.
+ return random.choice(candidates)
def count(self) -> int:
"""Token 数量"""
@@ -80,6 +128,7 @@ def get_stats(self) -> TokenPoolStats:
for token in self._tokens.values():
stats.total_quota += token.quota
+ stats.total_consumed += token.consumed
if token.status == TokenStatus.ACTIVE:
stats.active += 1
@@ -92,6 +141,7 @@ def get_stats(self) -> TokenPoolStats:
if stats.total > 0:
stats.avg_quota = stats.total_quota / stats.total
+ stats.avg_consumed = stats.total_consumed / stats.total
return stats
diff --git a/config.defaults.toml b/config.defaults.toml
index c9e31859..55d83a42 100644
--- a/config.defaults.toml
+++ b/config.defaults.toml
@@ -90,6 +90,8 @@ save_delay_ms = 500
usage_flush_interval_sec = 5
# 多 worker 状态同步间隔(秒)
reload_interval_sec = 30
+# 启用消耗模式(试验性功能,默认关闭)
+consumed_mode_enabled = false
# ==================== 缓存管理 ====================
[cache]