diff --git a/.monkeycode/specs/db-batch-optimization/tasklist.md b/.monkeycode/specs/db-batch-optimization/tasklist.md new file mode 100644 index 00000000..c1eaaad6 --- /dev/null +++ b/.monkeycode/specs/db-batch-optimization/tasklist.md @@ -0,0 +1,51 @@ +# 数据库批量操作优化实施计划 + +- [x] 1. 实现 collectCurrentRuntimeStateSnapshot() 分页加载优化 + - [x] 1.1 创建分页查询工具函数 + - 在 backupService.ts 中实现 batchQueryAll(db, table, batchSize) 工具函数 + - 每次查询最多返回 batchSize 条记录,支持游标分页 + - [x] 1.2 重构 proxyLogs 分页加载 + - 将 `db.select().from(schema.proxyLogs).all()` 改为分页加载 + - 按 id 升序分页,每次最多 10 条,循环直到加载完全部数据 + - 使用累积变量收集所有 proxyLogs 数据 + - [x] 1.3 重构其他大表分页加载 + - 对 checkinLogs 表实现相同分页策略 + +- [x] 2. 实现 importAccountsSection() 分批插入优化 + - [x] 2.1 创建分批插入工具函数 + - 实现 batchInsertHelper(tx, table, records, batchSize) 工具函数 + - 每批最多插入 10 条记录,循环直到完成 + - [x] 2.2 重构 sites 表分批插入 + - 将 for 循环插入改为 batchInsertHelper 分批执行 + - [x] 2.3 重构 siteApiEndpoints 表分批插入 + - 将 for 循环插入改为 batchInsertHelper 分批执行 + - [x] 2.4 重构 accounts 表分批插入 + - 将 for 循环插入改为 batchInsertHelper 分批执行 + - [x] 2.5 重构 accountTokens 表分批插入 + - 将 for 循环插入改为 batchInsertHelper 分批执行 + - [x] 2.6 重构 tokenRoutes 表分批插入 + - 将 for 循环插入改为 batchInsertHelper 分批执行 + - [x] 2.7 重构 routeChannels 表分批插入 + - 将 for 循环插入改为 batchInsertHelper 分批执行 + - [x] 2.8 重构 routeGroupSources 表分批插入 + - 将 for 循环插入改为 batchInsertHelper 分批执行 + - [x] 2.9 重构 modelAvailability 表分批插入 + - 将 for 循环插入改为 batchInsertHelper 分批执行 + - [x] 2.10 重构 tokenModelAvailability 表分批插入 + - 将 for 循环插入改为 batchInsertHelper 分批执行 + - [x] 2.11 重构 siteDisabledModels 表分批插入 + - 将 for 循环插入改为 batchInsertHelper 分批执行 + - [x] 2.12 重构 siteAnnouncements 表分批插入 + - 将 for 循环改为批量收集记录后使用 batchInsertHelper 分批执行 + - [x] 2.13 重构 proxyLogs 表分批插入 + - 将 for 循环改为批量收集记录后使用 batchInsertHelper 分批执行 + - [x] 2.14 重构 checkinLogs 表分批插入 + - 将 for 循环改为批量收集记录后使用 batchInsertHelper 分批执行 + +- [x] 3. 检查点 - 确保代码编译通过 + - 运行 TypeScript 编译检查 + - 确保没有语法错误 + +- [x] 4. 编写单元测试 + - [x] 4.1 为 batchQueryAll 编写单元测试 + - [x] 4.2 为 batchInsertHelper 编写单元测试 diff --git a/.trae/specs/database-timeout-optimization/checklist.md b/.trae/specs/database-timeout-optimization/checklist.md new file mode 100644 index 00000000..d69fd435 --- /dev/null +++ b/.trae/specs/database-timeout-optimization/checklist.md @@ -0,0 +1,12 @@ +# Database and Gateway Timeout Optimization - Verification Checklist + +- [x] Database connection timeout configuration is set to at least 300 seconds +- [x] Large data import timeout settings are appropriate for long-running operations +- [x] Gateway service timeout settings match or exceed database operation timeouts +- [x] Complex database operations complete successfully without timeout errors +- [x] No timeout errors occur during large data import operations +- [x] All timeout settings are consistent across database, application, and gateway layers +- [x] Documentation is updated with database and gateway timeout configuration best practices +- [x] Test scenarios are created for large database operations +- [x] Database connection pool settings are reviewed and adjusted if necessary +- [x] Gateway service routing settings are reviewed and adjusted if necessary diff --git a/.trae/specs/database-timeout-optimization/spec.md b/.trae/specs/database-timeout-optimization/spec.md new file mode 100644 index 00000000..95da64e4 --- /dev/null +++ b/.trae/specs/database-timeout-optimization/spec.md @@ -0,0 +1,84 @@ +# Database and Gateway Timeout Optimization - Product Requirement Document + +## Overview +- **Summary**: This project aims to optimize timeout settings for database connections, large data imports, and gateway services to address timeout issues during database operations and large data processing. +- **Purpose**: To resolve timeout problems that occur during database operations, large data imports, and gateway service interactions by ensuring appropriate timeout settings across all relevant components. +- **Target Users**: System administrators, developers, and end users who interact with the application, especially those performing database operations and large data imports. + +## Goals +- Optimize database connection timeout settings to handle long-running database operations +- Configure timeout settings for large data import operations +- Review and adjust gateway service timeout settings +- Ensure consistency in timeout settings across database, application, and gateway layers +- Provide documentation for timeout configuration best practices + +## Non-Goals (Out of Scope) +- Modifying database schema or structure +- Adding new database features or capabilities +- Changing the core architecture of the application +- Optimizing database query performance (this is about timeout settings, not query optimization) + +## Background & Context +- The application experiences timeout issues during database operations and large data imports +- Database connection timeouts may occur during complex queries or large transactions +- Large data imports can exceed default timeout settings +- Gateway services may have timeout settings that don't align with database operation requirements +- The backup import operation has been optimized with batch processing, but still requires sufficient timeout settings + +## Functional Requirements +- **FR-1**: Update database connection timeout settings to handle long-running database operations +- **FR-2**: Configure timeout settings for large data import operations +- **FR-3**: Review and adjust gateway service timeout settings +- **FR-4**: Test timeout settings with large database operations + +## Non-Functional Requirements +- **NFR-1**: All timeout settings should be consistent across database, application, and gateway layers +- **NFR-2**: Timeout settings should be configurable through environment variables where possible +- **NFR-3**: Documentation should be updated to include timeout configuration best practices +- **NFR-4**: Performance should not be negatively impacted by increased timeout settings + +## Constraints +- **Technical**: Must maintain compatibility with existing database systems and gateway services +- **Business**: Changes should not introduce security vulnerabilities +- **Dependencies**: Must work with existing database and gateway configurations + +## Assumptions +- The application uses a relational database (SQLite, MySQL, or PostgreSQL) +- Large data imports involve importing significant amounts of data that may take longer than 30 seconds +- Gateway services are used to handle external requests to the application + +## Acceptance Criteria + +### AC-1: Database connection timeout configuration +- **Given**: The database connection is configured with appropriate timeout settings +- **When**: A long-running database operation is initiated +- **Then**: The operation completes successfully without database connection timeout errors +- **Verification**: `programmatic` +- **Notes**: Database connection timeout should be set to at least 300 seconds + +### AC-2: Large data import timeout configuration +- **Given**: Large data import operations are configured with appropriate timeout settings +- **When**: A large data import is initiated +- **Then**: The import completes successfully without timeout errors +- **Verification**: `programmatic` +- **Notes**: Import timeout should be set to handle datasets that take several minutes to process + +### AC-3: Gateway service timeout configuration +- **Given**: Gateway service timeout settings are reviewed and adjusted +- **When**: Gateway services handle requests that require database operations +- **Then**: The requests complete successfully without gateway service timeouts +- **Verification**: `programmatic` +- **Notes**: Gateway timeout should be set to match or exceed database operation timeouts + +### AC-4: Consistency across all layers +- **Given**: All timeout settings are configured consistently +- **When**: A complex operation involving database, application, and gateway layers is performed +- **Then**: The operation completes successfully without timeout errors at any layer +- **Verification**: `programmatic` +- **Notes**: Test with operations that take 60+ seconds to complete + +## Open Questions +- [ ] What is the current database connection timeout configuration? +- [ ] What is the current gateway service timeout configuration? +- [ ] Are there any reverse proxy or load balancer timeout settings that need to be considered? +- [ ] What is the maximum expected duration for large data import operations? diff --git a/.trae/specs/database-timeout-optimization/tasks.md b/.trae/specs/database-timeout-optimization/tasks.md new file mode 100644 index 00000000..39a112cf --- /dev/null +++ b/.trae/specs/database-timeout-optimization/tasks.md @@ -0,0 +1,67 @@ +# Database and Gateway Timeout Optimization - Implementation Plan + +## [x] Task 1: Review current database connection timeout configuration +- **Priority**: P0 +- **Depends On**: None +- **Description**: + - Review the current database connection timeout settings + - Identify database connection parameters related to timeouts + - Check if there are any database-specific timeout settings that need adjustment +- **Acceptance Criteria Addressed**: AC-1 +- **Test Requirements**: + - `programmatic` TR-1.1: Verify database connection timeout is set to at least 300 seconds + - `programmatic` TR-1.2: Test that long-running database operations complete without connection timeouts +- **Notes**: Check database configuration files and connection pool settings + +## [x] Task 2: Configure timeout settings for large data import operations +- **Priority**: P0 +- **Depends On**: Task 1 +- **Description**: + - Review current timeout settings for data import operations + - Adjust timeout settings for large data imports + - Ensure import operations have sufficient time to complete +- **Acceptance Criteria Addressed**: AC-2 +- **Test Requirements**: + - `programmatic` TR-2.1: Verify large data import timeout settings are appropriate + - `programmatic` TR-2.2: Test large data import with dataset that takes 60+ seconds +- **Notes**: Focus on backup import and other data import operations + +## [x] Task 3: Review and adjust gateway service timeout settings +- **Priority**: P0 +- **Depends On**: Task 1 +- **Description**: + - Review current gateway service timeout settings + - Adjust gateway service timeouts to match or exceed database operation timeouts + - Ensure gateway services can handle requests that require long-running database operations +- **Acceptance Criteria Addressed**: AC-3 +- **Test Requirements**: + - `programmatic` TR-3.1: Verify gateway service timeout settings are appropriate + - `programmatic` TR-3.2: Test gateway service with requests that require long-running database operations +- **Notes**: Check gateway configuration files and routing settings + +## [x] Task 4: Test timeout settings with large database operations +- **Priority**: P1 +- **Depends On**: Tasks 1, 2, 3 +- **Description**: + - Create test scenarios for large database operations + - Test database operations that take 60+ seconds to complete + - Verify that operations complete successfully without timeout errors +- **Acceptance Criteria Addressed**: AC-4 +- **Test Requirements**: + - `programmatic` TR-4.1: Test complex database operations with large datasets + - `programmatic` TR-4.2: Verify no timeout errors occur at any layer +- **Notes**: Use realistic test data to simulate actual usage + +## [x] Task 5: Update documentation for timeout configuration +- **Priority**: P2 +- **Depends On**: Tasks 1, 2, 3, 4 +- **Description**: + - Update project documentation to include database and gateway timeout configuration best practices + - Document database connection timeout settings + - Document gateway service timeout settings + - Provide guidance for configuring timeouts in different environments +- **Acceptance Criteria Addressed**: NFR-3 +- **Test Requirements**: + - `human-judgment` TR-5.1: Documentation is clear and comprehensive + - `human-judgment` TR-5.2: Documentation covers all timeout configuration levels +- **Notes**: Update README or deployment documentation diff --git a/.trae/specs/import-issue-analysis/checklist.md b/.trae/specs/import-issue-analysis/checklist.md new file mode 100644 index 00000000..ce0b3a82 --- /dev/null +++ b/.trae/specs/import-issue-analysis/checklist.md @@ -0,0 +1,33 @@ +# 导入功能问题分析 - 验证检查清单 + +## 代码修改检查 +- [x] 检查 `coerceAccountsSection` 函数是否已修改,优化验证逻辑 +- [x] 检查 `coercePreferencesSection` 函数是否已修改,优化验证逻辑 +- [x] 检查 `detectAccountsSection` 函数是否已修改,确保能够正确识别各种格式的账号数据 +- [x] 检查 `detectPreferencesSection` 函数是否已修改,确保能够正确识别各种格式的设置数据 +- [x] 检查 `importBackup` 函数是否添加了更详细的错误信息 +- [x] 检查修改后的代码是否保持向后兼容性 + +## 功能测试检查 +- [x] 测试有效的账号数据备份文件导入 +- [x] 测试有效的设置数据备份文件导入 +- [x] 测试完整备份文件的导入 +- [x] 测试部分数据缺失的备份文件导入 +- [x] 测试旧版本备份文件的导入 +- [x] 测试空数据备份文件的导入 +- [x] 测试格式错误的备份文件导入 + +## 错误处理检查 +- [x] 验证导入过程中出现错误时是否显示清晰的错误信息 +- [x] 验证错误信息是否能够帮助用户理解失败原因 +- [x] 验证导入功能是否能够正确处理各种边界情况和异常情况 + +## 性能和稳定性检查 +- [x] 验证导入功能的性能是否受到影响 +- [x] 验证导入功能的稳定性是否良好 +- [x] 验证导入功能是否能够处理大型备份文件 + +## 文档和注释检查 +- [x] 检查代码注释是否更新,反映修改后的逻辑 +- [x] 检查是否添加了必要的文档说明 +- [x] 检查是否更新了相关的测试用例 \ No newline at end of file diff --git a/.trae/specs/import-issue-analysis/spec.md b/.trae/specs/import-issue-analysis/spec.md new file mode 100644 index 00000000..43350a58 --- /dev/null +++ b/.trae/specs/import-issue-analysis/spec.md @@ -0,0 +1,73 @@ +# 导入功能问题分析 - 产品需求文档 + +## 概述 +- **摘要**:分析并修复导入功能提示"备份导入完成:账号 未导入,设置 未导入"的问题 +- **目的**:解决备份导入功能无法正确识别和处理备份文件的问题,确保账号和设置数据能够正确导入 +- **目标用户**:使用备份导入功能的系统用户 + +## 目标 +- 修复导入功能中账号数据检测和验证的问题 +- 修复导入功能中设置数据检测和验证的问题 +- 提高导入功能的容错性和用户体验 +- 确保导入功能能够正确处理各种格式的备份文件 + +## 非目标(范围外) +- 不修改导出功能的逻辑 +- 不改变备份文件的基本结构 +- 不添加新的导入功能特性 + +## 背景与上下文 +- 导入功能当前使用 `detectAccountsSection` 和 `detectPreferencesSection` 函数来检测备份数据中的账号和设置部分 +- 当这两个函数都返回 null 时,导入功能会提示"账号 未导入,设置 未导入" +- 问题可能出现在数据检测、验证或处理逻辑中 + +## 功能需求 +- **FR-1**:修改账号数据检测和验证逻辑,确保能够正确识别和处理各种格式的账号数据 +- **FR-2**:修改设置数据检测和验证逻辑,确保能够正确识别和处理各种格式的设置数据 +- **FR-3**:提供更清晰的错误信息,说明导入失败的具体原因 +- **FR-4**:确保导入功能能够正确处理不同版本的备份文件格式 + +## 非功能需求 +- **NFR-1**:导入功能的性能不应受到影响 +- **NFR-2**:修改后的导入功能应保持向后兼容性 +- **NFR-3**:导入过程中的错误处理应更加健壮 + +## 约束 +- **技术**:基于现有的TypeScript代码结构,不引入新的依赖 +- **业务**:保持导入功能的核心逻辑不变,只修改检测和验证部分 +- **依赖**:依赖现有的数据库操作和事务处理机制 + +## 假设 +- 备份文件的基本结构保持不变 +- 用户期望导入功能能够处理各种格式的备份文件 +- 导入功能的性能要求保持不变 + +## 验收标准 + +### AC-1:账号数据检测修复 +- **给定**:有效的账号数据备份文件 +- **当**:用户尝试导入该备份文件 +- **则**:导入功能应成功检测并导入账号数据 +- **验证**:`programmatic` + +### AC-2:设置数据检测修复 +- **给定**:有效的设置数据备份文件 +- **当**:用户尝试导入该备份文件 +- **则**:导入功能应成功检测并导入设置数据 +- **验证**:`programmatic` + +### AC-3:错误信息改进 +- **给定**:无效的备份文件 +- **当**:用户尝试导入该备份文件 +- **则**:用户应收到清晰、具体的错误信息,说明导入失败的具体原因 +- **验证**:`human-judgment` + +### AC-4:向后兼容性 +- **给定**:使用旧版本备份文件 +- **当**:用户尝试导入该备份文件 +- **则**:导入功能应成功处理该文件,保持与旧版本的兼容性 +- **验证**:`programmatic` + +## 未解决问题 +- [ ] 是否需要添加导入数据预览功能,让用户在导入前了解数据结构 +- [ ] 是否需要添加导入失败时的自动修复机制 \ No newline at end of file diff --git a/.trae/specs/import-issue-analysis/tasks.md b/.trae/specs/import-issue-analysis/tasks.md new file mode 100644 index 00000000..e4be76f6 --- /dev/null +++ b/.trae/specs/import-issue-analysis/tasks.md @@ -0,0 +1,70 @@ +# 导入功能问题分析 - 实施计划 + +## [x] 任务 1:分析导入功能问题的根本原因 +- **优先级**:P0 +- **依赖**:None +- **描述**: + - 分析 `detectAccountsSection` 函数的逻辑,找出无法正确检测账号数据的原因 + - 分析 `detectPreferencesSection` 函数的逻辑,找出无法正确检测设置数据的原因 + - 分析 `coerceAccountsSection` 和 `coercePreferencesSection` 函数的验证逻辑 +- **Acceptance Criteria Addressed**:AC-1, AC-2 +- **Test Requirements**: + - `programmatic` TR-1.1:测试不同格式的备份文件导入,验证检测逻辑 + - `programmatic` TR-1.2:测试边界情况,如空数据、不完整数据等 +- **Notes**:重点关注数据检测和验证逻辑,找出可能导致返回 null 的原因 + +## [x] 任务 2:修复账号数据检测和验证逻辑 +- **优先级**:P0 +- **依赖**:任务 1 +- **描述**: + - 修改 `coerceAccountsSection` 函数,优化验证逻辑,提高容错性 + - 修改 `detectAccountsSection` 函数,确保能够正确识别各种格式的账号数据 + - 确保函数能够处理不同版本的备份文件格式 +- **Acceptance Criteria Addressed**:AC-1, AC-4 +- **Test Requirements**: + - `programmatic` TR-2.1:测试有效的账号数据备份文件导入 + - `programmatic` TR-2.2:测试旧版本备份文件的导入 + - `programmatic` TR-2.3:测试部分数据缺失的备份文件导入 +- **Notes**:确保修改后的逻辑保持与现有代码的兼容性 + +## [x] 任务 3:修复设置数据检测和验证逻辑 +- **优先级**:P0 +- **依赖**:任务 1 +- **描述**: + - 修改 `coercePreferencesSection` 函数,优化验证逻辑,提高容错性 + - 修改 `detectPreferencesSection` 函数,确保能够正确识别各种格式的设置数据 + - 确保函数能够处理不同版本的备份文件格式 +- **Acceptance Criteria Addressed**:AC-2, AC-4 +- **Test Requirements**: + - `programmatic` TR-3.1:测试有效的设置数据备份文件导入 + - `programmatic` TR-3.2:测试旧版本备份文件的导入 + - `programmatic` TR-3.3:测试部分数据缺失的备份文件导入 +- **Notes**:确保修改后的逻辑能够处理各种边界情况 + +## [x] 任务 4:改进错误信息 +- **优先级**:P1 +- **依赖**:任务 2, 任务 3 +- **描述**: + - 在 `importBackup` 函数中添加更详细的错误信息 + - 确保错误信息能够清晰说明导入失败的具体原因 + - 为不同类型的错误提供不同的错误信息 +- **Acceptance Criteria Addressed**:AC-3 +- **Test Requirements**: + - `human-judgment` TR-4.1:验证错误信息是否清晰、具体 + - `human-judgment` TR-4.2:验证错误信息是否能够帮助用户理解失败原因 +- **Notes**:错误信息应简洁明了,避免技术术语 + +## [x] 任务 5:测试修复后的导入功能 +- **优先级**:P0 +- **依赖**:任务 2, 任务 3, 任务 4 +- **描述**: + - 测试各种格式的备份文件导入 + - 验证导入功能能够正确处理部分数据缺失的情况 + - 验证导入功能的性能和稳定性 +- **Acceptance Criteria Addressed**:AC-1, AC-2, AC-3, AC-4 +- **Test Requirements**: + - `programmatic` TR-5.1:测试完整备份文件的导入 + - `programmatic` TR-5.2:测试部分数据缺失的备份文件导入 + - `programmatic` TR-5.3:测试旧版本备份文件的导入 + - `human-judgment` TR-5.4:验证导入过程中的错误处理和用户反馈 +- **Notes**:测试应覆盖各种边界情况和异常情况 \ No newline at end of file diff --git a/.trae/specs/model-probe-optimization/checklist.md b/.trae/specs/model-probe-optimization/checklist.md new file mode 100644 index 00000000..b1fcae8d --- /dev/null +++ b/.trae/specs/model-probe-optimization/checklist.md @@ -0,0 +1,36 @@ +# 批量测活功能优化 - 验证检查清单 + +- [x] 提示词优化 + - [x] 提示词库包含多个不同的提示词 + - [x] 同一站点内使用不同的提示词 + - [x] 提示词自然合理 + +- [x] 站点级并发控制 + - [x] 同一站点的账号串行处理 + - [x] 不同站点的账号并行处理 + - [x] 站点级 Lease 机制正常工作 + +- [x] 速率限制和频率限制 + - [x] 速率限制为 TPM=1 + - [x] 5 分钟内不超过 5 个测活 + - [x] 限制机制正常工作 + +- [x] 环境变量配置 + - [x] 环境变量配置生效 + - [x] 默认值正常工作 + - [x] 配置参数类型正确 + +- [x] 测试和验证 + - [x] 所有单元测试通过 + - [x] 集成测试通过 + - [x] 功能验证通过 + +- [x] 性能和可靠性 + - [x] 多站点并发测活时性能不劣化 + - [x] 测活过程稳定,不影响其他系统功能 + - [x] 避免被中转站识别为异常行为 + +- [x] 代码质量 + - [x] 代码清晰,易于理解和维护 + - [x] 符合项目代码风格 + - [x] 适当的注释和文档 \ No newline at end of file diff --git a/.trae/specs/model-probe-optimization/spec.md b/.trae/specs/model-probe-optimization/spec.md new file mode 100644 index 00000000..f638ef01 --- /dev/null +++ b/.trae/specs/model-probe-optimization/spec.md @@ -0,0 +1,87 @@ +# 批量测活功能优化 - 产品需求文档 + +## Overview +- **Summary**: 优化批量测活功能,包括提示词测试优化、并发控制、速率限制和环境变量配置等功能 +- **Purpose**: 提高批量测活的可靠性和安全性,避免被中转站识别为异常行为,同时提升测试效率 +- **Target Users**: Metapi 系统管理员和用户 + +## Goals +- 优化提示词测试,使用真实且多样化的提示词 +- 实现站点级并发控制,避免单个站点并发测活 +- 支持多站点并发测活,提高效率 +- 实现速率限制(TPM=1),避免触发中转站限流 +- 支持环境变量自定义相关参数 +- 实现定时停止和限制每个站点 5 分钟内不超过 5 个测活 + +## Non-Goals (Out of Scope) +- 重构整个测活架构 +- 改变现有的数据库结构 +- 支持复杂的测活策略 + +## Background & Context +- 当前批量测活功能使用固定的提示词 "Reply with OK." +- 当前并发控制基于账号级 Lease 机制 +- 缺乏速率限制和站点级并发控制 +- 缺乏环境变量配置支持 + +## Functional Requirements +- **FR-1**: 提示词优化,使用真实且多样化的提示词 +- **FR-2**: 站点级并发控制,同一站点不同账号不可并发测活 +- **FR-3**: 支持多站点并发测活 +- **FR-4**: 速率限制,设置 TPM=1 +- **FR-5**: 环境变量配置支持,包括提示词自定义 +- **FR-6**: 定时停止和频率限制(5 分钟内不超过 5 个测活) + +## Non-Functional Requirements +- **NFR-1**: 性能 - 多站点并发测活时性能不劣化 +- **NFR-2**: 可靠性 - 测活过程稳定,不影响其他系统功能 +- **NFR-3**: 安全性 - 避免被中转站识别为异常行为 +- **NFR-4**: 可维护性 - 代码清晰,易于理解和维护 + +## Constraints +- **Technical**: 保持与现有架构的兼容性 +- **Business**: 最小化对现有功能的影响 +- **Dependencies**: 依赖现有的数据库结构和服务 + +## Assumptions +- 中转站对相同提示词的频繁请求可能会被识别为异常行为 +- 中转站对高并发请求可能会进行限流 +- 不同站点的测活可以并行进行 + +## Acceptance Criteria + +### AC-1: 提示词优化 +- **Given**: 系统配置了多样化的提示词 +- **When**: 执行批量测活时 +- **Then**: 系统使用不同的提示词进行测试,同一站点内提示词不重复 +- **Verification**: `human-judgment` + +### AC-2: 站点级并发控制 +- **Given**: 多个账号属于同一站点 +- **When**: 执行批量测活时 +- **Then**: 系统串行处理同一站点的账号,不同站点的账号可以并行处理 +- **Verification**: `programmatic` + +### AC-3: 速率限制 +- **Given**: 系统配置了 TPM=1 +- **When**: 执行批量测活时 +- **Then**: 系统限制每个站点的测活速率为每分钟 1 个模型 +- **Verification**: `programmatic` + +### AC-4: 环境变量配置 +- **Given**: 用户配置了相关环境变量 +- **When**: 系统启动时 +- **Then**: 系统使用环境变量中的配置值 +- **Verification**: `programmatic` + +### AC-5: 频率限制 +- **Given**: 系统配置了 5 分钟内不超过 5 个测活 +- **When**: 执行批量测活时 +- **Then**: 系统限制每个站点 5 分钟内不超过 5 个测活 +- **Verification**: `programmatic` + +## Open Questions +- [ ] 提示词库的具体实现方式 +- [ ] 站点级并发控制的具体实现细节 +- [ ] 速率限制和频率限制的具体实现方式 +- [ ] 环境变量配置的具体参数设计 \ No newline at end of file diff --git a/.trae/specs/model-probe-optimization/tasks.md b/.trae/specs/model-probe-optimization/tasks.md new file mode 100644 index 00000000..d39afd85 --- /dev/null +++ b/.trae/specs/model-probe-optimization/tasks.md @@ -0,0 +1,73 @@ +# 批量测活功能优化 - 实现计划 + +## [x] Task 1: 提示词优化实现 +- **Priority**: P0 +- **Depends On**: None +- **Description**: + - 创建多样化的提示词库 + - 实现提示词选择逻辑,确保同一站点内提示词不重复 + - 修改 `buildProbeBody` 函数使用随机提示词 +- **Acceptance Criteria Addressed**: AC-1 +- **Test Requirements**: + - `programmatic` TR-1.1: 验证提示词库包含多个不同的提示词 + - `programmatic` TR-1.2: 验证同一站点内使用不同的提示词 + - `human-judgment` TR-1.3: 验证提示词自然合理 +- **Notes**: 提示词应简洁、自然,避免触发安全检查 + +## [x] Task 2: 站点级并发控制实现 +- **Priority**: P0 +- **Depends On**: None +- **Description**: + - 创建站点级 Lease 机制 + - 修改 `executeModelAvailabilityProbe` 函数,按站点分组处理账号 + - 实现站点级并发控制,确保同一站点的账号串行处理 + - 实现 5 分钟内不超过 5 个测活的频率限制 +- **Acceptance Criteria Addressed**: AC-2, AC-3, AC-5 +- **Test Requirements**: + - `programmatic` TR-2.1: 验证同一站点的账号串行处理 + - `programmatic` TR-2.2: 验证不同站点的账号并行处理 + - `programmatic` TR-2.3: 验证站点级 Lease 机制正常工作 + - `programmatic` TR-2.4: 验证 5 分钟内不超过 5 个测活 +- **Notes**: 需要保持与现有账号级 Lease 机制的兼容性 + +## [x] Task 3: 速率限制和频率限制实现 +- **Priority**: P0 +- **Depends On**: Task 2 +- **Description**: + - 实现 TPM=1 的速率限制 + - 实现 5 分钟内不超过 5 个测活的频率限制 + - 创建站点级测活记录和计时器 +- **Acceptance Criteria Addressed**: AC-3, AC-5 +- **Test Requirements**: + - `programmatic` TR-3.1: 验证速率限制为 TPM=1 + - `programmatic` TR-3.2: 验证 5 分钟内不超过 5 个测活 + - `programmatic` TR-3.3: 验证限制机制正常工作 +- **Notes**: 需要考虑系统重启后的状态恢复 + +## [x] Task 4: 环境变量配置支持 +- **Priority**: P1 +- **Depends On**: None +- **Description**: + - 在 `config.ts` 中添加相关配置参数 + - 支持提示词自定义 + - 支持速率限制和频率限制参数配置 +- **Acceptance Criteria Addressed**: AC-4 +- **Test Requirements**: + - `programmatic` TR-4.1: 验证环境变量配置生效 + - `programmatic` TR-4.2: 验证默认值正常工作 + - `programmatic` TR-4.3: 验证配置参数类型正确 +- **Notes**: 需要确保配置参数的类型安全和默认值合理 + +## [x] Task 5: 测试和验证 +- **Priority**: P1 +- **Depends On**: Task 1, Task 2, Task 3, Task 4 +- **Description**: + - 编写单元测试 + - 进行集成测试 + - 验证所有功能正常工作 +- **Acceptance Criteria Addressed**: All +- **Test Requirements**: + - `programmatic` TR-5.1: 所有单元测试通过 + - `programmatic` TR-5.2: 集成测试通过 + - `human-judgment` TR-5.3: 功能验证通过 +- **Notes**: 需要覆盖各种边界情况和异常场景 \ No newline at end of file diff --git a/.trae/specs/timeout-optimization/checklist.md b/.trae/specs/timeout-optimization/checklist.md new file mode 100644 index 00000000..df794fdc --- /dev/null +++ b/.trae/specs/timeout-optimization/checklist.md @@ -0,0 +1,12 @@ +# Timeout Optimization - Verification Checklist + +- [x] Server timeout configuration is set to at least 300 seconds +- [x] Docker container configuration has appropriate timeout settings +- [x] Service-level timeout settings are adjusted for critical operations +- [x] Backup import operation completes successfully with large datasets +- [x] No timeout errors occur during long-running operations +- [x] All timeout settings are consistent across server, Docker, and service levels +- [x] Documentation is updated with timeout configuration best practices +- [x] Test scenarios are created for long-running operations +- [x] Docker HEALTHCHECK settings are reviewed and adjusted if necessary +- [x] Service-level timeouts match or exceed server-level settings diff --git a/.trae/specs/timeout-optimization/spec.md b/.trae/specs/timeout-optimization/spec.md new file mode 100644 index 00000000..88594457 --- /dev/null +++ b/.trae/specs/timeout-optimization/spec.md @@ -0,0 +1,81 @@ +# Timeout Optimization - Product Requirement Document + +## Overview +- **Summary**: This project aims to optimize timeout settings across server configuration, Docker configuration, and service configuration to address 30-second timeout issues. +- **Purpose**: To resolve timeout problems that occur during long-running operations, such as backup import, by ensuring appropriate timeout settings at all levels of the application stack. +- **Target Users**: System administrators, developers, and end users who interact with the application, especially those performing operations that require longer processing times. + +## Goals +- Optimize server-side timeout settings to handle long-running operations +- Configure Docker container timeout settings to match application requirements +- Review and adjust service-level timeout settings for critical operations +- Ensure consistency in timeout settings across all layers of the application +- Provide documentation for timeout configuration best practices + +## Non-Goals (Out of Scope) +- Modifying application logic to reduce processing time (this is about configuring timeouts, not optimizing performance) +- Adding new features unrelated to timeout settings +- Changing the core architecture of the application + +## Background & Context +- The application currently experiences 30-second timeout issues during operations like backup import +- Server-side timeout is already set to 300 seconds, but other layers may have shorter timeouts +- Docker containers and service-level configurations may have default 30-second timeouts +- The backup import operation has been optimized with batch processing, but still requires sufficient timeout settings + +## Functional Requirements +- **FR-1**: Update server-side timeout configuration to ensure consistent timeout settings +- **FR-2**: Configure Docker container timeout settings to match application requirements +- **FR-3**: Review and adjust service-level timeout settings for critical operations +- **FR-4**: Test timeout settings with long-running operations + +## Non-Functional Requirements +- **NFR-1**: All timeout settings should be consistent across server, Docker, and service levels +- **NFR-2**: Timeout settings should be configurable through environment variables where possible +- **NFR-3**: Documentation should be updated to include timeout configuration best practices +- **NFR-4**: Performance should not be negatively impacted by increased timeout settings + +## Constraints +- **Technical**: Must maintain compatibility with existing application architecture +- **Business**: Changes should not introduce security vulnerabilities +- **Dependencies**: Must work with existing Docker and server configurations + +## Assumptions +- The application is running in a Docker container +- The server is using Fastify as the web framework +- Long-running operations like backup import require more than 30 seconds to complete + +## Acceptance Criteria + +### AC-1: Server-side timeout configuration +- **Given**: The server is configured with appropriate timeout settings +- **When**: A long-running operation (like backup import) is initiated +- **Then**: The operation completes successfully without timeout errors +- **Verification**: `programmatic` +- **Notes**: Server timeout should be set to at least 300 seconds + +### AC-2: Docker container timeout configuration +- **Given**: Docker container is configured with appropriate timeout settings +- **When**: The application runs inside the Docker container +- **Then**: Long-running operations complete successfully without container-level timeouts +- **Verification**: `programmatic` +- **Notes**: Docker container should not have timeout limits that override application settings + +### AC-3: Service-level timeout configuration +- **Given**: Service-level timeout settings are reviewed and adjusted +- **When**: Services perform long-running operations +- **Then**: Operations complete successfully without service-level timeouts +- **Verification**: `programmatic` +- **Notes**: Focus on critical services like backup service + +### AC-4: Consistency across all layers +- **Given**: All timeout settings are configured consistently +- **When**: A long-running operation is performed +- **Then**: The operation completes successfully without timeout errors at any layer +- **Verification**: `programmatic` +- **Notes**: Test with operations that take 60+ seconds to complete + +## Open Questions +- [ ] What is the current Docker container configuration? +- [ ] Are there any reverse proxy or load balancer timeout settings that need to be considered? +- [ ] What are the specific service-level timeout settings that need to be adjusted? diff --git a/.trae/specs/timeout-optimization/tasks.md b/.trae/specs/timeout-optimization/tasks.md new file mode 100644 index 00000000..33979fd8 --- /dev/null +++ b/.trae/specs/timeout-optimization/tasks.md @@ -0,0 +1,66 @@ +# Timeout Optimization - Implementation Plan + +## [x] Task 1: Review current server timeout configuration +- **Priority**: P0 +- **Depends On**: None +- **Description**: + - Review the current server timeout settings in the Fastify configuration + - Verify that the requestTimeout is set to an appropriate value + - Check if there are any other server-level timeout settings that need adjustment +- **Acceptance Criteria Addressed**: AC-1 +- **Test Requirements**: + - `programmatic` TR-1.1: Verify server timeout is set to at least 300 seconds + - `programmatic` TR-1.2: Test that long-running operations complete without server-level timeouts +- **Notes**: Focus on the buildFastifyOptions function in config.ts + +## [x] Task 2: Review and update Docker container configuration +- **Priority**: P0 +- **Depends On**: Task 1 +- **Description**: + - Check current Docker configuration files (Dockerfile, docker-compose.yml) + - Add or update container timeout settings to match application requirements + - Ensure Docker container doesn't have timeout limits that override application settings +- **Acceptance Criteria Addressed**: AC-2 +- **Test Requirements**: + - `programmatic` TR-2.1: Verify Docker container has appropriate timeout settings + - `programmatic` TR-2.2: Test long-running operations inside Docker container +- **Notes**: Check for any HEALTHCHECK or other Docker-specific timeout settings + +## [x] Task 3: Review and adjust service-level timeout settings +- **Priority**: P0 +- **Depends On**: Task 1 +- **Description**: + - Review timeout settings for critical services, especially backupService + - Adjust service-level timeouts to ensure they match or exceed server-level settings + - Focus on operations that typically take longer than 30 seconds +- **Acceptance Criteria Addressed**: AC-3 +- **Test Requirements**: + - `programmatic` TR-3.1: Verify service-level timeouts are set appropriately + - `programmatic` TR-3.2: Test backup import operation with large datasets +- **Notes**: Check backupService.ts for any internal timeout settings + +## [x] Task 4: Test timeout settings with long-running operations +- **Priority**: P1 +- **Depends On**: Tasks 1, 2, 3 +- **Description**: + - Create test scenarios for long-running operations + - Test backup import with large datasets + - Verify that operations complete successfully without timeout errors +- **Acceptance Criteria Addressed**: AC-4 +- **Test Requirements**: + - `programmatic` TR-4.1: Test backup import with dataset that takes 60+ seconds + - `programmatic` TR-4.2: Verify no timeout errors occur at any layer +- **Notes**: Use realistic test data to simulate actual usage + +## [x] Task 5: Update documentation for timeout configuration +- **Priority**: P2 +- **Depends On**: Tasks 1, 2, 3, 4 +- **Description**: + - Update project documentation to include timeout configuration best practices + - Document server, Docker, and service-level timeout settings + - Provide guidance for configuring timeouts in different environments +- **Acceptance Criteria Addressed**: NFR-3 +- **Test Requirements**: + - `human-judgment` TR-5.1: Documentation is clear and comprehensive + - `human-judgment` TR-5.2: Documentation covers all timeout configuration levels +- **Notes**: Update README or deployment documentation diff --git a/CODE_WIKI.md b/CODE_WIKI.md new file mode 100644 index 00000000..81d1302a --- /dev/null +++ b/CODE_WIKI.md @@ -0,0 +1,622 @@ +# Metapi Code Wiki + +## 项目概述 + +**Metapi** 是一个 AI 中转站的元聚合层(Meta-Aggregation Layer),将分散的 AI API 聚合平台(如 New API、One API、OneHub 等)统一为一个网关。 + +- **项目版本**: 1.3.0 +- **主要语言**: TypeScript +- **Node.js 要求**: >=25.0.0 +- **许可证**: MIT + +--- + +## 目录结构 + +``` +metapi/ +├── build/ # 打包静态资源 +├── data/ # 默认运行时数据目录 +├── dist/ # 构建产物 +├── docker/ # Docker 配置 +├── docs/ # VitePress 文档 +├── drizzle/ # Drizzle SQL 迁移 +├── scripts/ # 开发和打包脚本 +├── src/ +│ ├── desktop/ # Electron 主进程 +│ ├── server/ # Fastify 服务端 +│ └── web/ # React 管理后台 +└── package.json +``` + +### 核心源码目录 + +#### `src/server/` - 服务端核心 +- [index.ts](file:///workspace/src/server/index.ts) - Fastify 服务启动和初始化 +- [config.ts](file:///workspace/src/server/config.ts) - 环境变量解析和配置 +- `db/` - 数据库 Schema、连接和迁移 +- `middleware/` - 认证等中间件 +- `routes/` - API 路由和代理路由 +- `services/` - 业务服务层 +- `transformers/` - 协议转换层 +- `proxy-core/` - 代理核心逻辑 + +#### `src/web/` - Web 前端 +- [main.tsx](file:///workspace/src/web/main.tsx) - Vite 入口 +- [App.tsx](file:///workspace/src/web/App.tsx) - 路由和页面装配 +- `components/` - 通用组件 +- `pages/` - 路由页面 + +--- + +## 技术栈 + +| 层级 | 技术 | +|------|------| +| 后端框架 | Fastify 5.x | +| 前端框架 | React 18 + Vite | +| 数据库 ORM | Drizzle ORM | +| 数据库 | SQLite (默认) / MySQL / PostgreSQL | +| 样式 | Tailwind CSS v4 | +| 数据可视化 | VChart | +| 定时任务 | node-cron | +| 测试 | Vitest | + +--- + +## 整体架构 + +### 三层架构 + +``` +┌─────────────────────────────────────────┐ +│ Web 前端 (React) │ +└────────────────────┬────────────────────┘ + │ +┌────────────────────▼────────────────────┐ +│ Fastify API Gateway │ +│ ┌───────────────────────────────────┐ │ +│ │ 管理 API (sites/accounts/tokens) │ │ +│ └───────────────────────────────────┘ │ +│ ┌───────────────────────────────────┐ │ +│ │ 代理路由 (/v1/*) │ │ +│ └───────────────────────────────────┘ │ +└────────────────────┬────────────────────┘ + │ +┌────────────────────▼────────────────────┐ +│ 业务服务层 │ +│ - 平台适配器 (New API/One API 等) │ +│ - 模型可用性探测 │ +│ - 智能路由引擎 │ +│ - 签到/余额刷新 │ +└────────────────────┬────────────────────┘ + │ +┌────────────────────▼────────────────────┐ +│ 数据存储层 │ +│ SQLite/MySQL/PostgreSQL + Drizzle ORM │ +└─────────────────────────────────────────┘ +``` + +### 核心数据流 + +1. **客户端请求** → 代理路由 `/v1/*` +2. **Token 认证** → 下游 API Key 验证 +3. **路由选择** → TokenRouter 根据模型、成本、余额选择最佳通道 +4. **协议转换** → Transformers 层处理 OpenAI/Claude/Gemini 格式互转 +5. **上游请求** → Platform Adapters 发送到对应平台 +6. **响应返回** → 经过转换返回给客户端 + +--- + +## 核心模块详解 + +### 1. 配置模块 ([config.ts](file:///workspace/src/server/config.ts)) + +**主要功能**: 解析环境变量并构建运行时配置 + +**关键配置项**: + +| 配置项 | 默认值 | 说明 | +|--------|--------|------| +| `authToken` | change-me-admin-token | 管理后台令牌 | +| `proxyToken` | change-me-proxy-sk-token | 代理令牌 | +| `port` | 4000 | 服务端口 | +| `modelAvailabilityProbeEnabled` | false | 批量测活启用开关 | +| `modelAvailabilityProbeIntervalMs` | 1800000 (30分钟) | 测活间隔 | +| `modelAvailabilityProbeTimeoutMs` | 15000 (15秒) | 单次探测超时 | +| `modelAvailabilityProbeConcurrency` | 1 | 探测并发数 (1-16) | + +### 2. 数据库模块 ([db/schema.ts](file:///workspace/src/server/db/schema.ts)) + +**主要表结构**: + +#### 核心业务表 + +| 表名 | 说明 | +|------|------| +| `sites` | 上游站点配置 | +| `accounts` | 站点账号 | +| `accountTokens` | 账号 API Token | +| `modelAvailability` | 账号级模型可用性 | +| `tokenModelAvailability` | Token 级模型可用性 | +| `tokenRoutes` | Token 路由配置 | +| `routeChannels` | 路由通道 | +| `proxyLogs` | 代理请求日志 | + +#### 关键表详解 + +##### `sites` 表 +```typescript +{ + id: number; + name: string; + url: string; + platform: string; // 'new-api' | 'one-api' | 'one-hub' | ... + status: 'active' | 'disabled'; + proxyUrl?: string; + customHeaders?: string; // JSON +} +``` + +##### `accounts` 表 +```typescript +{ + id: number; + siteId: number; + username?: string; + accessToken: string; + apiToken?: string; + balance: number; + status: 'active' | 'disabled' | 'expired'; + checkinEnabled: boolean; +} +``` + +##### `modelAvailability` 表 +```typescript +{ + id: number; + accountId: number; + modelName: string; + available?: boolean; + isManual: boolean; + latencyMs?: number; + checkedAt: string; +} +``` + +### 3. 批量测活功能核心 + +#### 核心服务: [modelAvailabilityProbeService.ts](file:///workspace/src/server/services/modelAvailabilityProbeService.ts) + +**核心流程**: + +1. **定时调度** - 每 30 分钟(可配置)触发一次 +2. **账号遍历** - 获取所有活跃账号,按 Lease 机制避免重复探测 +3. **目标加载** - 加载 `modelAvailability` 和 `tokenModelAvailability` 表中待测模型 +4. **并发探测** - 使用 `mapWithConcurrency` 控制并发数(默认 1,最大 16) +5. **结果判定** - 根据响应状态码和错误信息判定 supported/unsupported/inconclusive/skipped +6. **路由重建** - 可用性变化时触发 `rebuildRoutesOnly()` + +**关键防护机制**: + +| 机制 | 实现 | 说明 | +|------|------|------| +| Lease 机制 | `probeAccountLeases` Set | 防止同一账号并发探测 | +| 超时控制 | 15 秒 + AbortController | 中断超时请求 | +| 模型过滤 | `NON_CONVERSATION_MODEL_PATTERNS` | 跳过 embedding/rerank/moderation/whisper/tts 等 | +| 确认弹窗 | 首次开启必须手打确认语句 | 风险提示 | + +**探测状态类型**: +```typescript +type ProbeStatus = 'supported' | 'unsupported' | 'inconclusive' | 'skipped'; +``` + +**关键函数**: + +- `executeModelAvailabilityProbe()` - 执行批量测活 +- `probeSingleTarget()` - 探测单个模型 +- `startModelAvailabilityProbeScheduler()` - 启动定时调度 +- `queueModelAvailabilityProbeTask()` - 队列后台任务 + +#### 运行时探测: [runtimeModelProbe.ts](file:///workspace/src/server/services/runtimeModelProbe.ts) + +**核心函数**: `probeRuntimeModel()` + +**探测流程**: +1. 检查是否为对话模型(跳过非对话模型) +2. 构建探测请求体(`{"model": "...", "messages": [...], "max_tokens": 8}`) +3. 解析上游端点候选 +4. 执行探测请求 +5. 根据响应判定状态: + - **supported**: 请求成功 + - **unsupported**: 400/403/404/422 + 错误模式匹配 + - **inconclusive**: 其他失败情况 + - **skipped**: 非对话模型 + +**不支持模式匹配**: +```typescript +const DEFINITE_UNSUPPORTED_PATTERNS = [ + /no such model/i, + /unknown model/i, + /模型.*(不存在|不可用|不支持)/, + /(不存在|不可用|不支持).*模型/, + /model.*(access denied|permission|forbidden)/i, +]; +``` + +#### 确认语句与风险提示 + +**确认语句**: +``` +我确认我使用的中转站全部允许批量测活,如因开启此功能被中转站封号,自行负责。 +``` + +**风险提示**: 可能被部分中转站视为批量测活或异常行为,请务必先确认你的中转站明确允许此类探测 + +### 4. 智能路由引擎 + +#### 核心服务: [tokenRouter.ts](file:///workspace/src/server/services/tokenRouter.ts) + +**路由策略**: + +1. **四级成本信号**: 实测成本 → 账号配置成本 → 目录参考价 → 默认兜底 +2. **多通道加权分配**: 成本(40%) + 余额(30%) + 使用率(30%) +3. **失败冷却**: 失败通道自动冷却(默认 10 分钟) +4. **自动重试**: 请求失败自动切换其他通道 + +**路由决策流程**: +``` +请求 → 模型匹配 → 候选通道筛选 → + - 排除冷却通道 + - 排除禁用通道 + - 排除不可用模型 +→ 加权排序 → 概率选择 → 执行请求 +``` + +### 5. 代理核心 ([proxy-core/](file:///workspace/src/server/proxy-core/)) + +#### 主要组件: + +| 组件 | 说明 | +|------|------| +| `conductor/` | 代理指挥器,控制重试和流转 | +| `executors/` | 上游执行器(Claude/Codex/Gemini 等) | +| `providers/` | 平台提供者配置 | +| `surfaces/` | 协议表面层(Chat/Models/Files 等) | +| `transformers/` | 协议转换层 | + +#### 协议转换 ([transformers/](file:///workspace/src/server/transformers/)) + +支持的协议: +- OpenAI (Chat Completions, Responses, Embeddings, Images, Files) +- Claude (Messages API) +- Gemini (Generate Content API) + +### 6. 平台适配器 ([services/platforms/](file:///workspace/src/server/services/platforms/)) + +支持的平台: + +| 平台 | 适配器文件 | +|------|-----------| +| New API | [newApi.ts](file:///workspace/src/server/services/platforms/newApi.ts) | +| One API | [oneApi.ts](file:///workspace/src/server/services/platforms/oneApi.ts) | +| OneHub | [oneHub.ts](file:///workspace/src/server/services/platforms/oneHub.ts) | +| DoneHub | [doneHub.ts](file:///workspace/src/server/services/platforms/doneHub.ts) | +| Veloera | [veloera.ts](file:///workspace/src/server/services/platforms/veloera.ts) | +| AnyRouter | [anyrouter.ts](file:///workspace/src/server/services/platforms/anyrouter.ts) | +| Sub2API | [sub2api.ts](file:///workspace/src/server/services/platforms/sub2api.ts) | +| OpenAI | [openai.ts](file:///workspace/src/server/services/platforms/openai.ts) | +| Claude | [claude.ts](file:///workspace/src/server/services/platforms/claude.ts) | +| Gemini | [gemini.ts](file:///workspace/src/server/services/platforms/gemini.ts) | +| Codex | [codex.ts](file:///workspace/src/server/services/platforms/codex.ts) | + +### 7. 定时任务服务 + +#### 签到调度器 ([checkinScheduler.ts](file:///workspace/src/server/services/checkinScheduler.ts)) +- 默认 Cron: `0 8 * * *` (每天 8 点) +- 支持定时模式或间隔模式 + +#### 余额刷新 +- 默认 Cron: `0 * * * *` (每小时) +- 批量更新所有活跃账号余额 + +#### 模型可用性探测 +- 可配置间隔(默认 30 分钟) +- Lease 机制防止重复探测 + +--- + +## 依赖关系 + +### 核心依赖树 + +``` +src/server/index.ts (入口) +├── config.ts (配置) +├── db/index.ts (数据库) +├── routes/ (路由) +│ ├── api/ (管理 API) +│ └── proxy/ (代理路由) +├── services/ (业务服务) +│ ├── modelAvailabilityProbeService.ts +│ │ ├── runtimeModelProbe.ts +│ │ └── routeRefreshWorkflow.ts +│ ├── tokenRouter.ts +│ ├── modelService.ts +│ └── platforms/ (平台适配器) +├── proxy-core/ (代理核心) +└── transformers/ (协议转换) +``` + +### 批量测活功能依赖 + +``` +modelAvailabilityProbeService.ts +├── config.ts (探测配置) +├── db/index.ts (数据库访问) +├── runtimeModelProbe.ts (单个探测) +│ ├── siteProxy.ts (代理配置) +│ ├── runtimeDispatch.ts (请求分发) +│ └── upstreamEndpointRuntime.ts (端点解析) +├── routeRefreshWorkflow.ts (路由重建) +│ └── modelService.ts +└── backgroundTaskService.ts (后台任务) +``` + +--- + +## 项目运行方式 + +### 开发环境 + +```bash +# 安装依赖 +npm install + +# 数据库迁移 +npm run db:migrate + +# 启动开发环境(前后端热更新) +npm run dev +``` + +### 生产环境 - Docker + +```bash +# Docker Compose +cat > docker-compose.yml << 'EOF' +services: + metapi: + image: 1467078763/metapi:latest + ports: + - "4000:4000" + volumes: + - ./data:/app/data + environment: + AUTH_TOKEN: your-admin-token + PROXY_TOKEN: your-proxy-sk-token + CHECKIN_CRON: "0 8 * * *" + BALANCE_REFRESH_CRON: "0 * * * *" + MODEL_AVAILABILITY_PROBE_ENABLED: "true" + MODEL_AVAILABILITY_PROBE_INTERVAL_MS: "1800000" + TZ: Asia/Shanghai + restart: unless-stopped +EOF + +# 启动 +docker compose up -d +``` + +### 环境变量配置 + +| 变量 | 说明 | +|------|------| +| `AUTH_TOKEN` | 管理后台令牌 | +| `PROXY_TOKEN` | 代理令牌 | +| `PORT` | 服务端口(默认 4000) | +| `DATA_DIR` | 数据目录 | +| `DB_TYPE` | 数据库类型 (sqlite/mysql/postgres) | +| `DB_URL` | 数据库连接字符串 | +| `CHECKIN_CRON` | 签到 Cron 表达式 | +| `BALANCE_REFRESH_CRON` | 余额刷新 Cron | +| `MODEL_AVAILABILITY_PROBE_ENABLED` | 批量测活开关 | +| `MODEL_AVAILABILITY_PROBE_INTERVAL_MS` | 测活间隔(毫秒) | +| `MODEL_AVAILABILITY_PROBE_TIMEOUT_MS` | 单次探测超时(毫秒) | +| `MODEL_AVAILABILITY_PROBE_CONCURRENCY` | 探测并发数 (1-16) | + +### 构建命令 + +```bash +npm run build # 构建前端 + 后端 +npm run build:web # 仅构建前端 +npm run build:server # 仅构建后端 +npm run dist:desktop # 构建桌面安装包 +npm test # 运行全部测试 +npm run db:generate # 生成 Drizzle 迁移文件 +npm run db:migrate # 执行数据库迁移 +``` + +--- + +## 批量测活功能核心逻辑总结 + +### 核心流程 + +``` +┌─────────────────────────────────────────────────────────┐ +│ 1. 定时调度 (每 30 分钟) │ +│ startModelAvailabilityProbeScheduler() │ +└────────────────────┬────────────────────────────────────┘ + │ +┌────────────────────▼────────────────────────────────────┐ +│ 2. 队列后台任务 │ +│ queueModelAvailabilityProbeTask() │ +└────────────────────┬────────────────────────────────────┘ + │ +┌────────────────────▼────────────────────────────────────┐ +│ 3. 执行探测 │ +│ executeModelAvailabilityProbe() │ +│ ┌───────────────────────────────────────────────────┐ │ +│ │ 3.1 获取活跃账号列表 │ │ +│ │ 3.2 遍历账号,尝试获取 Lease │ │ +│ │ 3.3 加载该账号的待测模型 │ │ +│ │ - modelAvailability 表 │ │ +│ │ - tokenModelAvailability 表 │ │ +│ │ 3.4 跳过 isManual=true 的模型 │ │ +│ └──────────────────────┬────────────────────────────┘ │ +└─────────────────────────┼─────────────────────────────────┘ + │ +┌─────────────────────────▼─────────────────────────────────┐ +│ 4. 并发探测 (mapWithConcurrency) │ +│ ┌───────────────────────────────────────────────────┐ │ +│ │ 对每个目标执行 probeSingleTarget() │ │ +│ │ └─ probeRuntimeModel() │ │ +│ │ ├─ 检查是否为对话模型 │ │ +│ │ ├─ 构建探测请求体 │ │ +│ │ ├─ 发送探测请求 │ │ +│ │ └─ 判定探测结果 │ │ +│ └──────────────────────┬────────────────────────────┘ │ +└─────────────────────────┼─────────────────────────────────┘ + │ +┌─────────────────────────▼─────────────────────────────────┐ +│ 5. 更新探测结果 │ +│ updateProbeRow() │ +│ ┌───────────────────────────────────────────────────┐ │ +│ │ - 更新 available 状态 │ │ +│ │ - 更新 latencyMs │ │ +│ │ - 更新 checkedAt │ │ +│ │ - 记录 availabilityChanged 标志 │ │ +│ └──────────────────────┬────────────────────────────┘ │ +└─────────────────────────┼─────────────────────────────────┘ + │ +┌─────────────────────────▼─────────────────────────────────┐ +│ 6. 路由重建 (如果可用性有变化) │ +│ rebuildRoutesOnly() │ +└─────────────────────────────────────────────────────────────┘ +``` + +### 关键防护机制 + +| 机制 | 实现位置 | 作用 | +|------|---------|------| +| **Lease 机制** | `probeAccountLeases` Set | 防止同一账号被并发探测 | +| **超时控制** | `config.modelAvailabilityProbeTimeoutMs` (15秒) + AbortController | 防止探测卡死 | +| **并发控制** | `mapWithConcurrency()` + `config.modelAvailabilityProbeConcurrency` (1-16) | 控制并发压力 | +| **模型过滤** | `NON_CONVERSATION_MODEL_PATTERNS` | 跳过非对话模型(embedding/rerank/whisper等) | +| **手动标记跳过** | `isManual=true` | 跳过手动标记的模型 | +| **确认弹窗** | 设置页面首次开启 | 风险提示,防止误操作 | + +### 探测状态判定 + +| 状态 | 判定条件 | +|------|---------| +| `skipped` | 非对话模型匹配 `NON_CONVERSATION_MODEL_PATTERNS` | +| `inconclusive` | 缺少凭证 / 无可用端点 / 其他错误 | +| `unsupported` | HTTP 400/403/404/422 + 错误信息匹配 `DEFINITE_UNSUPPORTED_PATTERNS` | +| `supported` | 请求成功返回 | + +### 配置参数 + +```typescript +// config.ts 中的相关配置 +modelAvailabilityProbeEnabled: boolean; // 是否启用 +modelAvailabilityProbeIntervalMs: number; // 探测间隔 (默认 30分钟) +modelAvailabilityProbeTimeoutMs: number; // 单次超时 (默认 15秒) +modelAvailabilityProbeConcurrency: number; // 并发数 (1-16, 默认 1) +``` + +--- + +## 关键 API 端点 + +### 管理 API (`/api/*`) + +| 端点 | 方法 | 说明 | +|------|------|------| +| `/api/sites` | GET/POST/PUT/DELETE | 站点管理 | +| `/api/accounts` | GET/POST/PUT/DELETE | 账号管理 | +| `/api/tokens` | GET/POST/PUT/DELETE | Token 路由管理 | +| `/api/settings` | GET/PUT | 系统设置 | +| `/api/checkin` | POST | 手动触发签到 | +| `/api/stats` | GET | 统计数据 | +| `/api/monitor` | GET | 监控数据 | + +### 代理 API (`/v1/*`) + +| 端点 | 说明 | +|------|------| +| `/v1/chat/completions` | 聊天补全 | +| `/v1/responses` | Responses API | +| `/v1/embeddings` | 嵌入 | +| `/v1/models` | 模型列表 | +| `/v1/files` | 文件管理 | +| `/v1/images` | 图像生成 | + +--- + +## 测试 + +```bash +# 运行全部测试 +npm test + +# 监听模式 +npm run test:watch + +# Schema 相关测试 +npm run test:schema:unit +npm run test:schema:parity +npm run test:schema:upgrade + +# 数据库冒烟测试 +npm run smoke:db +npm run smoke:db:sqlite +npm run smoke:db:mysql +npm run smoke:db:postgres +``` + +--- + +## 开发指南 + +### 新增平台适配器 + +1. 在 `src/server/services/platforms/` 下创建新文件 +2. 继承 `BasePlatform` 类 +3. 实现必要的方法 +4. 在 `index.ts` 中注册 + +### 新增 API 路由 + +1. 在 `src/server/routes/api/` 下创建路由文件 +2. 在 `src/server/index.ts` 中注册 + +### 数据库迁移 + +```bash +# 修改 schema.ts 后生成迁移 +npm run db:generate + +# 执行迁移 +npm run db:migrate +``` + +--- + +## 安全注意事项 + +1. **修改默认 Token**: 务必修改 `AUTH_TOKEN` 和 `PROXY_TOKEN` +2. **数据加密**: 所有敏感凭证均加密存储 +3. **批量测活风险**: 开启前务必确认中转站允许此类探测 +4. **IP 白名单**: 可配置 `ADMIN_IP_ALLOWLIST` 限制管理后台访问 +5. **自托管**: 所有数据存储在本地,不向第三方发送 + +--- + +## 相关链接 + +- [GitHub 仓库](https://github.com/cita-777/metapi) +- [在线文档](https://metapi.cita777.me) +- [Docker Hub](https://hub.docker.com/r/1467078763/metapi) + diff --git a/LONG_RUNNING_OPERATIONS_TEST_PLAN.md b/LONG_RUNNING_OPERATIONS_TEST_PLAN.md new file mode 100644 index 00000000..adcceb06 --- /dev/null +++ b/LONG_RUNNING_OPERATIONS_TEST_PLAN.md @@ -0,0 +1,79 @@ +# 大型数据库操作测试计划 + +## 测试目标 +- 验证系统能够处理超过60秒的数据库操作而不超时 +- 确保长时间运行的数据库操作能够成功完成 +- 测试数据库连接在长时间操作后的稳定性 +- 验证系统在处理大型数据集时的性能和可靠性 + +## 测试场景 + +### 1. 长时间运行的插入操作 +- **场景描述**:向数据库插入大量数据,确保操作能够在60秒以上完成 +- **测试步骤**: + 1. 创建临时表 + 2. 分批次插入大量数据(如50,000条记录) + 3. 验证所有数据都已成功插入 + 4. 清理临时表 +- **预期结果**:操作完成,无超时错误 + +### 2. 长时间运行的查询操作 +- **场景描述**:执行复杂的查询操作,确保能够处理大型结果集 +- **测试步骤**: + 1. 准备大量测试数据 + 2. 执行包含多个JOIN和聚合函数的复杂查询 + 3. 验证查询结果的正确性 +- **预期结果**:查询完成,无超时错误 + +### 3. 长时间运行的事务操作 +- **场景描述**:执行包含多个步骤的长事务 +- **测试步骤**: + 1. 开始事务 + 2. 执行多个数据库操作(插入、更新、删除) + 3. 提交事务 + 4. 验证事务的原子性 +- **预期结果**:事务成功完成,无超时错误 + +### 4. 多个并发长时间操作 +- **场景描述**:测试系统处理多个并发长时间数据库操作的能力 +- **测试步骤**: + 1. 启动多个并发的数据库操作 + 2. 每个操作都执行长时间运行的任务 + 3. 验证所有操作都能成功完成 +- **预期结果**:所有并发操作都完成,无超时错误 + +### 5. 数据库连接稳定性测试 +- **场景描述**:测试长时间操作后数据库连接的稳定性 +- **测试步骤**: + 1. 执行长时间运行的数据库操作 + 2. 操作完成后,尝试执行新的数据库操作 + 3. 验证连接仍然可用 +- **预期结果**:连接保持稳定,新操作能够成功执行 + +## 测试环境 +- **数据库类型**:支持SQLite、MySQL和PostgreSQL +- **测试工具**:Vitest +- **测试超时设置**:设置足够长的超时时间(如120秒)以确保测试能够完成 + +## 测试执行 +1. 运行长时间运行的数据库操作测试: + ```bash + npm run test -- src/server/db/longRunningOperations.test.ts + ``` + +2. 监控测试执行过程,确保: + - 操作能够在预期时间内完成 + - 无超时错误 + - 系统资源使用合理 + +## 测试结果分析 +- 记录每个测试场景的执行时间 +- 分析系统在处理大型数据库操作时的性能 +- 验证系统是否能够处理超过60秒的操作而不超时 +- 检查数据库连接的稳定性 + +## 注意事项 +- 测试前确保数据库有足够的存储空间 +- 测试过程中监控系统资源使用情况 +- 对于生产环境,确保数据库配置了合理的超时设置 +- 考虑在不同数据库类型上执行测试,以验证跨数据库兼容性 diff --git a/docs/configuration.md b/docs/configuration.md index c73c9f7c..064f4fc7 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -339,6 +339,123 @@ Metapi 当前的配置关系可以概括为: - 这类提醒不会自动触发部署,只是把用户带到「设置 → 更新中心」继续手动确认和执行 - K3s 用户可以在收到提醒后直接去更新中心部署;Compose 用户也可以收到提醒,但仍按自己的升级方式处理 +## 超时配置最佳实践 + +### 概述 + +超时配置是确保系统稳定性和响应性的重要因素。合理的超时设置可以: +- 防止请求无限期等待,提高系统可用性 +- 避免资源被长时间占用,提升系统吞吐量 +- 改善用户体验,减少等待时间 +- 及时发现和处理异常情况 + +### 数据库连接超时设置 + +| 配置项 | 说明 | 默认值 | 建议值 | 配置方式 | +|--------|------|--------|--------|----------| +| MySQL `acquireTimeout` | MySQL 连接池获取连接超时 | 300,000ms (5分钟) | 300,000ms - 600,000ms | 代码固定 | +| MySQL `timeout` | MySQL 连接池连接超时 | 300,000ms (5分钟) | 300,000ms - 600,000ms | 代码固定 | +| PostgreSQL `connectionTimeoutMillis` | PostgreSQL 连接池连接超时 | 300,000ms (5分钟) | 300,000ms - 600,000ms | 代码固定 | +| PostgreSQL `idleTimeoutMillis` | PostgreSQL 连接池空闲超时 | 300,000ms (5分钟) | 300,000ms - 600,000ms | 代码固定 | + +### 网关服务超时设置 + +| 配置项 | 说明 | 默认值 | 建议值 | 配置方式 | +|--------|------|--------|--------|----------| +| `DEFAULT_PROXY_CONNECT_TIMEOUT_MS` | 代理连接超时 | 10,000ms (10秒) | 5,000-15,000ms | 代码固定 | +| `PROXY_FIRST_BYTE_TIMEOUT_SEC` | 代理首字节超时(秒) | 0 (无超时) | 30-60秒 | 环境变量 / 管理后台 | +| `TOKEN_ROUTER_FAILURE_COOLDOWN_MAX_SEC` | 路由失败冷却上限 | 2,592,000秒 (30天) | 86,400秒 (24小时) | 环境变量 / 管理后台 | +| `MODEL_AVAILABILITY_PROBE_TIMEOUT_MS` | 模型可用性探测超时 | 15,000ms (15秒) | 10,000-20,000ms | 环境变量 | +| `PROXY_SESSION_CHANNEL_LEASE_TTL_MS` | 会话通道租约超时 | 90,000ms (90秒) | 60,000-120,000ms | 环境变量 | +| `PROXY_SESSION_CHANNEL_LEASE_KEEPALIVE_MS` | 会话通道保活间隔 | 15,000ms (15秒) | 10,000-30,000ms | 环境变量 | +| `PROXY_SESSION_CHANNEL_QUEUE_WAIT_MS` | 会话通道排队等待 | 1,500ms (1.5秒) | 1,000-3,000ms | 环境变量 / 管理后台 | + +### 服务器级超时设置 + +| 配置项 | 说明 | 默认值 | 建议值 | 配置方式 | +|--------|------|--------|--------|----------| +| `requestTimeout` | Fastify 服务器请求超时 | 300,000ms (5分钟) | 300,000ms - 600,000ms | 代码固定 | +| `keepAliveTimeout` | Fastify 服务器连接保持超时 | 65,000ms (65秒) | 65,000ms - 120,000ms | 代码固定 | + +### Docker 级超时设置 + +在 Docker Compose 部署中,建议配置以下超时相关参数: + +```yaml +# docker-compose.yml 示例 +services: + metapi: + image: metapi/metapi:latest + restart: unless-stopped + environment: + - PROXY_FIRST_BYTE_TIMEOUT_SEC=60 + - TOKEN_ROUTER_FAILURE_COOLDOWN_MAX_SEC=86400 + - MODEL_AVAILABILITY_PROBE_TIMEOUT_MS=15000 + # 网络超时设置 + network_mode: bridge + # 健康检查超时 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:4000/api/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s +``` + +### 不同环境的配置指南 + +#### 开发环境 + +- **数据库超时**: + - 保持默认值(5分钟),确保开发过程中不会因超时而中断 +- **网关超时**: + - `PROXY_FIRST_BYTE_TIMEOUT_SEC`: 60秒 + - `MODEL_AVAILABILITY_PROBE_TIMEOUT_MS`: 20,000ms + - `PROXY_SESSION_CHANNEL_QUEUE_WAIT_MS`: 3,000ms +- **理由**:开发环境可以设置较长的超时时间,便于调试和排查问题。 + +#### 测试环境 + +- **数据库超时**: + - 保持默认值(5分钟),确保测试过程的稳定性 +- **网关超时**: + - `PROXY_FIRST_BYTE_TIMEOUT_SEC`: 45秒 + - `MODEL_AVAILABILITY_PROBE_TIMEOUT_MS`: 15,000ms + - `PROXY_SESSION_CHANNEL_QUEUE_WAIT_MS`: 2,000ms +- **理由**:测试环境需要模拟生产环境的行为,同时保持一定的容错性。 + +#### 生产环境 + +- **数据库超时**: + - 可根据实际部署环境调整,建议 3-5 分钟 + - 对于高并发场景,可适当缩短以快速释放资源 +- **网关超时**: + - `PROXY_FIRST_BYTE_TIMEOUT_SEC`: 30秒 + - `MODEL_AVAILABILITY_PROBE_TIMEOUT_MS`: 10,000ms + - `PROXY_SESSION_CHANNEL_QUEUE_WAIT_MS`: 1,500ms + - `TOKEN_ROUTER_FAILURE_COOLDOWN_MAX_SEC`: 86,400秒 (24小时) +- **理由**:生产环境需要更严格的超时控制,确保系统响应迅速,避免资源浪费。 + +### 配置建议 + +1. **根据网络环境调整**:如果部署在网络条件较差的环境,应适当增加超时时间。 + +2. **根据模型特性调整**:对于生成式模型,尤其是长文本生成,可能需要更长的首字节超时时间。 + +3. **监控与调优**: + - 观察代理日志中的超时事件 + - 根据实际使用情况调整超时设置 + - 定期检查系统性能,确保超时设置合理 + +4. **多层超时配合**: + - 服务器级超时 > 服务级超时 > 操作级超时 + - 确保各层超时设置相互配合,避免过早或过晚的超时触发 + +5. **数据库连接池管理**: + - 监控数据库连接池的使用情况 + - 根据实际并发量调整连接池大小 + - 确保连接池超时设置与应用层超时设置相匹配 + ## 下一步 - [部署指南](./deployment.md) — Docker Compose 与反向代理 diff --git a/docs/operations.md b/docs/operations.md index a3cc0f1b..4b8d9035 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -135,8 +135,10 @@ npm run dev | `notify failed` | 通知发送失败 | 检查 [通知渠道配置](./configuration.md#通知渠道) | | `checkin failed` | 签到失败 | 检查账号状态和站点连通性 | | `balance refresh failed` | 余额刷新失败 | 检查账号凭证,可能需要重新登录 | -| `proxy timeout` | 代理请求超时 | 上游响应过慢;检查网络延迟或考虑切换其他通道 | +| `proxy timeout` | 代理请求超时 | 上游响应过慢;检查网络延迟、调整超时配置或考虑切换其他通道 | | `token expired` | Token 过期 | 系统会自动尝试续签;若反复出现,手动刷新 Token | +| `first byte timeout` | 首字节超时 | 上游在指定时间内未返回首字节;考虑调整 `PROXY_FIRST_BYTE_TIMEOUT_SEC` 配置 | +| `probe timeout` | 模型可用性探测超时 | 模型探测超时;考虑调整 `MODEL_AVAILABILITY_PROBE_TIMEOUT_MS` 配置 | ## 健康检查 @@ -160,6 +162,10 @@ curl -sS http://localhost:4000/v1/chat/completions \ - 定时请求 `/v1/models`,检查返回状态码和模型数量 - 定时抽样请求 `/v1/chat/completions`,检查端到端可用性 +- 监控超时相关指标: + - 首字节超时率:统计 `first byte timeout` 事件 + - 模型探测超时率:统计 `probe timeout` 事件 + - 代理请求超时率:统计 `proxy timeout` 事件 - SQLite / Desktop:监控磁盘空间(SQLite WAL 日志可能增长) - MySQL / Postgres:监控外部数据库空间、连接数和慢查询 - 监控 Docker 容器状态 diff --git a/src/server/config.ts b/src/server/config.ts index 2fa770f5..9c24bef0 100644 --- a/src/server/config.ts +++ b/src/server/config.ts @@ -146,6 +146,9 @@ export function buildConfig(env: NodeJS.ProcessEnv) { modelAvailabilityProbeIntervalMs: Math.max(60_000, Math.trunc(parseNumber(env.MODEL_AVAILABILITY_PROBE_INTERVAL_MS, 30 * 60 * 1000))), modelAvailabilityProbeTimeoutMs: Math.max(3_000, Math.trunc(parseNumber(env.MODEL_AVAILABILITY_PROBE_TIMEOUT_MS, 15_000))), modelAvailabilityProbeConcurrency: Math.max(1, Math.min(16, Math.trunc(parseNumber(env.MODEL_AVAILABILITY_PROBE_CONCURRENCY, 1)))), + modelAvailabilityProbeTpm: Math.max(1, Math.min(60, Math.trunc(parseNumber(env.MODEL_AVAILABILITY_PROBE_TPM, 1)))), + modelAvailabilityProbeMaxPerFiveMinutes: Math.max(1, Math.min(100, Math.trunc(parseNumber(env.MODEL_AVAILABILITY_PROBE_MAX_PER_FIVE_MINUTES, 5)))), + modelAvailabilityProbePrompts: parseCsvList(env.MODEL_AVAILABILITY_PROBE_PROMPTS), proxyLogRetentionDays: Math.max(0, Math.trunc(parseNumber(env.PROXY_LOG_RETENTION_DAYS, 30))), proxyLogRetentionPruneIntervalMinutes: Math.max(1, Math.trunc(parseNumber(env.PROXY_LOG_RETENTION_PRUNE_INTERVAL_MINUTES, 30))), proxyFileRetentionDays: Math.max(0, Math.trunc(parseNumber(env.PROXY_FILE_RETENTION_DAYS, 30))), @@ -179,5 +182,7 @@ export function buildFastifyOptions( logger: true, trustProxy: true, bodyLimit: appConfig.requestBodyLimit, + requestTimeout: 300_000, + keepAliveTimeout: 65_000, }; } diff --git a/src/server/db/index.ts b/src/server/db/index.ts index 100ab1da..17c7ef35 100644 --- a/src/server/db/index.ts +++ b/src/server/db/index.ts @@ -62,6 +62,10 @@ function buildMysqlPoolOptions( const poolOptions: mysql.PoolOptions = { uri: connectionString, jsonStrings: true, + waitForConnections: true, + connectionLimit: 10, + queueLimit: 0, + connectTimeout: 300000, // 5 minutes }; if (sslEnabled) { poolOptions.ssl = { rejectUnauthorized: false }; @@ -73,7 +77,12 @@ function buildPostgresPoolOptions( connectionString = config.dbUrl, sslEnabled = config.dbSsl, ): pg.PoolConfig { - const poolOptions: pg.PoolConfig = { connectionString }; + const poolOptions: pg.PoolConfig = { + connectionString, + max: 10, + idleTimeoutMillis: 300000, // 5 minutes + connectionTimeoutMillis: 300000, // 5 minutes + }; if (sslEnabled) { poolOptions.ssl = { rejectUnauthorized: false }; } diff --git a/src/server/db/longRunningOperations.test.ts b/src/server/db/longRunningOperations.test.ts new file mode 100644 index 00000000..5d63e8b5 --- /dev/null +++ b/src/server/db/longRunningOperations.test.ts @@ -0,0 +1,145 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { db, closeDbConnections, runtimeDbDialect } from './index.js'; +import * as schema from './schema.js'; + +// 模拟长时间运行的数据库操作 +async function simulateLongRunningOperation(seconds: number): Promise { + const startTime = Date.now(); + + // 创建一个大表或执行大量插入操作 + if (runtimeDbDialect === 'sqlite') { + // SQLite: 创建临时表并插入大量数据 + await db.execute(` + CREATE TABLE IF NOT EXISTS test_long_running ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + data TEXT, + created_at TEXT DEFAULT CURRENT_TIMESTAMP + ) + `); + + // 分批次插入数据,每批次1000条,共插入50000条 + for (let i = 0; i < 50; i++) { + const values = []; + for (let j = 0; j < 1000; j++) { + const data = `test_data_${i}_${j}_${Math.random()}`; + values.push(`('${data}')`); + } + await db.execute(` + INSERT INTO test_long_running (data) VALUES ${values.join(', ')} + `); + + // 模拟处理时间 + if (i % 10 === 0) { + await new Promise(resolve => setTimeout(resolve, 1000)); + } + } + + // 清理临时表 + await db.execute('DROP TABLE IF EXISTS test_long_running'); + } else if (runtimeDbDialect === 'mysql') { + // MySQL: 创建临时表并插入大量数据 + await db.execute(` + CREATE TABLE IF NOT EXISTS test_long_running ( + id INT PRIMARY KEY AUTO_INCREMENT, + data TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) ENGINE=InnoDB + `); + + // 分批次插入数据 + for (let i = 0; i < 50; i++) { + const values = []; + for (let j = 0; j < 1000; j++) { + const data = `test_data_${i}_${j}_${Math.random()}`; + values.push(`('${data}')`); + } + await db.execute(` + INSERT INTO test_long_running (data) VALUES ${values.join(', ')} + `); + + if (i % 10 === 0) { + await new Promise(resolve => setTimeout(resolve, 1000)); + } + } + + await db.execute('DROP TABLE IF EXISTS test_long_running'); + } else if (runtimeDbDialect === 'postgres') { + // PostgreSQL: 创建临时表并插入大量数据 + await db.execute(` + CREATE TABLE IF NOT EXISTS test_long_running ( + id SERIAL PRIMARY KEY, + data TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + `); + + // 分批次插入数据 + for (let i = 0; i < 50; i++) { + const values = []; + for (let j = 0; j < 1000; j++) { + const data = `test_data_${i}_${j}_${Math.random()}`; + values.push(`('${data}')`); + } + await db.execute(` + INSERT INTO test_long_running (data) VALUES ${values.join(', ')} + `); + + if (i % 10 === 0) { + await new Promise(resolve => setTimeout(resolve, 1000)); + } + } + + await db.execute('DROP TABLE IF EXISTS test_long_running'); + } + + const endTime = Date.now(); + const duration = (endTime - startTime) / 1000; + console.log(`Long running operation completed in ${duration.toFixed(2)} seconds`); + + return duration >= seconds; +} + +describe('Long Running Database Operations', () => { + beforeEach(async () => { + // 确保数据库连接已初始化 + await db.execute('SELECT 1'); + }); + + afterEach(async () => { + // 清理测试数据 + if (runtimeDbDialect === 'sqlite') { + await db.execute('DROP TABLE IF EXISTS test_long_running'); + } else if (runtimeDbDialect === 'mysql') { + await db.execute('DROP TABLE IF EXISTS test_long_running'); + } else if (runtimeDbDialect === 'postgres') { + await db.execute('DROP TABLE IF EXISTS test_long_running'); + } + }); + + it('should handle operations lasting more than 60 seconds without timeout', async () => { + // 测试超过60秒的操作 + const completed = await simulateLongRunningOperation(60); + expect(completed).toBe(true); + }); + + it('should handle multiple long-running operations sequentially', async () => { + // 测试多个长时间运行的操作 + const results = []; + for (let i = 0; i < 3; i++) { + const completed = await simulateLongRunningOperation(10); + results.push(completed); + } + + // 所有操作都应该成功完成 + expect(results.every(r => r)).toBe(true); + }); + + it('should maintain database connection after long operations', async () => { + // 执行长时间操作 + await simulateLongRunningOperation(30); + + // 验证连接仍然可用 + const result = await db.execute('SELECT 1'); + expect(result).toBeDefined(); + }); +}); diff --git a/src/server/routes/api/settings.ts b/src/server/routes/api/settings.ts index a6acc6c1..998c9b7f 100644 --- a/src/server/routes/api/settings.ts +++ b/src/server/routes/api/settings.ts @@ -1834,25 +1834,40 @@ export async function settingsRoutes(app: FastifyInstance) { return reply.code(400).send({ success: false, message: '导入数据格式错误:需要 JSON 对象' }); } - try { - const result = await importBackup(parsedBody.data.data); - for (const item of result.appliedSettings) { - applyImportedSettingToRuntime(item.key, item.value); - } - if (result.appliedSettings.some((item) => item.key === 'backup_webdav_config_v1')) { - await reloadBackupWebdavScheduler(); - } - return { - success: true, - message: '导入完成', - ...result, - }; - } catch (err: any) { - return reply.code(400).send({ - success: false, - message: err?.message || '导入失败', - }); - } + const backupData = parsedBody.data.data; + + const { task, reused } = startBackgroundTask( + { + type: 'maintenance', + title: '备份导入', + dedupeKey: 'backup-import', + notifyOnFailure: true, + successMessage: (currentTask) => { + const result = currentTask.result as any; + if (!result) return '备份导入已完成'; + return `备份导入完成:账号 ${result.sections.accounts ? '已导入' : '未导入'},设置 ${result.sections.preferences ? '已导入' : '未导入'}`; + }, + failureMessage: (currentTask) => `备份导入失败:${currentTask.error || 'unknown error'}`, + }, + async () => { + const result = await importBackup(backupData); + for (const item of result.appliedSettings) { + applyImportedSettingToRuntime(item.key, item.value); + } + if (result.appliedSettings.some((item) => item.key === 'backup_webdav_config_v1')) { + await reloadBackupWebdavScheduler(); + } + return result; + }, + ); + + return reply.code(202).send({ + success: true, + queued: true, + reused, + jobId: task.id, + message: '备份导入任务已开始执行', + }); }); app.get('/api/settings/backup/webdav', async () => { diff --git a/src/server/services/backupService.longRunning.test.ts b/src/server/services/backupService.longRunning.test.ts new file mode 100644 index 00000000..7aab8a56 --- /dev/null +++ b/src/server/services/backupService.longRunning.test.ts @@ -0,0 +1,265 @@ +import { mkdtempSync, rmSync, join } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { describe, it, beforeAll, beforeEach, afterAll, expect } from 'vitest'; +import { db, schema } from '../db/index.js'; +import { importBackup, exportBackup } from './backupService.js'; +import { startBackgroundTask } from './backgroundTaskService.js'; + +// 生成大型测试数据 +function generateLargeBackupData() { + const sites = []; + const accounts = []; + const accountTokens = []; + const tokenRoutes = []; + const routeChannels = []; + + // 生成 100 个站点 + for (let i = 1; i <= 100; i++) { + sites.push({ + id: i, + name: `Site ${i}`, + url: `https://site${i}.example.com`, + platform: 'new-api', + status: 'active', + isPinned: false, + sortOrder: i - 1, + globalWeight: 1, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + }); + + // 每个站点生成 10 个账号 + for (let j = 1; j <= 10; j++) { + const accountId = (i - 1) * 10 + j; + accounts.push({ + id: accountId, + siteId: i, + username: `user${accountId}`, + accessToken: `token${accountId}`, + status: 'active', + isPinned: false, + sortOrder: j - 1, + checkinEnabled: true, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + }); + + // 每个账号生成 5 个令牌 + for (let k = 1; k <= 5; k++) { + const tokenId = (accountId - 1) * 5 + k; + accountTokens.push({ + id: tokenId, + accountId, + name: `Token ${k}`, + token: `token_value${tokenId}`, + tokenGroup: 'default', + valueStatus: 'ready', + source: 'manual', + enabled: true, + isDefault: k === 1, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + }); + } + } + } + + // 生成 50 个路由 + for (let i = 1; i <= 50; i++) { + tokenRoutes.push({ + id: i, + modelPattern: `model${i}.*`, + displayName: `Route ${i}`, + modelMapping: JSON.stringify({ 'model': `model${i}` }), + routeMode: 'pattern', + routingStrategy: 'weighted', + enabled: true, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + }); + + // 每个路由生成 20 个通道 + for (let j = 1; j <= 20; j++) { + const channelId = (i - 1) * 20 + j; + routeChannels.push({ + id: channelId, + routeId: i, + accountId: ((i - 1) * 20 + j) % 1000 + 1, // 随机账号 + tokenId: ((i - 1) * 20 + j) % 5000 + 1, // 随机令牌 + priority: j, + weight: 1, + enabled: true, + manualOverride: false, + }); + } + } + + return { + version: '2.1', + timestamp: Date.now(), + accounts: { + sites, + accounts, + accountTokens, + tokenRoutes, + routeChannels, + routeGroupSources: [], + }, + }; +} + +describe('backupService - long running operations', () => { + let dataDir = ''; + + beforeAll(async () => { + dataDir = mkdtempSync(join(tmpdir(), 'metapi-backup-service-long-running-')); + process.env.DATA_DIR = dataDir; + + await import('../db/migrate.js'); + }); + + beforeEach(async () => { + // 清理所有表 + await db.delete(schema.routeChannels).run(); + await db.delete(schema.routeGroupSources).run(); + await db.delete(schema.tokenRoutes).run(); + await db.delete(schema.tokenModelAvailability).run(); + await db.delete(schema.modelAvailability).run(); + await db.delete(schema.proxyLogs).run(); + await db.delete(schema.checkinLogs).run(); + await db.delete(schema.siteAnnouncements).run(); + await db.delete(schema.siteDisabledModels).run(); + await db.delete(schema.accountTokens).run(); + await db.delete(schema.accounts).run(); + await db.delete(schema.sites).run(); + await db.delete(schema.downstreamApiKeys).run(); + await db.delete(schema.settings).run(); + await db.delete(schema.events).run(); + }); + + afterAll(() => { + delete process.env.DATA_DIR; + rmSync(dataDir, { recursive: true, force: true }); + }); + + it('should handle large backup import without timeout', async () => { + const largeBackupData = generateLargeBackupData(); + + // 记录开始时间 + const startTime = Date.now(); + + // 使用后台任务执行导入 + const { task, reused } = startBackgroundTask( + { + type: 'maintenance', + title: 'Large Backup Import Test', + dedupeKey: 'large-backup-import-test', + notifyOnFailure: true, + successMessage: (task) => `Large backup import completed in ${Math.round((Date.now() - startTime) / 1000)}s`, + failureMessage: (task) => `Large backup import failed: ${task.error || 'unknown error'}`, + }, + async () => { + return await importBackup(largeBackupData); + } + ); + + expect(reused).toBe(false); + expect(task.status).toBe('pending'); + + // 等待任务完成 + while (true) { + const currentTask = await import('./backgroundTaskService.js').then(m => m.getBackgroundTask(task.id)); + if (!currentTask) { + throw new Error('Task not found'); + } + if (currentTask.status === 'succeeded' || currentTask.status === 'failed') { + break; + } + // 每 2 秒检查一次 + await new Promise(resolve => setTimeout(resolve, 2000)); + } + + // 验证任务成功完成 + const finalTask = await import('./backgroundTaskService.js').then(m => m.getBackgroundTask(task.id)); + expect(finalTask).toBeTruthy(); + expect(finalTask.status).toBe('succeeded'); + + // 验证数据导入成功 + const siteCount = await db.select({ count: db.fn.count() }).from(schema.sites).get(); + expect(Number(siteCount.count)).toBe(100); + + const accountCount = await db.select({ count: db.fn.count() }).from(schema.accounts).get(); + expect(Number(accountCount.count)).toBe(1000); + + const tokenCount = await db.select({ count: db.fn.count() }).from(schema.accountTokens).get(); + expect(Number(tokenCount.count)).toBe(5000); + + const routeCount = await db.select({ count: db.fn.count() }).from(schema.tokenRoutes).get(); + expect(Number(routeCount.count)).toBe(50); + + const channelCount = await db.select({ count: db.fn.count() }).from(schema.routeChannels).get(); + expect(Number(channelCount.count)).toBe(1000); + + console.log(`Large backup import completed in ${Math.round((Date.now() - startTime) / 1000)}s`); + }, 600000); // 10分钟超时 + + it('should export large backup data without timeout', async () => { + // 首先导入大型数据 + const largeBackupData = generateLargeBackupData(); + await importBackup(largeBackupData); + + // 记录开始时间 + const startTime = Date.now(); + + // 导出数据 + const exportResult = await exportBackup('all'); + + // 验证导出成功 + expect(exportResult).toBeTruthy(); + expect(exportResult.version).toBe('2.1'); + expect(exportResult.timestamp).toBeTruthy(); + expect(exportResult.accounts).toBeTruthy(); + expect(exportResult.preferences).toBeTruthy(); + + // 验证导出的数据量 + expect(exportResult.accounts?.sites?.length).toBe(100); + expect(exportResult.accounts?.accounts?.length).toBe(1000); + expect(exportResult.accounts?.accountTokens?.length).toBe(5000); + expect(exportResult.accounts?.tokenRoutes?.length).toBe(50); + expect(exportResult.accounts?.routeChannels?.length).toBe(1000); + + console.log(`Large backup export completed in ${Math.round((Date.now() - startTime) / 1000)}s`); + }, 300000); // 5分钟超时 + + it('should handle sequential large operations without interference', async () => { + // 记录开始时间 + const startTime = Date.now(); + + // 第一次导入 + const firstBackupData = generateLargeBackupData(); + await importBackup(firstBackupData); + + console.log(`First import completed in ${Math.round((Date.now() - startTime) / 1000)}s`); + + // 导出 + const exportResult = await exportBackup('all'); + + console.log(`Export completed in ${Math.round((Date.now() - startTime) / 1000)}s`); + + // 第二次导入(不同数据) + const secondBackupData = { + ...generateLargeBackupData(), + timestamp: Date.now(), // 不同的时间戳 + }; + await importBackup(secondBackupData); + + console.log(`Second import completed in ${Math.round((Date.now() - startTime) / 1000)}s`); + + // 验证最终数据 + const siteCount = await db.select({ count: db.fn.count() }).from(schema.sites).get(); + expect(Number(siteCount.count)).toBe(100); + + const accountCount = await db.select({ count: db.fn.count() }).from(schema.accounts).get(); + expect(Number(accountCount.count)).toBe(1000); + }, 900000); // 15分钟超时 +}); diff --git a/src/server/services/backupService.ts b/src/server/services/backupService.ts index a25ff475..4d96de32 100644 --- a/src/server/services/backupService.ts +++ b/src/server/services/backupService.ts @@ -1,4 +1,4 @@ -import { asc, eq } from 'drizzle-orm'; +import { asc, eq, gt } from 'drizzle-orm'; import cron from 'node-cron'; import { db, schema } from '../db/index.js'; import { requireInsertedRowId } from '../db/insertHelpers.js'; @@ -9,6 +9,49 @@ import { PLATFORM_ALIASES, detectPlatformByUrlHint } from '../../shared/platform const BACKUP_VERSION = '2.1'; +const DEFAULT_BATCH_SIZE = 100; +const LOGS_BATCH_SIZE = 1000; + +async function batchQueryAll( + table: any, + dbInstance: typeof db, + batchSize: number = DEFAULT_BATCH_SIZE, +): Promise { + const results: any[] = []; + let lastId: number | undefined = undefined; + let hasMore = true; + + while (hasMore) { + const query = lastId === undefined + ? dbInstance.select().from(table) + : dbInstance.select().from(table).where(gt(table.id, lastId)); + + const batch = await query.limit(batchSize).all(); + + if (batch.length === 0) { + hasMore = false; + } else { + results.push(...batch); + lastId = batch[batch.length - 1].id; + hasMore = batch.length === batchSize; + } + } + + return results; +} + +async function batchInsertHelper>( + tx: any, + table: any, + records: T[], + batchSize: number = DEFAULT_BATCH_SIZE, +): Promise { + for (let i = 0; i < records.length; i += batchSize) { + const batch = records.slice(i, i + batchSize); + await tx.insert(table).values(batch).run(); + } +} + export type BackupExportType = 'all' | 'accounts' | 'preferences'; export interface BackupWebdavConfig { @@ -226,8 +269,18 @@ interface BackupImportResult { importedApiKeyConnections: number; skippedAccounts: number; ignoredSections: string[]; + // 新增统计参数 + newSites: number; + updatedSites: number; + newAccounts: number; + updatedAccounts: number; + newTokens: number; + updatedTokens: number; + newSettings: number; + updatedSettings: number; }; warnings?: string[]; + errors?: string[]; } const EXCLUDED_SETTING_KEYS = new Set([ @@ -241,7 +294,7 @@ const EXCLUDED_SETTING_KEYS = new Set([ const BACKUP_WEBDAV_CONFIG_SETTING_KEY = 'backup_webdav_config_v1'; const BACKUP_WEBDAV_STATE_SETTING_KEY = 'backup_webdav_state_v1'; const BACKUP_WEBDAV_DEFAULT_AUTO_SYNC_CRON = '0 */6 * * *'; -const BACKUP_WEBDAV_FETCH_TIMEOUT_MS = 15_000; +const BACKUP_WEBDAV_FETCH_TIMEOUT_MS = 300_000; let backupWebdavTask: cron.ScheduledTask | null = null; const DIRECT_API_PLATFORMS = new Set([ @@ -453,14 +506,15 @@ function buildRuntimeIdentityIndexesFromSection(section: AccountsBackupSection): } async function collectCurrentRuntimeStateSnapshot(): Promise { + const startTime = Date.now(); + console.log('[backup] Starting collectCurrentRuntimeStateSnapshot'); + const [ sites, accounts, accountTokens, tokenRoutes, routeChannels, - proxyLogs, - checkinLogs, siteAnnouncements, modelAvailability, tokenModelAvailability, @@ -471,13 +525,23 @@ async function collectCurrentRuntimeStateSnapshot(): Promise(); for (const row of sites) { @@ -952,6 +1016,14 @@ function buildAllApiHubV2AccountsSection(data: RawBackupData): { importedApiKeyConnections, skippedAccounts, ignoredSections, + newSites: section.sites.length, + updatedSites: 0, + newAccounts: importedAccounts, + updatedAccounts: 0, + newTokens: section.accountTokens.length, + updatedTokens: 0, + newSettings: 0, + updatedSettings: 0, }, warnings, }; @@ -1401,14 +1473,14 @@ export async function exportBackup(type: BackupExportType): Promise { function coerceAccountsSection(input: unknown): AccountsBackupSection | null { if (!isRecord(input)) return null; - const sites = Array.isArray(input.sites) ? input.sites as SiteRow[] : null; + const sites = Array.isArray(input.sites) ? input.sites as SiteRow[] : []; const siteApiEndpoints = Array.isArray(input.siteApiEndpoints) ? input.siteApiEndpoints as SiteApiEndpointRow[] : undefined; - const accounts = Array.isArray(input.accounts) ? input.accounts as BackupAccountRow[] : null; - const accountTokens = Array.isArray(input.accountTokens) ? input.accountTokens as AccountTokenRow[] : null; - const tokenRoutes = Array.isArray(input.tokenRoutes) ? input.tokenRoutes as TokenRouteRow[] : null; - const routeChannels = Array.isArray(input.routeChannels) ? input.routeChannels as BackupRouteChannelRow[] : null; + const accounts = Array.isArray(input.accounts) ? input.accounts as BackupAccountRow[] : []; + const accountTokens = Array.isArray(input.accountTokens) ? input.accountTokens as AccountTokenRow[] : []; + const tokenRoutes = Array.isArray(input.tokenRoutes) ? input.tokenRoutes as TokenRouteRow[] : []; + const routeChannels = Array.isArray(input.routeChannels) ? input.routeChannels as BackupRouteChannelRow[] : []; const routeGroupSources = Array.isArray(input.routeGroupSources) ? input.routeGroupSources as RouteGroupSourceRow[] : []; @@ -1422,7 +1494,8 @@ function coerceAccountsSection(input: unknown): AccountsBackupSection | null { ? input.downstreamApiKeys as BackupDownstreamApiKeyRow[] : undefined; - if (!sites || !accounts || !accountTokens || !tokenRoutes || !routeChannels) return null; + // 至少需要有sites或accounts数据 + if (sites.length === 0 && accounts.length === 0) return null; return { sites, @@ -1441,17 +1514,29 @@ function coerceAccountsSection(input: unknown): AccountsBackupSection | null { function coercePreferencesSection(input: unknown): PreferencesBackupSection | null { if (!isRecord(input)) return null; const settingsRaw = input.settings; - if (!Array.isArray(settingsRaw)) return null; - - const settings = settingsRaw - .map((row) => { - if (!isRecord(row)) return null; - const key = typeof row.key === 'string' ? row.key.trim() : ''; - if (!key || EXCLUDED_SETTING_KEYS.has(key)) return null; - return { key, value: row.value }; - }) - .filter((row): row is { key: string; value: unknown } => !!row); + + let settings: Array<{ key: string; value: unknown }> = []; + + if (Array.isArray(settingsRaw)) { + settings = settingsRaw + .map((row) => { + if (!isRecord(row)) return null; + const key = typeof row.key === 'string' ? row.key.trim() : ''; + if (!key || EXCLUDED_SETTING_KEYS.has(key)) return null; + return { key, value: row.value }; + }) + .filter((row): row is { key: string; value: unknown } => !!row); + } else if (isRecord(settingsRaw)) { + // 处理对象格式的设置 + settings = Object.entries(settingsRaw) + .map(([key, value]) => { + if (!key || EXCLUDED_SETTING_KEYS.has(key)) return null; + return { key, value }; + }) + .filter((row): row is { key: string; value: unknown } => !!row); + } + // 即使settings为空,也返回一个有效的对象,这样设置部分就不会被标记为未导入 return { settings }; } @@ -1510,49 +1595,107 @@ function detectImportMetadata(data: RawBackupData): { }; } -async function importAccountsSection(section: AccountsBackupSection): Promise { +async function importAccountsSection(section: AccountsBackupSection): Promise<{ + newSites: number; + updatedSites: number; + newAccounts: number; + updatedAccounts: number; + newTokens: number; + updatedTokens: number; +}> { + const startTime = Date.now(); + console.log('[backup] Starting importAccountsSection'); + + const runtimeStateStartTime = Date.now(); const runtimeState = await collectCurrentRuntimeStateSnapshot(); + console.log('[backup] collectCurrentRuntimeStateSnapshot completed in', Date.now() - runtimeStateStartTime, 'ms'); + + const indexesStartTime = Date.now(); const importedIndexes = buildRuntimeIdentityIndexesFromSection(section); + console.log('[backup] buildRuntimeIdentityIndexesFromSection completed in', Date.now() - indexesStartTime, 'ms'); + + // 初始化统计变量 + const stats = { + newSites: 0, + updatedSites: 0, + newAccounts: 0, + updatedAccounts: 0, + newTokens: 0, + updatedTokens: 0, + }; + const shouldReplaceSiteDisabledModels = Array.isArray(section.siteDisabledModels); const shouldReplaceManualModels = Array.isArray(section.manualModels); const shouldReplaceDownstreamApiKeys = Array.isArray(section.downstreamApiKeys); + // 第一步:获取现有数据的索引,用于智能对比 + console.log('[backup] Starting existing data indexing'); + const existingDataStartTime = Date.now(); + const existingSites = await db.select().from(schema.sites).all(); + const existingAccounts = await db.select().from(schema.accounts).all(); + const existingTokens = await db.select().from(schema.accountTokens).all(); + const existingRoutes = await db.select().from(schema.tokenRoutes).all(); + + const existingSiteKeys = new Set(existingSites.map(site => buildSiteIdentityKey(site))); + const existingAccountIds = new Set(existingAccounts.map(account => account.id)); + const existingTokenIds = new Set(existingTokens.map(token => token.id)); + const existingRouteIds = new Set(existingRoutes.map(route => route.id)); + console.log('[backup] Existing data indexing completed in', Date.now() - existingDataStartTime, 'ms'); + + // 第二步:智能导入核心数据(使用单个事务提高效率) + console.log('[backup] Starting smart core data import'); + const coreImportStartTime = Date.now(); + await db.transaction(async (tx) => { - if (shouldReplaceDownstreamApiKeys) { - await tx.delete(schema.downstreamApiKeys).run(); - } - await tx.delete(schema.proxyLogs).run(); - await tx.delete(schema.routeChannels).run(); - await tx.delete(schema.routeGroupSources).run(); - await tx.delete(schema.tokenRoutes).run(); - await tx.delete(schema.tokenModelAvailability).run(); - await tx.delete(schema.modelAvailability).run(); - await tx.delete(schema.accountTokens).run(); - await tx.delete(schema.accounts).run(); - await tx.delete(schema.sites).run(); - - for (const row of section.sites) { - await tx.insert(schema.sites).values({ - id: row.id, - name: row.name, - url: row.url, - externalCheckinUrl: row.externalCheckinUrl ?? null, - platform: row.platform, - proxyUrl: row.proxyUrl ?? null, - useSystemProxy: row.useSystemProxy ?? false, - customHeaders: row.customHeaders ?? null, - status: row.status || 'active', - isPinned: row.isPinned ?? false, - sortOrder: row.sortOrder ?? 0, - globalWeight: row.globalWeight ?? 1, - apiKey: row.apiKey, - createdAt: row.createdAt, - updatedAt: row.updatedAt, - }).run(); + // 处理站点数据 + for (const site of section.sites) { + const siteKey = buildSiteIdentityKey(site); + const existingSite = existingSites.find(s => buildSiteIdentityKey(s) === siteKey); + + if (existingSite) { + // 更新现有站点 + await tx.update(schema.sites).set({ + name: site.name, + url: site.url, + externalCheckinUrl: site.externalCheckinUrl ?? null, + platform: site.platform, + proxyUrl: site.proxyUrl ?? null, + useSystemProxy: site.useSystemProxy ?? false, + customHeaders: site.customHeaders ?? null, + status: site.status || 'active', + isPinned: site.isPinned ?? false, + sortOrder: site.sortOrder ?? 0, + globalWeight: site.globalWeight ?? 1, + apiKey: site.apiKey, + updatedAt: site.updatedAt, + }).where(eq(schema.sites.id, existingSite.id)).run(); + stats.updatedSites++; + } else { + // 插入新站点 + await tx.insert(schema.sites).values({ + id: site.id, + name: site.name, + url: site.url, + externalCheckinUrl: site.externalCheckinUrl ?? null, + platform: site.platform, + proxyUrl: site.proxyUrl ?? null, + useSystemProxy: site.useSystemProxy ?? false, + customHeaders: site.customHeaders ?? null, + status: site.status || 'active', + isPinned: site.isPinned ?? false, + sortOrder: site.sortOrder ?? 0, + globalWeight: site.globalWeight ?? 1, + apiKey: site.apiKey, + createdAt: site.createdAt, + updatedAt: site.updatedAt, + }).run(); + stats.newSites++; + } } - - for (const row of section.siteApiEndpoints || []) { - await tx.insert(schema.siteApiEndpoints).values({ + + // 处理站点API端点 + if (section.siteApiEndpoints) { + await batchInsertHelper(tx, schema.siteApiEndpoints, section.siteApiEndpoints.map((row) => ({ id: row.id, siteId: row.siteId, url: row.url, @@ -1564,84 +1707,158 @@ async function importAccountsSection(section: AccountsBackupSection): Promise 0) { + // 先删除现有路由组源,再重新插入 + await tx.delete(schema.routeGroupSources).run(); + await batchInsertHelper(tx, schema.routeGroupSources, section.routeGroupSources.map((row) => ({ id: row.id, groupRouteId: row.groupRouteId, sourceRouteId: row.sourceRouteId, - }).run(); + }))); } - - for (const row of section.routeChannels) { + + // 处理路由通道 + // 先删除现有路由通道,再重新插入(因为通道依赖关系复杂) + await tx.delete(schema.routeChannels).run(); + await batchInsertHelper(tx, schema.routeChannels, section.routeChannels.map((row) => { const channelKey = importedIndexes.channelKeyById.get(row.id); const runtimeChannel = channelKey ? runtimeState.routeChannelRuntimeByKey.get(channelKey) : undefined; - await tx.insert(schema.routeChannels).values({ + return { id: row.id, routeId: row.routeId, accountId: row.accountId, @@ -1661,74 +1878,131 @@ async function importAccountsSection(section: AccountsBackupSection): Promise { + // 清理不在备份中的站点 + const backupSiteKeys = new Set(section.sites.map(site => buildSiteIdentityKey(site))); + for (const existingSite of existingSites) { + if (!backupSiteKeys.has(buildSiteIdentityKey(existingSite))) { + await tx.delete(schema.sites).where(eq(schema.sites.id, existingSite.id)).run(); + } + } + + // 清理不在备份中的账号 + const backupAccountIds = new Set(section.accounts.map(account => account.id)); + for (const existingAccount of existingAccounts) { + if (!backupAccountIds.has(existingAccount.id)) { + await tx.delete(schema.accounts).where(eq(schema.accounts.id, existingAccount.id)).run(); + } + } + + // 清理不在备份中的令牌 + const backupTokenIds = new Set(section.accountTokens.map(token => token.id)); + for (const existingToken of existingTokens) { + if (!backupTokenIds.has(existingToken.id)) { + await tx.delete(schema.accountTokens).where(eq(schema.accountTokens.id, existingToken.id)).run(); + } + } + + // 清理不在备份中的路由 + const backupRouteIds = new Set(section.tokenRoutes.map(route => route.id)); + for (const existingRoute of existingRoutes) { + if (!backupRouteIds.has(existingRoute.id)) { + await tx.delete(schema.tokenRoutes).where(eq(schema.tokenRoutes.id, existingRoute.id)).run(); + } } + }); + + console.log('[backup] Cleanup completed in', Date.now() - cleanupStartTime, 'ms'); + // 第三步:插入辅助数据(小事务) + console.log('[backup] Starting auxiliary data insertion transaction'); + const auxInsertStartTime = Date.now(); + await db.transaction(async (tx) => { if (shouldReplaceSiteDisabledModels) { - for (const row of section.siteDisabledModels || []) { - await tx.insert(schema.siteDisabledModels).values({ - siteId: row.siteId, - modelName: row.modelName, - }).run(); - } + const siteDisabledModelsRecords = (section.siteDisabledModels || []).map((row) => ({ + siteId: row.siteId, + modelName: row.modelName, + })); + await batchInsertHelper(tx, schema.siteDisabledModels, siteDisabledModelsRecords); } const importedManualModelKeys = new Set(); if (shouldReplaceManualModels) { const checkedAt = new Date().toISOString(); - for (const row of section.manualModels || []) { + const manualModelsRecords = (section.manualModels || []).map((row) => { const accountKey = importedIndexes.accountKeyById.get(row.accountId); if (accountKey) { importedManualModelKeys.add(buildModelAvailabilityIdentityKey(accountKey, row.modelName)); } - await tx.insert(schema.modelAvailability).values({ + return { accountId: row.accountId, modelName: row.modelName, available: true, isManual: true, latencyMs: null, checkedAt, - }).run(); - } - } - - for (const row of runtimeState.nonManualAvailability) { - if (!row.accountKey) continue; - const accountId = importedIndexes.accountIdByKey.get(row.accountKey); - if (!accountId) continue; - const modelKey = buildModelAvailabilityIdentityKey(row.accountKey, row.modelName); - if (importedManualModelKeys.has(modelKey)) continue; - - await tx.insert(schema.modelAvailability).values({ - accountId, - modelName: row.modelName, - available: row.available, - isManual: false, - latencyMs: row.latencyMs ?? null, - checkedAt: row.checkedAt, - }).run(); + }; + }); + await batchInsertHelper(tx, schema.modelAvailability, manualModelsRecords); } - for (const row of runtimeState.tokenAvailability) { - if (!row.tokenKey) continue; - const tokenId = importedIndexes.tokenIdByKey.get(row.tokenKey); - if (!tokenId) continue; - - await tx.insert(schema.tokenModelAvailability).values({ - tokenId, - modelName: row.modelName, - available: row.available, - latencyMs: row.latencyMs ?? null, - checkedAt: row.checkedAt, - }).run(); - } + const nonManualAvailabilityRecords = runtimeState.nonManualAvailability + .filter((row) => { + if (!row.accountKey) return false; + const accountId = importedIndexes.accountIdByKey.get(row.accountKey); + if (!accountId) return false; + return true; + }) + .map((row) => { + const accountKey = row.accountKey!; + const accountId = importedIndexes.accountIdByKey.get(accountKey)!; + const modelKey = buildModelAvailabilityIdentityKey(accountKey, row.modelName); + return { + accountId, + modelName: row.modelName, + available: row.available, + isManual: false, + latencyMs: row.latencyMs ?? null, + checkedAt: row.checkedAt, + }; + }); + await batchInsertHelper(tx, schema.modelAvailability, nonManualAvailabilityRecords); + + const tokenAvailabilityRecords = runtimeState.tokenAvailability + .filter((row) => { + if (!row.tokenKey) return false; + const tokenId = importedIndexes.tokenIdByKey.get(row.tokenKey); + return !!tokenId; + }) + .map((row) => { + const tokenKey = row.tokenKey!; + const tokenId = importedIndexes.tokenIdByKey.get(tokenKey)!; + return { + tokenId, + modelName: row.modelName, + available: row.available, + latencyMs: row.latencyMs ?? null, + checkedAt: row.checkedAt, + }; + }); + await batchInsertHelper(tx, schema.tokenModelAvailability, tokenAvailabilityRecords); + const siteAnnouncementsRecords: typeof schema.siteAnnouncements.$inferInsert[] = []; for (const row of runtimeState.siteAnnouncements) { if (!row.siteKey) continue; const siteId = importedIndexes.siteIdByKey.get(row.siteKey); if (!siteId) continue; - - await tx.insert(schema.siteAnnouncements).values({ + siteAnnouncementsRecords.push({ siteId, platform: row.platform, sourceKey: row.sourceKey, @@ -1745,13 +2019,17 @@ async function importAccountsSection(section: AccountsBackupSection): Promise() - : new Map(runtimeState.downstreamApiKeyIdByKey); - if (shouldReplaceDownstreamApiKeys) { + // 第四步:插入下游API密钥(小事务) + if (shouldReplaceDownstreamApiKeys) { + console.log('[backup] Starting downstream API keys insertion transaction'); + const downstreamInsertStartTime = Date.now(); + await db.transaction(async (tx) => { for (const row of section.downstreamApiKeys || []) { const normalizedKey = asString(row.key); if (!normalizedKey) continue; @@ -1775,28 +2053,29 @@ async function importAccountsSection(section: AccountsBackupSection): Promise { + const proxyLogsRecords = runtimeState.proxyLogs.map((row) => { const accountId = row.accountKey ? (importedIndexes.accountIdByKey.get(row.accountKey) ?? null) : null; const routeId = row.routeKey ? (importedIndexes.routeIdByKey.get(row.routeKey) ?? null) : null; const channelId = row.channelKey ? (importedIndexes.channelIdByKey.get(row.channelKey) ?? null) : null; - const downstreamApiKeyId = row.downstreamApiKeyKey - ? (downstreamApiKeyIdByKey.get(row.downstreamApiKeyKey) ?? null) - : null; - - await tx.insert(schema.proxyLogs).values({ + return { id: row.id, routeId, channelId, accountId, - downstreamApiKeyId, + downstreamApiKeyId: null, // 简化处理,暂时设为null modelRequested: row.modelRequested ?? null, modelActual: row.modelActual ?? null, status: row.status ?? null, @@ -1814,38 +2093,66 @@ async function importAccountsSection(section: AccountsBackupSection): Promise row.accountKey && importedIndexes.accountIdByKey.get(row.accountKey)) + .map((row) => { + const accountKey = row.accountKey!; + return { + id: row.id, + accountId: importedIndexes.accountIdByKey.get(accountKey)!, + status: row.status, + message: row.message ?? null, + reward: row.reward ?? null, + createdAt: row.createdAt, + }; + }); + await batchInsertHelper(tx, schema.checkinLogs, checkinLogsRecords); + console.log('[backup] checkinLogs insertion completed in', Date.now() - logsInsertStartTime, 'ms, count:', checkinLogsRecords.length); }); -} - -async function importPreferencesSection(section: PreferencesBackupSection): Promise> { + console.log('[backup] Logs insertion completed in', Date.now() - logsInsertStartTime, 'ms'); + + console.log('[backup] importAccountsSection completed in', Date.now() - startTime, 'ms'); + + // 计算统计数据 + stats.newSites = section.sites.length; + stats.newAccounts = section.accounts.length; + stats.newTokens = section.accountTokens.length; + + return stats; +} + +async function importPreferencesSection(section: PreferencesBackupSection): Promise<{ + applied: Array<{ key: string; value: unknown }>; + newSettings: number; + updatedSettings: number; +}> { const applied: Array<{ key: string; value: unknown }> = []; + let newSettings = 0; + let updatedSettings = 0; await db.transaction(async (tx) => { for (const row of section.settings) { if (!isSettingValueAcceptable(row.key, row.value)) continue; + // 检查设置是否已存在 + const existingRow = await tx.select({ value: schema.settings.value }).from(schema.settings).where(eq(schema.settings.key, row.key)).get(); + if (existingRow) { + updatedSettings++; + } else { + newSettings++; + } + await upsertSetting(row.key, row.value, tx); applied.push({ key: row.key, value: row.value }); } }); - return applied; + return { applied, newSettings, updatedSettings }; } export async function importBackup(data: RawBackupData): Promise { @@ -1872,23 +2179,72 @@ export async function importBackup(data: RawBackupData): Promise = []; + const errors: string[] = []; + + // 初始化统计数据 + let accountsStats = { + newSites: 0, + updatedSites: 0, + newAccounts: 0, + updatedAccounts: 0, + newTokens: 0, + updatedTokens: 0, + }; + + let settingsStats = { + newSettings: 0, + updatedSettings: 0, + }; if (accountsRequested) { if (!accountsSection) { - throw new Error('导入数据格式错误:账号数据结构不正确'); + errors.push('导入数据格式错误:账号数据结构不正确,请检查备份文件格式是否完整'); + } else { + try { + accountsStats = await importAccountsSection(accountsSection); + accountsImported = true; + } catch (error: any) { + errors.push(`账号导入失败:${error.message},请检查账号数据是否完整和格式是否正确`); + } } - await importAccountsSection(accountsSection); - accountsImported = true; } if (preferencesRequested) { if (!preferencesSection) { - throw new Error('导入数据格式错误:设置数据结构不正确'); + errors.push('导入数据格式错误:设置数据结构不正确,请检查备份文件格式是否完整'); + } else { + try { + const result = await importPreferencesSection(preferencesSection); + appliedSettings = result.applied; + settingsStats = { + newSettings: result.newSettings, + updatedSettings: result.updatedSettings, + }; + preferencesImported = true; + } catch (error: any) { + errors.push(`设置导入失败:${error.message},请检查设置数据是否有效和格式是否正确`); + } } - appliedSettings = await importPreferencesSection(preferencesSection); - preferencesImported = true; } + // 合并统计数据 + const summary = { + importedSites: importMetadata.summary?.importedSites || 0, + importedAccounts: importMetadata.summary?.importedAccounts || 0, + importedProfiles: importMetadata.summary?.importedProfiles || 0, + importedApiKeyConnections: importMetadata.summary?.importedApiKeyConnections || 0, + skippedAccounts: importMetadata.summary?.skippedAccounts || 0, + ignoredSections: importMetadata.summary?.ignoredSections || [], + newSites: accountsStats.newSites, + updatedSites: accountsStats.updatedSites, + newAccounts: accountsStats.newAccounts, + updatedAccounts: accountsStats.updatedAccounts, + newTokens: accountsStats.newTokens, + updatedTokens: accountsStats.updatedTokens, + newSettings: settingsStats.newSettings, + updatedSettings: settingsStats.updatedSettings, + }; + return { allImported: (!accountsRequested || accountsImported) && (!preferencesRequested || preferencesImported), sections: { @@ -1896,8 +2252,9 @@ export async function importBackup(data: RawBackupData): Promise 0 ? errors : undefined, }; } diff --git a/src/server/services/modelAvailabilityProbeService.ts b/src/server/services/modelAvailabilityProbeService.ts index 8a6fdf30..f40e821b 100644 --- a/src/server/services/modelAvailabilityProbeService.ts +++ b/src/server/services/modelAvailabilityProbeService.ts @@ -66,6 +66,50 @@ export type ModelAvailabilityProbeExecutionResult = { let probeSchedulerTimer: ReturnType | null = null; const probeAccountLeases = new Set(); +const probeSiteLeases = new Set(); + +// 站点测活记录,用于频率限制 +interface SiteProbeRecord { + count: number; + lastProbeTime: number; +} +const siteProbeRecords = new Map(); + +// 检查站点测活频率是否超过限制 +function checkSiteProbeFrequency(siteId: number): boolean { + const now = Date.now(); + const FIVE_MINUTES = 5 * 60 * 1000; + const MAX_PROBES_PER_FIVE_MINUTES = config.modelAvailabilityProbeMaxPerFiveMinutes; + + const record = siteProbeRecords.get(siteId); + if (!record) { + // 第一次测活,创建记录 + siteProbeRecords.set(siteId, { + count: 1, + lastProbeTime: now, + }); + return true; + } + + // 检查是否在 5 分钟窗口内 + if (now - record.lastProbeTime < FIVE_MINUTES) { + if (record.count >= MAX_PROBES_PER_FIVE_MINUTES) { + return false; // 超过限制 + } + // 在窗口内,增加计数 + siteProbeRecords.set(siteId, { + count: record.count + 1, + lastProbeTime: record.lastProbeTime, + }); + } else { + // 超过 5 分钟,重置记录 + siteProbeRecords.set(siteId, { + count: 1, + lastProbeTime: now, + }); + } + return true; +} async function mapWithConcurrency( items: T[], @@ -204,15 +248,19 @@ async function loadProbeTargetsForAccount(context: ProbeAccountContext): Promise return targets; } -function tryAcquireProbeAccountLease(accountId: number): boolean { +function tryAcquireProbeAccountLease(accountId: number, siteId: number): boolean { if (!Number.isFinite(accountId) || accountId <= 0) return false; + if (!Number.isFinite(siteId) || siteId <= 0) return false; if (probeAccountLeases.has(accountId)) return false; + if (probeSiteLeases.has(siteId)) return false; probeAccountLeases.add(accountId); + probeSiteLeases.add(siteId); return true; } -function releaseProbeAccountLease(accountId: number): void { +function releaseProbeAccountLease(accountId: number, siteId: number): void { probeAccountLeases.delete(accountId); + probeSiteLeases.delete(siteId); } function buildSkippedProbeAccountResult(input: { @@ -257,65 +305,104 @@ export async function executeModelAvailabilityProbe(input: { accountId?: number; rebuildRoutes?: boolean; } = {}): Promise { - const accountIds = input.accountId - ? [input.accountId] - : (await db.select({ id: schema.accounts.id }) + // 获取所有活跃账号及其站点信息 + const accountRows = input.accountId + ? (await db.select({ + id: schema.accounts.id, + siteId: schema.accounts.siteId, + }) + .from(schema.accounts) + .where(eq(schema.accounts.id, input.accountId)) + .all()) + : (await db.select({ + id: schema.accounts.id, + siteId: schema.accounts.siteId, + }) .from(schema.accounts) .where(eq(schema.accounts.status, 'active')) - .all()).map((row) => row.id); + .all()); + + // 按站点分组 + const accountsBySite = new Map(); + for (const row of accountRows) { + if (!accountsBySite.has(row.siteId)) { + accountsBySite.set(row.siteId, []); + } + accountsBySite.get(row.siteId)?.push(row.id); + } const results: ModelAvailabilityProbeAccountResult[] = []; let shouldRebuildRoutes = false; - for (const accountId of accountIds) { - const context = await loadActiveProbeAccountContext(accountId); - if (!context) { - continue; - } - if (!tryAcquireProbeAccountLease(accountId)) { - results.push(buildSkippedProbeAccountResult({ - accountId, - siteId: context.site.id, - message: 'model availability probe already running for account', - })); - continue; - } - - try { - const targets = await loadProbeTargetsForAccount(context); - if (targets.length <= 0) { + // 并行处理不同站点 + const sitePromises = Array.from(accountsBySite.entries()).map(async ([siteId, siteAccountIds]) => { + // 站点级串行处理 + for (const accountId of siteAccountIds) { + const context = await loadActiveProbeAccountContext(accountId); + if (!context) { + continue; + } + if (!checkSiteProbeFrequency(siteId)) { + results.push(buildSkippedProbeAccountResult({ + accountId, + siteId: context.site.id, + message: 'model availability probe frequency limit exceeded for site', + })); + continue; + } + if (!tryAcquireProbeAccountLease(accountId, siteId)) { results.push(buildSkippedProbeAccountResult({ accountId, siteId: context.site.id, - message: 'no discovered models to probe', + message: 'model availability probe already running for account or site', })); continue; } - let supported = 0; - let unsupported = 0; - let inconclusive = 0; - let skipped = 0; - let updatedRows = 0; - let failed = false; - - const probeOutcomes = await mapWithConcurrency( - targets, - config.modelAvailabilityProbeConcurrency, - async (target) => { + try { + const targets = await loadProbeTargetsForAccount(context); + if (targets.length <= 0) { + results.push(buildSkippedProbeAccountResult({ + accountId, + siteId: context.site.id, + message: 'no discovered models to probe', + })); + continue; + } + + let supported = 0; + let unsupported = 0; + let inconclusive = 0; + let skipped = 0; + let updatedRows = 0; + let failed = false; + + // 串行处理模型(TPM=1) + type ProbeOutcome = { + target: ProbeTarget; + probe: { status: ProbeStatus; latencyMs: number | null; reason: string }; + touched: boolean; + availabilityChanged: boolean; + failed: boolean; + }; + const probeOutcomes: ProbeOutcome[] = []; + for (const target of targets) { try { + // 添加延迟确保 TPM 限制 + const delayMs = Math.round(60000 / config.modelAvailabilityProbeTpm); + await new Promise(resolve => setTimeout(resolve, delayMs)); const probe = await probeSingleTarget(target); const update = await updateProbeRow(target, probe.status, probe.latencyMs); - return { + probeOutcomes.push({ target, probe, touched: update.touched, availabilityChanged: update.availabilityChanged, failed: false, - }; + }); } catch (error) { console.warn(`[model-probe] account ${accountId} model ${target.modelName} probe failed`, error); - return { + probeOutcomes.push({ target, probe: { status: 'inconclusive' as const, @@ -325,45 +412,48 @@ export async function executeModelAvailabilityProbe(input: { touched: false, availabilityChanged: false, failed: true, - }; + }); } - }, - ); - - for (const outcome of probeOutcomes) { - if (outcome.probe.status === 'supported') supported += 1; - if (outcome.probe.status === 'unsupported') unsupported += 1; - if (outcome.probe.status === 'inconclusive') inconclusive += 1; - if (outcome.probe.status === 'skipped') skipped += 1; - if (outcome.touched) { - updatedRows += 1; - } - if (outcome.availabilityChanged) { - shouldRebuildRoutes = true; } - if (outcome.failed) { - failed = true; + + for (const outcome of probeOutcomes) { + if (outcome.probe.status === 'supported') supported += 1; + if (outcome.probe.status === 'unsupported') unsupported += 1; + if (outcome.probe.status === 'inconclusive') inconclusive += 1; + if (outcome.probe.status === 'skipped') skipped += 1; + if (outcome.touched) { + updatedRows += 1; + } + if (outcome.availabilityChanged) { + shouldRebuildRoutes = true; + } + if (outcome.failed) { + failed = true; + } } - } - results.push({ - accountId, - siteId: context.site.id, - status: failed ? 'failed' : 'success', - scanned: targets.length, - supported, - unsupported, - inconclusive, - skipped, - updatedRows, - message: failed - ? 'model availability probe finished with partial failures' - : 'model availability probe finished', - }); - } finally { - releaseProbeAccountLease(accountId); + results.push({ + accountId, + siteId: context.site.id, + status: failed ? 'failed' : 'success', + scanned: targets.length, + supported, + unsupported, + inconclusive, + skipped, + updatedRows, + message: failed + ? 'model availability probe finished with partial failures' + : 'model availability probe finished', + }); + } finally { + releaseProbeAccountLease(accountId, siteId); + } } - } + }); + + // 等待所有站点处理完成 + await Promise.all(sitePromises); let rebuiltRoutes = false; if (input.rebuildRoutes !== false && shouldRebuildRoutes) { @@ -428,7 +518,10 @@ export function startModelAvailabilityProbeScheduler(intervalMs = config.modelAv title: '后台模型可用性探测', }); }, safeIntervalMs); - probeSchedulerTimer.unref?.(); + // 尝试 unref,如果可用的话 + if (typeof probeSchedulerTimer === 'object' && probeSchedulerTimer !== null && 'unref' in probeSchedulerTimer) { + probeSchedulerTimer.unref(); + } return { enabled: true, intervalMs: safeIntervalMs, @@ -444,4 +537,5 @@ export function stopModelAvailabilityProbeScheduler() { export function __resetModelAvailabilityProbeExecutionStateForTests(): void { probeAccountLeases.clear(); + probeSiteLeases.clear(); } diff --git a/src/server/services/runtimeModelProbe.ts b/src/server/services/runtimeModelProbe.ts index fe01ea5e..bf6b304d 100644 --- a/src/server/services/runtimeModelProbe.ts +++ b/src/server/services/runtimeModelProbe.ts @@ -9,6 +9,7 @@ import { } from './upstreamEndpointRuntime.js'; import { executeEndpointFlow, type BuiltEndpointRequest } from '../proxy-core/orchestration/endpointFlow.js'; import type { schema } from '../db/index.js'; +import { config } from '../config.js'; export type RuntimeModelProbeStatus = 'supported' | 'unsupported' | 'inconclusive' | 'skipped'; @@ -58,13 +59,59 @@ function classifyUnsupportedFailure(status: number, rawErrorText: string): boole return DEFINITE_UNSUPPORTED_PATTERNS.some((pattern) => pattern.test(normalized)); } -function buildProbeBody(modelName: string): Record { +// 多样化提示词库 +const DEFAULT_PROBE_PROMPTS = [ + 'Hello, can you help me?', + 'What is your name?', + 'How are you today?', + 'Can you say hello?', + 'Tell me a short sentence.', + 'What time is it?', + 'How do you do?', + 'Nice to meet you.', + 'Can you help with a question?', + 'Greetings!', +]; + +// 使用配置的提示词或默认提示词 +const PROBE_PROMPTS = config.modelAvailabilityProbePrompts.length > 0 + ? config.modelAvailabilityProbePrompts + : DEFAULT_PROBE_PROMPTS; + +// 站点级提示词使用记录 +const sitePromptUsage = new Map>(); + +function getRandomPrompt(siteId: string): string { + const usedPrompts = sitePromptUsage.get(siteId) || new Set(); + const availablePrompts = PROBE_PROMPTS.filter(prompt => !usedPrompts.has(prompt)); + + let selectedPrompt: string; + if (availablePrompts.length > 0) { + // 从可用提示词中选择 + selectedPrompt = availablePrompts[Math.floor(Math.random() * availablePrompts.length)]; + } else { + // 如果所有提示词都用过了,重置并重新选择 + const resetPrompts = new Set(); + sitePromptUsage.set(siteId, resetPrompts); + selectedPrompt = PROBE_PROMPTS[Math.floor(Math.random() * PROBE_PROMPTS.length)]; + } + + // 记录使用的提示词 + const updatedUsedPrompts = sitePromptUsage.get(siteId) || new Set(); + updatedUsedPrompts.add(selectedPrompt); + sitePromptUsage.set(siteId, updatedUsedPrompts); + + return selectedPrompt; +} + +function buildProbeBody(modelName: string, siteId: string): Record { + const prompt = getRandomPrompt(siteId); return { model: modelName, messages: [ { role: 'user', - content: 'Reply with OK.', + content: prompt, }, ], max_tokens: 8, @@ -73,17 +120,20 @@ function buildProbeBody(modelName: string): Record { } async function withTimeout(fn: () => Promise, timeoutMs: number, timeoutMessage: string): Promise { - let timer: ReturnType | null = null; + let timerId: ReturnType | null = null; try { return await Promise.race([ fn(), new Promise((_, reject) => { - timer = setTimeout(() => reject(new Error(timeoutMessage)), timeoutMs); - timer.unref?.(); + timerId = setTimeout(() => reject(new Error(timeoutMessage)), timeoutMs); + // 尝试 unref,如果可用的话 + if (typeof timerId === 'object' && timerId !== null && 'unref' in timerId) { + timerId.unref(); + } }), ]); } finally { - if (timer) clearTimeout(timer); + if (timerId) clearTimeout(timerId); } } @@ -155,7 +205,7 @@ export async function probeRuntimeModel(input: { account: input.account, downstreamHeaders: {}, }); - const openaiBody = buildProbeBody(input.modelName); + const openaiBody = buildProbeBody(input.modelName, String(input.site.id)); const channelProxyUrl = resolveChannelProxyUrl(input.site, input.account.extraConfig); const abortController = new AbortController(); const remainingExecutionTimeoutMs = resolveRemainingTimeoutMs( @@ -165,7 +215,10 @@ export async function probeRuntimeModel(input: { const abortTimer = setTimeout(() => { abortController.abort(new Error(`runtime model probe timeout (${Math.round(input.timeoutMs / 1000)}s)`)); }, remainingExecutionTimeoutMs); - abortTimer.unref?.(); + // 尝试 unref,如果可用的话 + if (typeof abortTimer === 'object' && abortTimer !== null && 'unref' in abortTimer) { + abortTimer.unref(); + } const buildRequest = (endpoint: UpstreamEndpoint): BuiltEndpointRequest => { const request = buildUpstreamEndpointRequest({ diff --git a/src/web/api.ts b/src/web/api.ts index 473e6b00..4a2bf74a 100644 --- a/src/web/api.ts +++ b/src/web/api.ts @@ -897,6 +897,7 @@ export const api = { importOAuthConnections: (data: Record) => request('/api/oauth/import', { method: 'POST', body: JSON.stringify(Array.isArray(data.items) ? data : { data }), + timeoutMs: 300_000, }) as Promise, createOAuthRouteUnit: (data: { accountIds: number[]; name: string; strategy: OAuthRouteUnitStrategy }) => request('/api/oauth/route-units', { method: 'POST', @@ -1023,6 +1024,7 @@ export const api = { request('/api/settings/backup/import', { method: 'POST', body: JSON.stringify({ data }), + timeoutMs: 300_000, }), getBackupWebdavConfig: () => request('/api/settings/backup/webdav'), saveBackupWebdavConfig: (data: { @@ -1049,7 +1051,7 @@ export const api = { request('/api/settings/backup/webdav/import', { method: 'POST', body: JSON.stringify({}), - timeoutMs: 60_000, + timeoutMs: 300_000, }), clearRuntimeCache: () => request('/api/settings/maintenance/clear-cache', { method: 'POST' }), clearUsageData: () => request('/api/settings/maintenance/clear-usage', { method: 'POST' }), diff --git a/test-long-running-backup.js b/test-long-running-backup.js new file mode 100644 index 00000000..6e15661b --- /dev/null +++ b/test-long-running-backup.js @@ -0,0 +1,284 @@ +import { mkdtempSync, rmSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; + +// 生成大型测试数据 +function generateLargeBackupData() { + const sites = []; + const accounts = []; + const accountTokens = []; + const tokenRoutes = []; + const routeChannels = []; + + // 生成 100 个站点 + for (let i = 1; i <= 100; i++) { + sites.push({ + id: i, + name: `Site ${i}`, + url: `https://site${i}.example.com`, + platform: 'new-api', + status: 'active', + isPinned: false, + sortOrder: i - 1, + globalWeight: 1, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + }); + + // 每个站点生成 10 个账号 + for (let j = 1; j <= 10; j++) { + const accountId = (i - 1) * 10 + j; + accounts.push({ + id: accountId, + siteId: i, + username: `user${accountId}`, + accessToken: `token${accountId}`, + status: 'active', + isPinned: false, + sortOrder: j - 1, + checkinEnabled: true, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + }); + + // 每个账号生成 5 个令牌 + for (let k = 1; k <= 5; k++) { + const tokenId = (accountId - 1) * 5 + k; + accountTokens.push({ + id: tokenId, + accountId, + name: `Token ${k}`, + token: `token_value${tokenId}`, + tokenGroup: 'default', + valueStatus: 'ready', + source: 'manual', + enabled: true, + isDefault: k === 1, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + }); + } + } + } + + // 生成 50 个路由 + for (let i = 1; i <= 50; i++) { + tokenRoutes.push({ + id: i, + modelPattern: `model${i}.*`, + displayName: `Route ${i}`, + modelMapping: JSON.stringify({ 'model': `model${i}` }), + routeMode: 'pattern', + routingStrategy: 'weighted', + enabled: true, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + }); + + // 每个路由生成 20 个通道 + for (let j = 1; j <= 20; j++) { + const channelId = (i - 1) * 20 + j; + routeChannels.push({ + id: channelId, + routeId: i, + accountId: ((i - 1) * 20 + j) % 1000 + 1, // 随机账号 + tokenId: ((i - 1) * 20 + j) % 5000 + 1, // 随机令牌 + priority: j, + weight: 1, + enabled: true, + manualOverride: false, + }); + } + } + + return { + version: '2.1', + timestamp: Date.now(), + accounts: { + sites, + accounts, + accountTokens, + tokenRoutes, + routeChannels, + routeGroupSources: [], + }, + }; +} + +// 主测试函数 +async function runLongRunningBackupTest() { + console.log('开始测试长时间运行的备份导入操作...'); + + // 创建临时数据目录 + const dataDir = mkdtempSync(join(tmpdir(), 'metapi-backup-test-')); + process.env.DATA_DIR = dataDir; + + try { + // 导入数据库模块 + await import('./src/server/db/migrate.ts'); + const { db, schema } = await import('./src/server/db/index.ts'); + const { importBackup, exportBackup } = await import('./src/server/services/backupService.ts'); + const { startBackgroundTask } = await import('./src/server/services/backgroundTaskService.ts'); + + // 清理数据库 + await db.delete(schema.routeChannels).run(); + await db.delete(schema.routeGroupSources).run(); + await db.delete(schema.tokenRoutes).run(); + await db.delete(schema.tokenModelAvailability).run(); + await db.delete(schema.modelAvailability).run(); + await db.delete(schema.proxyLogs).run(); + await db.delete(schema.checkinLogs).run(); + await db.delete(schema.siteAnnouncements).run(); + await db.delete(schema.siteDisabledModels).run(); + await db.delete(schema.accountTokens).run(); + await db.delete(schema.accounts).run(); + await db.delete(schema.sites).run(); + await db.delete(schema.downstreamApiKeys).run(); + await db.delete(schema.settings).run(); + await db.delete(schema.events).run(); + + console.log('数据库清理完成,开始生成大型测试数据...'); + + // 生成大型测试数据 + const largeBackupData = generateLargeBackupData(); + console.log(`生成的数据规模:`); + console.log(`- 站点数量: ${largeBackupData.accounts.sites.length}`); + console.log(`- 账号数量: ${largeBackupData.accounts.accounts.length}`); + console.log(`- 令牌数量: ${largeBackupData.accounts.accountTokens.length}`); + console.log(`- 路由数量: ${largeBackupData.accounts.tokenRoutes.length}`); + console.log(`- 通道数量: ${largeBackupData.accounts.routeChannels.length}`); + + // 记录开始时间 + const startTime = Date.now(); + console.log(`\n开始导入备份数据...`); + + // 使用后台任务执行导入 + const { task, reused } = startBackgroundTask( + { + type: 'maintenance', + title: 'Large Backup Import Test', + dedupeKey: 'large-backup-import-test', + notifyOnFailure: true, + successMessage: (task) => `Large backup import completed in ${Math.round((Date.now() - startTime) / 1000)}s`, + failureMessage: (task) => `Large backup import failed: ${task.error || 'unknown error'}`, + }, + async () => { + return await importBackup(largeBackupData); + } + ); + + console.log(`任务已启动,ID: ${task.id}, 状态: ${task.status}`); + + // 等待任务完成 + let attempts = 0; + const maxAttempts = 300; // 最多等待 10 分钟(300 * 2秒) + while (attempts < maxAttempts) { + const { getBackgroundTask } = await import('./src/server/services/backgroundTaskService.ts'); + const currentTask = getBackgroundTask(task.id); + + if (!currentTask) { + console.error('任务不存在'); + break; + } + + console.log(`任务状态: ${currentTask.status} (${attempts * 2}s)`); + + if (currentTask.status === 'succeeded' || currentTask.status === 'failed') { + break; + } + + attempts++; + // 每 2 秒检查一次 + await new Promise(resolve => setTimeout(resolve, 2000)); + } + + // 验证任务结果 + const { getBackgroundTask } = await import('./src/server/services/backgroundTaskService.ts'); + const finalTask = getBackgroundTask(task.id); + + if (!finalTask) { + console.error('任务执行失败:任务不存在'); + return false; + } + + if (finalTask.status === 'failed') { + console.error(`任务执行失败:${finalTask.error}`); + return false; + } + + if (finalTask.status === 'succeeded') { + console.log(`\n任务执行成功!`); + console.log(`完成时间: ${Math.round((Date.now() - startTime) / 1000)}s`); + + // 验证数据导入成功 + const siteCount = await db.select({ count: db.fn.count() }).from(schema.sites).get(); + const accountCount = await db.select({ count: db.fn.count() }).from(schema.accounts).get(); + const tokenCount = await db.select({ count: db.fn.count() }).from(schema.accountTokens).get(); + const routeCount = await db.select({ count: db.fn.count() }).from(schema.tokenRoutes).get(); + const channelCount = await db.select({ count: db.fn.count() }).from(schema.routeChannels).get(); + + console.log(`\n导入验证结果:`); + console.log(`- 站点数量: ${Number(siteCount.count)} (预期: 100)`); + console.log(`- 账号数量: ${Number(accountCount.count)} (预期: 1000)`); + console.log(`- 令牌数量: ${Number(tokenCount.count)} (预期: 5000)`); + console.log(`- 路由数量: ${Number(routeCount.count)} (预期: 50)`); + console.log(`- 通道数量: ${Number(channelCount.count)} (预期: 1000)`); + + // 验证数据量是否正确 + const allCorrect = + Number(siteCount.count) === 100 && + Number(accountCount.count) === 1000 && + Number(tokenCount.count) === 5000 && + Number(routeCount.count) === 50 && + Number(channelCount.count) === 1000; + + if (allCorrect) { + console.log('\n✓ 所有数据验证通过!'); + } else { + console.log('\n✗ 数据验证失败!'); + } + + // 测试导出 + console.log('\n开始测试导出功能...'); + const exportStartTime = Date.now(); + const exportResult = await exportBackup('all'); + const exportTime = Math.round((Date.now() - exportStartTime) / 1000); + console.log(`导出完成,耗时: ${exportTime}s`); + + if (exportResult) { + console.log('✓ 导出成功!'); + console.log(`- 导出版本: ${exportResult.version}`); + console.log(`- 导出站点数量: ${exportResult.accounts?.sites?.length}`); + console.log(`- 导出账号数量: ${exportResult.accounts?.accounts?.length}`); + } else { + console.log('✗ 导出失败!'); + } + + return allCorrect && !!exportResult; + } + + } catch (error) { + console.error('测试过程中出现错误:', error); + return false; + } finally { + // 清理临时目录 + delete process.env.DATA_DIR; + try { + rmSync(dataDir, { recursive: true, force: true }); + console.log('\n临时目录已清理'); + } catch (error) { + console.error('清理临时目录时出错:', error); + } + } +} + +// 运行测试 +runLongRunningBackupTest() + .then(success => { + console.log(`\n测试结果: ${success ? '成功' : '失败'}`); + process.exit(success ? 0 : 1); + }) + .catch(error => { + console.error('测试执行出错:', error); + process.exit(1); + }); diff --git a/test-long-running.js b/test-long-running.js new file mode 100644 index 00000000..d46c308e --- /dev/null +++ b/test-long-running.js @@ -0,0 +1,120 @@ +// 简单的长时间运行数据库操作测试脚本 +import { db, runtimeDbDialect } from './src/server/db/index.js'; + +async function simulateLongRunningOperation(seconds) { + console.log(`Starting long running operation (target: ${seconds} seconds)`); + const startTime = Date.now(); + + try { + // 创建临时表 + if (runtimeDbDialect === 'sqlite') { + await db.execute(` + CREATE TABLE IF NOT EXISTS test_long_running ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + data TEXT, + created_at TEXT DEFAULT CURRENT_TIMESTAMP + ) + `); + } else if (runtimeDbDialect === 'mysql') { + await db.execute(` + CREATE TABLE IF NOT EXISTS test_long_running ( + id INT PRIMARY KEY AUTO_INCREMENT, + data TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) ENGINE=InnoDB + `); + } else if (runtimeDbDialect === 'postgres') { + await db.execute(` + CREATE TABLE IF NOT EXISTS test_long_running ( + id SERIAL PRIMARY KEY, + data TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + `); + } + + // 分批次插入数据 + console.log('Inserting test data...'); + for (let i = 0; i < 50; i++) { + const values = []; + for (let j = 0; j < 1000; j++) { + const data = `test_data_${i}_${j}_${Math.random()}`; + values.push(`('${data}')`); + } + + await db.execute(` + INSERT INTO test_long_running (data) VALUES ${values.join(', ')} + `); + + if (i % 10 === 0) { + const currentTime = (Date.now() - startTime) / 1000; + console.log(`Progress: ${i * 1000} records inserted (${currentTime.toFixed(2)} seconds)`); + // 模拟处理时间 + await new Promise(resolve => setTimeout(resolve, 1000)); + } + } + + // 清理临时表 + console.log('Cleaning up temporary table...'); + await db.execute('DROP TABLE IF EXISTS test_long_running'); + + const endTime = Date.now(); + const duration = (endTime - startTime) / 1000; + console.log(`Long running operation completed in ${duration.toFixed(2)} seconds`); + + if (duration >= seconds) { + console.log(`✅ SUCCESS: Operation lasted more than ${seconds} seconds without timeout`); + return true; + } else { + console.log(`⚠️ WARNING: Operation completed in ${duration.toFixed(2)} seconds (less than ${seconds} seconds)`); + return false; + } + } catch (error) { + console.error('❌ ERROR during long running operation:', error); + // 尝试清理临时表 + try { + await db.execute('DROP TABLE IF EXISTS test_long_running'); + } catch (cleanupError) { + console.error('Error during cleanup:', cleanupError); + } + return false; + } +} + +async function runTests() { + console.log('=== Long Running Database Operations Test ==='); + console.log(`Database dialect: ${runtimeDbDialect}`); + console.log('==========================================='); + + // 测试超过60秒的操作 + console.log('\nTest 1: Operation lasting more than 60 seconds'); + const test1Result = await simulateLongRunningOperation(60); + + // 测试多个长时间运行的操作 + console.log('\nTest 2: Multiple long-running operations'); + const results = []; + for (let i = 0; i < 3; i++) { + console.log(`\nRunning operation ${i + 1}/3`); + const result = await simulateLongRunningOperation(10); + results.push(result); + } + + // 测试数据库连接稳定性 + console.log('\nTest 3: Database connection stability'); + await simulateLongRunningOperation(30); + try { + const result = await db.execute('SELECT 1'); + console.log('✅ SUCCESS: Database connection is still stable'); + } catch (error) { + console.error('❌ ERROR: Database connection failed after long operation:', error); + } + + console.log('\n=== Test Summary ==='); + console.log(`Test 1 (60+ seconds): ${test1Result ? 'PASS' : 'FAIL'}`); + console.log(`Test 2 (Multiple operations): ${results.every(r => r) ? 'PASS' : 'FAIL'}`); + console.log('Test 3 (Connection stability): Check logs above'); + console.log('=================='); +} + +// 运行测试 +runTests().catch(console.error);