diff --git a/cloudflare-worker/src/index.ts b/cloudflare-worker/src/index.ts index 55b687ec..67aa8483 100644 --- a/cloudflare-worker/src/index.ts +++ b/cloudflare-worker/src/index.ts @@ -113,7 +113,7 @@ export default { for (let round = 0; round < 10; round++) { const result = await env.VECTORIZE.query(zeroVector, { topK: 100, - returnMetadata: false, + returnMetadata: 'none', }); const staleIds = result.matches .filter((m) => !keepSet.has(m.id)) diff --git a/cloudflare-worker/worker.js b/cloudflare-worker/worker.js index 79e8922b..6ddd496e 100644 --- a/cloudflare-worker/worker.js +++ b/cloudflare-worker/worker.js @@ -96,7 +96,7 @@ export default { for (let round = 0; round < 10; round++) { const result = await env.VECTORIZE.query(zeroVector, { topK: 100, - returnMetadata: false, + returnMetadata: 'none', }); const staleIds = result.matches .filter((m) => !keepSet.has(m.id)) diff --git a/server/src/db/schema.ts b/server/src/db/schema.ts index f5bfdc8d..e45b5931 100644 --- a/server/src/db/schema.ts +++ b/server/src/db/schema.ts @@ -149,4 +149,5 @@ export function initializeSchema(db: Database.Database): void { addColumnIfMissing(db, 'asset_filters', 'sort_order', 'INTEGER DEFAULT 0'); addColumnIfMissing(db, 'vector_search_configs', 'index_mode', "TEXT NOT NULL DEFAULT 'readme'"); addColumnIfMissing(db, 'vector_search_configs', 'readme_max_chars', 'INTEGER NOT NULL DEFAULT 6000'); + addColumnIfMissing(db, 'repositories', 'vector_indexed_at', 'TEXT'); } diff --git a/server/src/routes/repositories.ts b/server/src/routes/repositories.ts index 8199d6d7..dbf6779f 100644 --- a/server/src/routes/repositories.ts +++ b/server/src/routes/repositories.ts @@ -39,6 +39,7 @@ function transformRepo(row: Record) { category_locked: !!row.category_locked, last_edited: row.last_edited, subscribed_to_releases: !!row.subscribed_to_releases, + vector_indexed_at: row.vector_indexed_at ?? undefined, }; } @@ -117,6 +118,13 @@ router.put('/api/repositories', (req, res) => { res.status(400).json({ error: 'Each repository must have a valid non-negative stargazers_count', code: 'INVALID_STARGAZERS_COUNT' }); return; } + // 校验 vector_indexed_at:允许 null/undefined 或合法 ISO 8601 字符串 + if (repo.vector_indexed_at !== null && repo.vector_indexed_at !== undefined && repo.vector_indexed_at !== '') { + if (typeof repo.vector_indexed_at !== 'string' || isNaN(Date.parse(repo.vector_indexed_at))) { + res.status(400).json({ error: 'vector_indexed_at must be an ISO 8601 string or null', code: 'INVALID_VECTOR_INDEXED_AT' }); + return; + } + } } const stmt = db.prepare(` @@ -126,8 +134,8 @@ router.put('/api/repositories', (req, res) => { owner_login, owner_avatar_url, topics, ai_summary, ai_tags, ai_platforms, analyzed_at, analysis_failed, custom_description, custom_tags, custom_category, category_locked, last_edited, - subscribed_to_releases - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + subscribed_to_releases, vector_indexed_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(id) DO UPDATE SET name = excluded.name, full_name = excluded.full_name, @@ -152,7 +160,8 @@ router.put('/api/repositories', (req, res) => { custom_category = excluded.custom_category, category_locked = excluded.category_locked, last_edited = CASE WHEN excluded.last_edited IS NOT NULL AND excluded.last_edited != '' THEN excluded.last_edited ELSE repositories.last_edited END, - subscribed_to_releases = excluded.subscribed_to_releases + subscribed_to_releases = excluded.subscribed_to_releases, + vector_indexed_at = excluded.vector_indexed_at `); const deleteAllReleases = db.prepare('DELETE FROM releases'); @@ -198,7 +207,8 @@ router.put('/api/repositories', (req, res) => { repo.custom_description ?? null, JSON.stringify(Array.isArray(repo.custom_tags) ? repo.custom_tags : []), repo.custom_category ?? null, (repo.category_locked === true || repo.category_locked === 1) ? 1 : 0, repo.last_edited ?? null, - (repo.subscribed_to_releases === true || repo.subscribed_to_releases === 1) ? 1 : 0 + (repo.subscribed_to_releases === true || repo.subscribed_to_releases === 1) ? 1 : 0, + repo.vector_indexed_at ?? null ); count++; } @@ -232,10 +242,25 @@ router.patch('/api/repositories/:id', (req, res) => { category_locked: (v) => (v === true || v === 1) ? 1 : 0, last_edited: (v) => v, subscribed_to_releases: (v) => (v === true || v === 1) ? 1 : 0, + // 规范化:null/undefined/空字符串 → null;仅接受字符串(ISO 时间戳) + vector_indexed_at: (v) => + (v === null || v === undefined || v === '') ? null : v, description: (v) => v, name: (v) => v, }; + // 校验 vector_indexed_at 类型:只允许 null 或 ISO 8601 字符串,拒绝数字/布尔/对象/非日期字符串 + if ('vector_indexed_at' in updates) { + const v = updates.vector_indexed_at; + if (v !== null && v !== undefined && v !== '' && (typeof v !== 'string' || isNaN(Date.parse(v)))) { + res.status(400).json({ + error: 'vector_indexed_at must be an ISO string or null', + code: 'INVALID_VECTOR_INDEXED_AT', + }); + return; + } + } + const setClauses: string[] = []; const values: unknown[] = []; diff --git a/server/src/routes/sync.ts b/server/src/routes/sync.ts index b53daf7e..265a0fa3 100644 --- a/server/src/routes/sync.ts +++ b/server/src/routes/sync.ts @@ -108,8 +108,8 @@ router.post('/api/sync/import', (req, res) => { owner_login, owner_avatar_url, topics, ai_summary, ai_tags, ai_platforms, analyzed_at, analysis_failed, custom_description, custom_tags, custom_category, category_locked, last_edited, - subscribed_to_releases - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + subscribed_to_releases, vector_indexed_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) `); for (const r of repos) { // 验证必需的字段 @@ -130,7 +130,8 @@ router.post('/api/sync/import', (req, res) => { r.custom_description ?? null, typeof r.custom_tags === 'string' ? r.custom_tags : JSON.stringify(r.custom_tags ?? []), r.custom_category ?? null, (r.category_locked === true || r.category_locked === 1) ? 1 : 0, r.last_edited ?? null, - r.subscribed_to_releases ? 1 : 0 + r.subscribed_to_releases ? 1 : 0, + r.vector_indexed_at ?? null ); } counts.repositories = repos.length; diff --git a/src/components/SearchBar.test.tsx b/src/components/SearchBar.test.tsx index 3fa5929d..9d188327 100644 --- a/src/components/SearchBar.test.tsx +++ b/src/components/SearchBar.test.tsx @@ -9,6 +9,13 @@ vi.mock('../store/useAppStore', () => ({ getAllCategories: vi.fn(() => []), })); +vi.mock('../hooks/useDialog', () => ({ + useDialog: () => ({ + toast: vi.fn(), + confirm: vi.fn(), + }), +})); + const localStorageMock = (() => { let store: Record = {}; @@ -62,25 +69,34 @@ const baseStoreState = () => ({ customCategories: [], hiddenDefaultCategoryIds: [], defaultCategoryOverrides: {}, + vectorSearchConfig: { enabled: false, workerUrl: '', authToken: '', embeddingConfigId: '', indexMode: 'readme' as const, readmeMaxChars: 6000 }, + vectorSearchStatus: { connected: false, vectorCount: 0, dimensions: 0 }, + embeddingConfigs: [], }); const mockUseAppStore = vi.mocked(useAppStore); +// Track the current mock state so getState() returns the same overrides as the hook. +let currentState = baseStoreState(); +(mockUseAppStore as unknown as { getState: () => ReturnType }).getState = + () => currentState; describe('SearchBar', () => { beforeEach(() => { vi.clearAllMocks(); localStorage.clear(); + currentState = baseStoreState(); }); it('clears the committed query when the search input is manually emptied', () => { const setSearchFilters = vi.fn(); - mockUseAppStore.mockReturnValue(createStoreState({ + currentState = createStoreState({ searchFilters: { ...defaultSearchFilters, query: 'react', }, setSearchFilters, - }) as ReturnType); + }); + mockUseAppStore.mockReturnValue(currentState as ReturnType); render(); @@ -106,6 +122,7 @@ describe('SearchBar', () => { }); storeState.setSearchFilters = setSearchFilters; + currentState = storeState; mockUseAppStore.mockReturnValue(storeState as ReturnType); const { rerender } = render(); diff --git a/src/components/SearchBar.tsx b/src/components/SearchBar.tsx index 86ed856e..dc0b6c8a 100644 --- a/src/components/SearchBar.tsx +++ b/src/components/SearchBar.tsx @@ -880,27 +880,6 @@ export const SearchBar: React.FC = () => { toast(t('同步完成!所有仓库都是最新的。', 'Sync completed! All repositories are up to date.'), 'info'); } - // 向量搜索开启时,后台自动索引新仓库 - const vsCfg = useAppStore.getState().vectorSearchConfig; - const embCfgs = useAppStore.getState().embeddingConfigs; - const activeEmb = embCfgs.find(c => c.id === vsCfg?.embeddingConfigId); - if (vsCfg?.enabled && vsCfg?.workerUrl && activeEmb && newRepoCount > 0) { - const { VectorSearchService, EmbeddingClient, indexAllRepos } = await import('../services/vectorSearchService'); - const embClient = new EmbeddingClient(activeEmb); - const vecService = new VectorSearchService(vsCfg); - const readmeFetcher = githubToken - ? (owner: string, repo: string, signal?: AbortSignal) => new GitHubApiService(githubToken).getRepositoryReadme(owner, repo, signal) - : undefined; - // 只索引新增仓库,不重复索引已有仓库 - const newRepos = mergedRepositories.filter(repo => !existingRepoIds.has(repo.id)); - if (newRepos.length > 0) { - indexAllRepos(newRepos, embClient, vecService, { - readmeFetcher, - indexMode: vsCfg.indexMode, - readmeMaxChars: vsCfg.readmeMaxChars, - }).catch(() => {}); - } - } } catch (error) { console.error('Sync failed:', error); if (error instanceof Error && error.message.includes('token')) { diff --git a/src/components/settings/VectorSearchSettings.tsx b/src/components/settings/VectorSearchSettings.tsx index c2c7d4eb..69060678 100644 --- a/src/components/settings/VectorSearchSettings.tsx +++ b/src/components/settings/VectorSearchSettings.tsx @@ -58,6 +58,7 @@ export const VectorSearchSettings: React.FC = ({ t }) setVectorIndexingState, repositories, githubToken, + updateRepositoriesMetadata, } = useAppStore(); // Local form state for embedding config @@ -208,9 +209,19 @@ export const VectorSearchSettings: React.FC = ({ t }) } }, [formWorkerUrl, formAuthToken, setVectorSearchStatus]); - const runIndexAll = useCallback(async (withCleanup: boolean) => { - if (!activeConfig) return; + // 未索引数量(已分析、未失败、未向量索引或内容已更新) + const unindexedCount = repositories.filter((r) => { + if (!r.analyzed_at || r.analysis_failed) return false; + if (!r.vector_indexed_at) return true; + const contentTime = [r.last_edited, r.analyzed_at] + .filter((t): t is string => !!t) + .sort() + .pop() || ''; + return contentTime > r.vector_indexed_at; + }).length; + const createClients = useCallback(() => { + if (!activeConfig) return null; const embeddingClient = new EmbeddingClient({ ...activeConfig, apiType: formApiType, @@ -225,39 +236,70 @@ export const VectorSearchSettings: React.FC = ({ t }) authToken: formAuthToken, embeddingConfigId: activeEmbeddingConfig || '', }); + // 复用单个 GitHubApiService 实例,保留 rate-limit state + const githubApi = githubToken ? new GitHubApiService(githubToken) : null; + const readmeFetcher = githubApi + ? (owner: string, repo: string, signal?: AbortSignal) => + githubApi.getRepositoryReadme(owner, repo, signal) + : undefined; + return { embeddingClient, vectorService, readmeFetcher }; + }, [activeConfig, formApiType, formBaseUrl, formApiKey, formModel, formDimensions, formWorkerUrl, formAuthToken, activeEmbeddingConfig, githubToken]); + + const handleRebuildIndex = useCallback(async () => { + const clients = createClients(); + if (!clients) return; + const controller = new AbortController(); setAbortController(controller); setVectorIndexingState({ isIndexing: true, phase: null, phaseDone: 0, phaseTotal: 0, result: null }); try { - if (withCleanup) { - const keepIds = repositories.map(r => String(r.id)); - try { - await vectorService.cleanup(keepIds, controller.signal); - } catch (cleanupErr) { - // Cleanup 失败不阻塞重建,记录警告继续 - console.warn('Vector cleanup failed, continuing with rebuild:', cleanupErr); - } - } + // 每次点击时读取最新的 repositories,避免闭包捕获过期数据 + const currentRepos = useAppStore.getState().repositories; - const readmeFetcher = githubToken - ? (owner: string, repo: string, signal?: AbortSignal) => { - const api = new GitHubApiService(githubToken); - return api.getRepositoryReadme(owner, repo, signal); - } - : undefined; + // 1. 清除所有 vector_indexed_at(包括之前失败/不可索引的 repo 的残留值) + // 用 updateRepositoriesMetadata 避免重置当前过滤的 searchResults + updateRepositoriesMetadata( + currentRepos.filter(r => r.vector_indexed_at).map(r => ({ id: r.id, patch: { vector_indexed_at: undefined } })) + ); - const result = await indexAllRepos(repositories, embeddingClient, vectorService, { + // 2. 全量索引,逐批确认后立即 stamp(中断不丢失已确认进度) + const now = new Date().toISOString(); + const stampedRepoIds: number[] = []; + const result = await indexAllRepos(currentRepos, clients.embeddingClient, clients.vectorService, { onProgress: (progress) => setVectorIndexingState({ phase: progress.phase, phaseDone: progress.done, phaseTotal: progress.total, }), signal: controller.signal, - readmeFetcher, + readmeFetcher: clients.readmeFetcher, indexMode: formIndexMode, readmeMaxChars: formReadmeMaxChars, + incremental: false, + onRepoIndexed: (repoId) => { + stampedRepoIds.push(repoId); + // 批量 stamp:每 32 个(一个 batch)刷新一次,减少 UI 刷新频率 + if (stampedRepoIds.length % 32 === 0) { + const batch = stampedRepoIds.splice(0, stampedRepoIds.length); + updateRepositoriesMetadata(batch.map(id => ({ id, patch: { vector_indexed_at: now } }))); + } + }, }); + + // stamp 剩余未刷新的 + if (stampedRepoIds.length > 0) { + updateRepositoriesMetadata(stampedRepoIds.map(id => ({ id, patch: { vector_indexed_at: now } }))); + } + + // 3. cleanup:全量重建后只保留本次成功重建的向量 + try { + await clients.vectorService.cleanup(result.indexedRepoIds.map(String), controller.signal); + } catch (cleanupErr) { + console.warn('Vector cleanup failed after rebuild:', cleanupErr); + throw cleanupErr; + } + setVectorIndexingState({ result, isIndexing: false, phase: null }); setVectorSearchStatus({ connected: true, @@ -269,15 +311,102 @@ export const VectorSearchSettings: React.FC = ({ t }) if (err instanceof Error && err.message === 'Aborted') { setVectorIndexingState({ isIndexing: false, phase: null, result: null }); } else { - setVectorIndexingState({ isIndexing: false, phase: null, result: { indexed: 0, skipped: 0, errors: repositories.length } }); + const msg = err instanceof Error ? err.message : String(err); + const currentRepos = useAppStore.getState().repositories; + const indexableCount = currentRepos.filter((r) => r.analyzed_at && !r.analysis_failed).length; + setVectorIndexingState({ isIndexing: false, phase: null, result: { indexed: 0, skipped: currentRepos.length - indexableCount, errors: indexableCount, error: msg } }); } } finally { setAbortController(null); } - }, [activeConfig, formApiType, formBaseUrl, formApiKey, formModel, formDimensions, formWorkerUrl, formAuthToken, formIndexMode, formReadmeMaxChars, activeEmbeddingConfig, repositories, githubToken, setVectorSearchStatus, setVectorIndexingState]); + }, [createClients, formIndexMode, formReadmeMaxChars, formDimensions, updateRepositoriesMetadata, setVectorSearchStatus, setVectorIndexingState]); + + const handleIncrementalIndex = useCallback(async () => { + const clients = createClients(); + if (!clients) return; + + const controller = new AbortController(); + setAbortController(controller); + setVectorIndexingState({ isIndexing: true, phase: null, phaseDone: 0, phaseTotal: 0, result: null }); + + try { + // 每次点击时读取最新的 repositories,避免闭包捕获过期数据 + const currentRepos = useAppStore.getState().repositories; + + // 记录索引前无 vector_indexed_at 的 repo,用于精确计算新增数量 + const newlyIndexedRepoIds = new Set( + currentRepos.filter(r => !r.vector_indexed_at).map(r => r.id) + ); + + const now = new Date().toISOString(); + const stampedRepoIds: number[] = []; + const result = await indexAllRepos(currentRepos, clients.embeddingClient, clients.vectorService, { + onProgress: (progress) => setVectorIndexingState({ + phase: progress.phase, + phaseDone: progress.done, + phaseTotal: progress.total, + }), + signal: controller.signal, + readmeFetcher: clients.readmeFetcher, + indexMode: formIndexMode, + readmeMaxChars: formReadmeMaxChars, + incremental: true, + onRepoIndexed: (repoId) => { + stampedRepoIds.push(repoId); + if (stampedRepoIds.length % 32 === 0) { + const batch = stampedRepoIds.splice(0, stampedRepoIds.length); + updateRepositoriesMetadata(batch.map(id => ({ id, patch: { vector_indexed_at: now } }))); + } + }, + }); + + // stamp 剩余未刷新的 + if (stampedRepoIds.length > 0) { + updateRepositoriesMetadata(stampedRepoIds.map(id => ({ id, patch: { vector_indexed_at: now } }))); + } - const handleRebuildIndex = useCallback(() => runIndexAll(true), [runIndexAll]); - const handleIncrementalIndex = useCallback(() => runIndexAll(false), [runIndexAll]); + setVectorIndexingState({ result, isIndexing: false, phase: null }); + // 只计算本次新增索引的 repo(之前无 vector_indexed_at),不包含重新索引的 + // 用可选链避免 vectorSearchStatus 为 undefined 时抛错(旧版本持久化状态或未测试连接) + const newlyIndexedCount = result.indexedRepoIds.filter(id => newlyIndexedRepoIds.has(id)).length; + const prevCount = useAppStore.getState().vectorSearchStatus?.vectorCount ?? 0; + try { + setVectorSearchStatus({ + connected: true, + vectorCount: prevCount + newlyIndexedCount, + dimensions: formDimensions, + lastSyncAt: new Date().toISOString(), + }); + } catch (statusErr) { + // 状态更新失败不应回滚已成功的索引结果 + console.warn('Failed to update vector search status:', statusErr); + } + } catch (err) { + if (err instanceof Error && err.message === 'Aborted') { + setVectorIndexingState({ isIndexing: false, phase: null, result: null }); + } else { + const msg = err instanceof Error ? err.message : String(err); + const currentRepos = useAppStore.getState().repositories; + const attemptedCount = currentRepos.filter((r) => { + if (!r.analyzed_at || r.analysis_failed) return false; + if (!r.vector_indexed_at) return true; + const contentTime = [r.last_edited, r.analyzed_at] + .filter((t): t is string => !!t) + .sort() + .pop() || ''; + return contentTime > r.vector_indexed_at; + }).length; + const skippedCount = currentRepos.length - attemptedCount; + setVectorIndexingState({ + isIndexing: false, + phase: null, + result: { indexed: 0, skipped: skippedCount, errors: attemptedCount, error: msg }, + }); + } + } finally { + setAbortController(null); + } + }, [createClients, formIndexMode, formReadmeMaxChars, formDimensions, updateRepositoriesMetadata, setVectorSearchStatus, setVectorIndexingState]); const handleAbortIndexing = useCallback(() => { abortController?.abort(); @@ -735,16 +864,21 @@ export const VectorSearchSettings: React.FC = ({ t }) {isIndexing && (