Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions server/src/db/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,4 +149,5 @@ export function initializeSchema(db: Database.Database): void {
addColumnIfMissing(db, 'asset_filters', 'sort_order', 'INTEGER DEFAULT 0');
addColumnIfMissing(db, 'vector_search_configs', 'index_mode', "TEXT NOT NULL DEFAULT 'readme'");
addColumnIfMissing(db, 'vector_search_configs', 'readme_max_chars', 'INTEGER NOT NULL DEFAULT 6000');
addColumnIfMissing(db, 'repositories', 'vector_indexed_at', 'TEXT');
}
12 changes: 8 additions & 4 deletions server/src/routes/repositories.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ function transformRepo(row: Record<string, unknown>) {
category_locked: !!row.category_locked,
last_edited: row.last_edited,
subscribed_to_releases: !!row.subscribed_to_releases,
vector_indexed_at: row.vector_indexed_at ?? undefined,
};
}

Expand Down Expand Up @@ -126,8 +127,8 @@ router.put('/api/repositories', (req, res) => {
owner_login, owner_avatar_url, topics,
ai_summary, ai_tags, ai_platforms, analyzed_at, analysis_failed,
custom_description, custom_tags, custom_category, category_locked, last_edited,
subscribed_to_releases
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
subscribed_to_releases, vector_indexed_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(id) DO UPDATE SET
name = excluded.name,
full_name = excluded.full_name,
Expand All @@ -152,7 +153,8 @@ router.put('/api/repositories', (req, res) => {
custom_category = excluded.custom_category,
category_locked = excluded.category_locked,
last_edited = CASE WHEN excluded.last_edited IS NOT NULL AND excluded.last_edited != '' THEN excluded.last_edited ELSE repositories.last_edited END,
subscribed_to_releases = excluded.subscribed_to_releases
subscribed_to_releases = excluded.subscribed_to_releases,
vector_indexed_at = excluded.vector_indexed_at
`);

const deleteAllReleases = db.prepare('DELETE FROM releases');
Expand Down Expand Up @@ -198,7 +200,8 @@ router.put('/api/repositories', (req, res) => {
repo.custom_description ?? null,
JSON.stringify(Array.isArray(repo.custom_tags) ? repo.custom_tags : []),
repo.custom_category ?? null, (repo.category_locked === true || repo.category_locked === 1) ? 1 : 0, repo.last_edited ?? null,
(repo.subscribed_to_releases === true || repo.subscribed_to_releases === 1) ? 1 : 0
(repo.subscribed_to_releases === true || repo.subscribed_to_releases === 1) ? 1 : 0,
repo.vector_indexed_at ?? null
Comment thread
coderabbitai[bot] marked this conversation as resolved.
);
count++;
}
Expand Down Expand Up @@ -232,6 +235,7 @@ router.patch('/api/repositories/:id', (req, res) => {
category_locked: (v) => (v === true || v === 1) ? 1 : 0,
last_edited: (v) => v,
subscribed_to_releases: (v) => (v === true || v === 1) ? 1 : 0,
vector_indexed_at: (v) => v,
description: (v) => v,
name: (v) => v,
};
Expand Down
7 changes: 4 additions & 3 deletions server/src/routes/sync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ router.post('/api/sync/import', (req, res) => {
owner_login, owner_avatar_url, topics,
ai_summary, ai_tags, ai_platforms, analyzed_at, analysis_failed,
custom_description, custom_tags, custom_category, category_locked, last_edited,
subscribed_to_releases
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
subscribed_to_releases, vector_indexed_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
for (const r of repos) {
// 验证必需的字段
Expand All @@ -130,7 +130,8 @@ router.post('/api/sync/import', (req, res) => {
r.custom_description ?? null,
typeof r.custom_tags === 'string' ? r.custom_tags : JSON.stringify(r.custom_tags ?? []),
r.custom_category ?? null, (r.category_locked === true || r.category_locked === 1) ? 1 : 0, r.last_edited ?? null,
r.subscribed_to_releases ? 1 : 0
r.subscribed_to_releases ? 1 : 0,
r.vector_indexed_at ?? null
);
}
counts.repositories = repos.length;
Expand Down
21 changes: 0 additions & 21 deletions src/components/SearchBar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -880,27 +880,6 @@ export const SearchBar: React.FC = () => {
toast(t('同步完成!所有仓库都是最新的。', 'Sync completed! All repositories are up to date.'), 'info');
}

// 向量搜索开启时,后台自动索引新仓库
const vsCfg = useAppStore.getState().vectorSearchConfig;
const embCfgs = useAppStore.getState().embeddingConfigs;
const activeEmb = embCfgs.find(c => c.id === vsCfg?.embeddingConfigId);
if (vsCfg?.enabled && vsCfg?.workerUrl && activeEmb && newRepoCount > 0) {
const { VectorSearchService, EmbeddingClient, indexAllRepos } = await import('../services/vectorSearchService');
const embClient = new EmbeddingClient(activeEmb);
const vecService = new VectorSearchService(vsCfg);
const readmeFetcher = githubToken
? (owner: string, repo: string, signal?: AbortSignal) => new GitHubApiService(githubToken).getRepositoryReadme(owner, repo, signal)
: undefined;
// 只索引新增仓库,不重复索引已有仓库
const newRepos = mergedRepositories.filter(repo => !existingRepoIds.has(repo.id));
if (newRepos.length > 0) {
indexAllRepos(newRepos, embClient, vecService, {
readmeFetcher,
indexMode: vsCfg.indexMode,
readmeMaxChars: vsCfg.readmeMaxChars,
}).catch(() => {});
}
}
} catch (error) {
console.error('Sync failed:', error);
if (error instanceof Error && error.message.includes('token')) {
Expand Down
139 changes: 114 additions & 25 deletions src/components/settings/VectorSearchSettings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ export const VectorSearchSettings: React.FC<VectorSearchSettingsProps> = ({ t })
setVectorIndexingState,
repositories,
githubToken,
setRepositories,
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
} = useAppStore();
Comment on lines 58 to 62

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Destructure setRepositories from useAppStore so that we can perform efficient batch updates of the repository list instead of calling updateRepository in a loop.

Suggested change
setVectorIndexingState,
repositories,
githubToken,
updateRepository,
} = useAppStore();
setVectorIndexingState,
repositories,
githubToken,
updateRepository,
setRepositories,
} = useAppStore();


// Local form state for embedding config
Expand Down Expand Up @@ -208,9 +209,16 @@ export const VectorSearchSettings: React.FC<VectorSearchSettingsProps> = ({ t })
}
}, [formWorkerUrl, formAuthToken, setVectorSearchStatus]);

const runIndexAll = useCallback(async (withCleanup: boolean) => {
if (!activeConfig) return;
// Number of repositories still waiting for a vector index: analyzed successfully,
// and either never vector-indexed or with content newer than the last indexing time.
const unindexedCount = repositories.reduce((pending, repo) => {
  // Repos that were never analyzed, or whose analysis failed, are not counted.
  const analyzedOk = Boolean(repo.analyzed_at) && !repo.analysis_failed;
  if (!analyzedOk) return pending;

  // No indexing timestamp at all: counts as unindexed.
  const indexedAt = repo.vector_indexed_at;
  if (!indexedAt) return pending + 1;

  // Otherwise compare the content timestamp (last edit, falling back to the
  // analysis time) against the indexing timestamp as plain strings.
  const contentTime = repo.last_edited || repo.analyzed_at || '';
  return contentTime > indexedAt ? pending + 1 : pending;
}, 0);

const createClients = useCallback(() => {
if (!activeConfig) return null;
const embeddingClient = new EmbeddingClient({
...activeConfig,
apiType: formApiType,
Expand All @@ -225,39 +233,58 @@ export const VectorSearchSettings: React.FC<VectorSearchSettingsProps> = ({ t })
authToken: formAuthToken,
embeddingConfigId: activeEmbeddingConfig || '',
});
const readmeFetcher = githubToken
? (owner: string, repo: string, signal?: AbortSignal) => {
const api = new GitHubApiService(githubToken);
return api.getRepositoryReadme(owner, repo, signal);
}
: undefined;
return { embeddingClient, vectorService, readmeFetcher };
}, [activeConfig, formApiType, formBaseUrl, formApiKey, formModel, formDimensions, formWorkerUrl, formAuthToken, activeEmbeddingConfig, githubToken]);

const handleRebuildIndex = useCallback(async () => {
const clients = createClients();
if (!clients) return;

const controller = new AbortController();
setAbortController(controller);
setVectorIndexingState({ isIndexing: true, phase: null, phaseDone: 0, phaseTotal: 0, result: null });

try {
if (withCleanup) {
const keepIds = repositories.map(r => String(r.id));
try {
await vectorService.cleanup(keepIds, controller.signal);
} catch (cleanupErr) {
// Cleanup 失败不阻塞重建,记录警告继续
console.warn('Vector cleanup failed, continuing with rebuild:', cleanupErr);
}
}

const readmeFetcher = githubToken
? (owner: string, repo: string, signal?: AbortSignal) => {
const api = new GitHubApiService(githubToken);
return api.getRepositoryReadme(owner, repo, signal);
}
: undefined;

const result = await indexAllRepos(repositories, embeddingClient, vectorService, {
// 1. 清除所有 vector_indexed_at(包括之前失败/不可索引的 repo 的残留值)
setRepositories(repositories.map(repo =>
repo.vector_indexed_at ? { ...repo, vector_indexed_at: undefined } : repo
));

// 2. 全量索引
const now = new Date().toISOString();
const result = await indexAllRepos(repositories, clients.embeddingClient, clients.vectorService, {
onProgress: (progress) => setVectorIndexingState({
phase: progress.phase,
phaseDone: progress.done,
phaseTotal: progress.total,
}),
signal: controller.signal,
readmeFetcher,
readmeFetcher: clients.readmeFetcher,
indexMode: formIndexMode,
readmeMaxChars: formReadmeMaxChars,
incremental: false,
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// 3. cleanup:全量重建后只保留本次成功重建的向量
try {
await clients.vectorService.cleanup(result.indexedRepoIds.map(String), controller.signal);
} catch (cleanupErr) {
console.warn('Vector cleanup failed after rebuild:', cleanupErr);
throw cleanupErr;
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// 4. 为成功索引的 repo 设置 vector_indexed_at(批量更新)
const indexedSet = new Set(result.indexedRepoIds);
setRepositories(useAppStore.getState().repositories.map(repo =>
indexedSet.has(repo.id) ? { ...repo, vector_indexed_at: now } : repo
));

setVectorIndexingState({ result, isIndexing: false, phase: null });
setVectorSearchStatus({
connected: true,
Expand All @@ -274,10 +301,67 @@ export const VectorSearchSettings: React.FC<VectorSearchSettingsProps> = ({ t })
} finally {
setAbortController(null);
}
}, [activeConfig, formApiType, formBaseUrl, formApiKey, formModel, formDimensions, formWorkerUrl, formAuthToken, formIndexMode, formReadmeMaxChars, activeEmbeddingConfig, repositories, githubToken, setVectorSearchStatus, setVectorIndexingState]);
}, [createClients, repositories, formIndexMode, formReadmeMaxChars, formDimensions, setRepositories, setVectorSearchStatus, setVectorIndexingState]);

/**
 * Runs an incremental indexing pass: calls indexAllRepos with `incremental: true`
 * over the current `repositories`, stamps `vector_indexed_at` on every repo listed
 * in `result.indexedRepoIds`, and then updates the indexing state and the vector
 * search status. Returns without doing anything when createClients() yields null.
 */
const handleIncrementalIndex = useCallback(async () => {
const clients = createClients();
if (!clients) return;

// NOTE(review): the next two one-liners redeclare handleRebuildIndex and
// handleIncrementalIndex in terms of runIndexAll; they look like the removed
// pre-change definitions left over from the diff view — confirm and drop.
const handleRebuildIndex = useCallback(() => runIndexAll(true), [runIndexAll]);
const handleIncrementalIndex = useCallback(() => runIndexAll(false), [runIndexAll]);
// The controller is stored in component state and its signal is handed to
// indexAllRepos below, so the run can be aborted from outside this callback.
const controller = new AbortController();
setAbortController(controller);
// Reset progress and any previous result before starting.
setVectorIndexingState({ isIndexing: true, phase: null, phaseDone: 0, phaseTotal: 0, result: null });

try {
// Record the repos that had no vector_indexed_at before indexing, so the number
// of newly added vectors can be computed precisely afterwards.
const newlyIndexedRepoIds = new Set(
repositories.filter(r => !r.vector_indexed_at).map(r => r.id)
);

// Timestamp is taken before indexing starts; it is the value written to
// vector_indexed_at for the repos indexed in this run.
const now = new Date().toISOString();
const result = await indexAllRepos(repositories, clients.embeddingClient, clients.vectorService, {
onProgress: (progress) => setVectorIndexingState({
phase: progress.phase,
phaseDone: progress.done,
phaseTotal: progress.total,
}),
signal: controller.signal,
readmeFetcher: clients.readmeFetcher,
indexMode: formIndexMode,
readmeMaxChars: formReadmeMaxChars,
incremental: true,
});

// Set vector_indexed_at in one batch update instead of calling updateRepository
// per repo. The list is read from the store via getState() rather than from the
// closed-over `repositories` — presumably so changes made while indexing ran are
// not overwritten (TODO confirm).
const indexedSet = new Set(result.indexedRepoIds);
setRepositories(useAppStore.getState().repositories.map(repo =>
indexedSet.has(repo.id) ? { ...repo, vector_indexed_at: now } : repo
));

setVectorIndexingState({ result, isIndexing: false, phase: null });
// Count only repos newly indexed in this run (those that previously had no
// vector_indexed_at); re-indexed repos are excluded from the increment.
const newlyIndexedCount = result.indexedRepoIds.filter(id => newlyIndexedRepoIds.has(id)).length;
const prevCount = useAppStore.getState().vectorSearchStatus.vectorCount || 0;
setVectorSearchStatus({
connected: true,
vectorCount: prevCount + newlyIndexedCount,
dimensions: formDimensions,
lastSyncAt: new Date().toISOString(),
});
} catch (err) {
// An Error whose message is exactly 'Aborted' is treated as a cancellation:
// the indexing flag is cleared and no result is reported.
if (err instanceof Error && err.message === 'Aborted') {
setVectorIndexingState({ isIndexing: false, phase: null, result: null });
} else {
// Any other failure is reported as a result that marks every repository as
// an error and carries the error message.
const msg = err instanceof Error ? err.message : String(err);
setVectorIndexingState({
isIndexing: false,
phase: null,
result: { indexed: 0, skipped: 0, errors: repositories.length, error: msg },
});
}
} finally {
// Always drop the stored controller, whether the run succeeded, failed or was aborted.
setAbortController(null);
}
}, [createClients, repositories, formIndexMode, formReadmeMaxChars, formDimensions, setRepositories, setVectorSearchStatus, setVectorIndexingState]);

const handleAbortIndexing = useCallback(() => {
abortController?.abort();
Expand Down Expand Up @@ -735,11 +819,16 @@ export const VectorSearchSettings: React.FC<VectorSearchSettingsProps> = ({ t })
</button>
<button
onClick={handleIncrementalIndex}
disabled={isIndexing || !isConfigComplete}
disabled={isIndexing || !isConfigComplete || unindexedCount === 0}
className="flex items-center gap-2 px-4 py-2 text-sm bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-md hover:bg-gray-300 dark:hover:bg-gray-600 disabled:opacity-50 disabled:cursor-not-allowed"
>
{isIndexing ? <Loader2 className="w-4 h-4 animate-spin" /> : <RefreshCw className="w-4 h-4" />}
{t('增量索引', 'Incremental Index')}
{unindexedCount > 0 && (
<span className="ml-1 px-1.5 py-0.5 text-xs bg-purple-500 text-white rounded-full">
{unindexedCount}
</span>
)}
</button>
{isIndexing && (
<button
Expand Down
Loading
Loading