Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions server/src/db/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,4 +149,5 @@ export function initializeSchema(db: Database.Database): void {
addColumnIfMissing(db, 'asset_filters', 'sort_order', 'INTEGER DEFAULT 0');
addColumnIfMissing(db, 'vector_search_configs', 'index_mode', "TEXT NOT NULL DEFAULT 'readme'");
addColumnIfMissing(db, 'vector_search_configs', 'readme_max_chars', 'INTEGER NOT NULL DEFAULT 6000');
addColumnIfMissing(db, 'repositories', 'vector_indexed_at', 'TEXT');
}
12 changes: 8 additions & 4 deletions server/src/routes/repositories.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ function transformRepo(row: Record<string, unknown>) {
category_locked: !!row.category_locked,
last_edited: row.last_edited,
subscribed_to_releases: !!row.subscribed_to_releases,
vector_indexed_at: row.vector_indexed_at ?? undefined,
};
}

Expand Down Expand Up @@ -126,8 +127,8 @@ router.put('/api/repositories', (req, res) => {
owner_login, owner_avatar_url, topics,
ai_summary, ai_tags, ai_platforms, analyzed_at, analysis_failed,
custom_description, custom_tags, custom_category, category_locked, last_edited,
subscribed_to_releases
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
subscribed_to_releases, vector_indexed_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(id) DO UPDATE SET
name = excluded.name,
full_name = excluded.full_name,
Expand All @@ -152,7 +153,8 @@ router.put('/api/repositories', (req, res) => {
custom_category = excluded.custom_category,
category_locked = excluded.category_locked,
last_edited = CASE WHEN excluded.last_edited IS NOT NULL AND excluded.last_edited != '' THEN excluded.last_edited ELSE repositories.last_edited END,
subscribed_to_releases = excluded.subscribed_to_releases
subscribed_to_releases = excluded.subscribed_to_releases,
vector_indexed_at = CASE WHEN excluded.vector_indexed_at IS NOT NULL AND excluded.vector_indexed_at != '' THEN excluded.vector_indexed_at ELSE repositories.vector_indexed_at END

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The CASE WHEN clause prevents vector_indexed_at from ever being cleared (set to NULL) during a rebuild or reset, since any NULL or empty string sent by the frontend will just fall back to the existing database value. Since the frontend already preserves local metadata (including vector_indexed_at) during sync merges, it is safe and correct to directly assign excluded.vector_indexed_at.

Suggested change
vector_indexed_at = CASE WHEN excluded.vector_indexed_at IS NOT NULL AND excluded.vector_indexed_at != '' THEN excluded.vector_indexed_at ELSE repositories.vector_indexed_at END
vector_indexed_at = excluded.vector_indexed_at

`);

const deleteAllReleases = db.prepare('DELETE FROM releases');
Expand Down Expand Up @@ -198,7 +200,8 @@ router.put('/api/repositories', (req, res) => {
repo.custom_description ?? null,
JSON.stringify(Array.isArray(repo.custom_tags) ? repo.custom_tags : []),
repo.custom_category ?? null, (repo.category_locked === true || repo.category_locked === 1) ? 1 : 0, repo.last_edited ?? null,
(repo.subscribed_to_releases === true || repo.subscribed_to_releases === 1) ? 1 : 0
(repo.subscribed_to_releases === true || repo.subscribed_to_releases === 1) ? 1 : 0,
repo.vector_indexed_at ?? null
Comment thread
coderabbitai[bot] marked this conversation as resolved.
);
count++;
}
Expand Down Expand Up @@ -232,6 +235,7 @@ router.patch('/api/repositories/:id', (req, res) => {
category_locked: (v) => (v === true || v === 1) ? 1 : 0,
last_edited: (v) => v,
subscribed_to_releases: (v) => (v === true || v === 1) ? 1 : 0,
vector_indexed_at: (v) => v,
description: (v) => v,
name: (v) => v,
};
Expand Down
7 changes: 4 additions & 3 deletions server/src/routes/sync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ router.post('/api/sync/import', (req, res) => {
owner_login, owner_avatar_url, topics,
ai_summary, ai_tags, ai_platforms, analyzed_at, analysis_failed,
custom_description, custom_tags, custom_category, category_locked, last_edited,
subscribed_to_releases
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
subscribed_to_releases, vector_indexed_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
for (const r of repos) {
// 验证必需的字段
Expand All @@ -130,7 +130,8 @@ router.post('/api/sync/import', (req, res) => {
r.custom_description ?? null,
typeof r.custom_tags === 'string' ? r.custom_tags : JSON.stringify(r.custom_tags ?? []),
r.custom_category ?? null, (r.category_locked === true || r.category_locked === 1) ? 1 : 0, r.last_edited ?? null,
r.subscribed_to_releases ? 1 : 0
r.subscribed_to_releases ? 1 : 0,
r.vector_indexed_at ?? null
);
}
counts.repositories = repos.length;
Expand Down
21 changes: 0 additions & 21 deletions src/components/SearchBar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -880,27 +880,6 @@ export const SearchBar: React.FC = () => {
toast(t('同步完成!所有仓库都是最新的。', 'Sync completed! All repositories are up to date.'), 'info');
}

// 向量搜索开启时,后台自动索引新仓库
const vsCfg = useAppStore.getState().vectorSearchConfig;
const embCfgs = useAppStore.getState().embeddingConfigs;
const activeEmb = embCfgs.find(c => c.id === vsCfg?.embeddingConfigId);
if (vsCfg?.enabled && vsCfg?.workerUrl && activeEmb && newRepoCount > 0) {
const { VectorSearchService, EmbeddingClient, indexAllRepos } = await import('../services/vectorSearchService');
const embClient = new EmbeddingClient(activeEmb);
const vecService = new VectorSearchService(vsCfg);
const readmeFetcher = githubToken
? (owner: string, repo: string, signal?: AbortSignal) => new GitHubApiService(githubToken).getRepositoryReadme(owner, repo, signal)
: undefined;
// 只索引新增仓库,不重复索引已有仓库
const newRepos = mergedRepositories.filter(repo => !existingRepoIds.has(repo.id));
if (newRepos.length > 0) {
indexAllRepos(newRepos, embClient, vecService, {
readmeFetcher,
indexMode: vsCfg.indexMode,
readmeMaxChars: vsCfg.readmeMaxChars,
}).catch(() => {});
}
}
} catch (error) {
console.error('Sync failed:', error);
if (error instanceof Error && error.message.includes('token')) {
Expand Down
127 changes: 105 additions & 22 deletions src/components/settings/VectorSearchSettings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ export const VectorSearchSettings: React.FC<VectorSearchSettingsProps> = ({ t })
setVectorIndexingState,
repositories,
githubToken,
updateRepository,
} = useAppStore();
Comment on lines 58 to 62

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Destructure setRepositories from useAppStore so that we can perform efficient batch updates of the repository list instead of calling updateRepository in a loop.

Suggested change
setVectorIndexingState,
repositories,
githubToken,
updateRepository,
} = useAppStore();
setVectorIndexingState,
repositories,
githubToken,
updateRepository,
setRepositories,
} = useAppStore();


// Local form state for embedding config
Expand Down Expand Up @@ -208,9 +209,16 @@ export const VectorSearchSettings: React.FC<VectorSearchSettingsProps> = ({ t })
}
}, [formWorkerUrl, formAuthToken, setVectorSearchStatus]);

const runIndexAll = useCallback(async (withCleanup: boolean) => {
if (!activeConfig) return;
// Number of repositories that still need vector indexing: analyzed, analysis
// not failed, and either never indexed or with content newer than the last
// successful index. The content time is last_edited when set, otherwise
// analyzed_at.
// NOTE(review): the timestamps are compared as plain strings, which is only a
// chronological comparison if both sides share the same ISO-8601 format —
// confirm for analyzed_at and last_edited.
const unindexedCount = repositories.filter((r) => {
if (!r.analyzed_at || r.analysis_failed) return false;
if (!r.vector_indexed_at) return true;
const contentTime = r.last_edited || r.analyzed_at || '';
return contentTime > r.vector_indexed_at;
}).length;

const createClients = useCallback(() => {
if (!activeConfig) return null;
const embeddingClient = new EmbeddingClient({
...activeConfig,
apiType: formApiType,
Expand All @@ -225,40 +233,64 @@ export const VectorSearchSettings: React.FC<VectorSearchSettingsProps> = ({ t })
authToken: formAuthToken,
embeddingConfigId: activeEmbeddingConfig || '',
});
const readmeFetcher = githubToken
? (owner: string, repo: string, signal?: AbortSignal) => {
const api = new GitHubApiService(githubToken);
return api.getRepositoryReadme(owner, repo, signal);
}
: undefined;
return { embeddingClient, vectorService, readmeFetcher };
}, [activeConfig, formApiType, formBaseUrl, formApiKey, formModel, formDimensions, formWorkerUrl, formAuthToken, activeEmbeddingConfig, githubToken]);

const handleRebuildIndex = useCallback(async () => {
const clients = createClients();
if (!clients) return;

const controller = new AbortController();
setAbortController(controller);
setVectorIndexingState({ isIndexing: true, phase: null, phaseDone: 0, phaseTotal: 0, result: null });

try {
if (withCleanup) {
const keepIds = repositories.map(r => String(r.id));
try {
await vectorService.cleanup(keepIds, controller.signal);
} catch (cleanupErr) {
// Cleanup 失败不阻塞重建,记录警告继续
console.warn('Vector cleanup failed, continuing with rebuild:', cleanupErr);
}
// 1. cleanup:删除不在当前仓库列表中的向量
const keepIds = repositories.map(r => String(r.id));
try {
await clients.vectorService.cleanup(keepIds, controller.signal);
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
} catch (cleanupErr) {
console.warn('Vector cleanup failed, continuing with rebuild:', cleanupErr);
}

const readmeFetcher = githubToken
? (owner: string, repo: string, signal?: AbortSignal) => {
const api = new GitHubApiService(githubToken);
return api.getRepositoryReadme(owner, repo, signal);
}
: undefined;
// 2. Clear vector_indexed_at on every repository that has one, including
// stale values left on repos that previously failed or are no longer
// indexable.
// NOTE(review): this issues one updateRepository call per stamped repository;
// presumably each call triggers its own store update — confirm whether a
// single batched update is preferable.
for (const repo of repositories) {
if (repo.vector_indexed_at) {
updateRepository({ ...repo, vector_indexed_at: undefined });
}
}

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Calling updateRepository in a loop triggers a state update and IndexedDB write for each repository. This is extremely inefficient and can cause significant UI lag or crash the app when there are many repositories. Instead, perform a single batch update using setRepositories.

      // 2. 清除所有 vector_indexed_at(包括之前失败/不可索引的 repo 的残留值)
      const clearedRepos = repositories.map(repo =>
        repo.vector_indexed_at ? { ...repo, vector_indexed_at: undefined } : repo
      );
      setRepositories(clearedRepos);


const result = await indexAllRepos(repositories, embeddingClient, vectorService, {
// 3. 全量索引
const now = new Date().toISOString();
const result = await indexAllRepos(repositories, clients.embeddingClient, clients.vectorService, {
onProgress: (progress) => setVectorIndexingState({
phase: progress.phase,
phaseDone: progress.done,
phaseTotal: progress.total,
}),
signal: controller.signal,
readmeFetcher,
readmeFetcher: clients.readmeFetcher,
indexMode: formIndexMode,
readmeMaxChars: formReadmeMaxChars,
incremental: false,
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// 4. Stamp vector_indexed_at on every repository that was indexed successfully.
// The repositories are read from useAppStore.getState() rather than from the
// closed-over `repositories` array, so each update spreads the store's current
// repo object.
const indexedSet = new Set(result.indexedRepoIds);
for (const repo of useAppStore.getState().repositories) {
if (indexedSet.has(repo.id)) {
updateRepository({ ...repo, vector_indexed_at: now });
}
}

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Calling updateRepository in a loop triggers a state update and IndexedDB write for each successfully indexed repository. This is extremely inefficient and can cause significant UI lag or crash the app when there are many repositories. Instead, perform a single batch update using setRepositories.

      // 4. 为成功索引的 repo 设置 vector_indexed_at
      const indexedSet = new Set(result.indexedRepoIds);
      const updatedRepos = useAppStore.getState().repositories.map(repo =>
        indexedSet.has(repo.id) ? { ...repo, vector_indexed_at: now } : repo
      );
      setRepositories(updatedRepos);


setVectorIndexingState({ result, isIndexing: false, phase: null });
// Rebuild 替换全部索引,vectorCount = 本次成功数量
setVectorSearchStatus({
connected: true,
vectorCount: result.indexed,
Expand All @@ -274,10 +306,56 @@ export const VectorSearchSettings: React.FC<VectorSearchSettingsProps> = ({ t })
} finally {
setAbortController(null);
}
}, [activeConfig, formApiType, formBaseUrl, formApiKey, formModel, formDimensions, formWorkerUrl, formAuthToken, formIndexMode, formReadmeMaxChars, activeEmbeddingConfig, repositories, githubToken, setVectorSearchStatus, setVectorIndexingState]);
}, [createClients, repositories, formIndexMode, formReadmeMaxChars, formDimensions, updateRepository, setVectorSearchStatus, setVectorIndexingState]);

const handleIncrementalIndex = useCallback(async () => {
const clients = createClients();
if (!clients) return;

const controller = new AbortController();
setAbortController(controller);
setVectorIndexingState({ isIndexing: true, phase: null, phaseDone: 0, phaseTotal: 0, result: null });

try {
const now = new Date().toISOString();
const result = await indexAllRepos(repositories, clients.embeddingClient, clients.vectorService, {
onProgress: (progress) => setVectorIndexingState({
phase: progress.phase,
phaseDone: progress.done,
phaseTotal: progress.total,
}),
signal: controller.signal,
readmeFetcher: clients.readmeFetcher,
indexMode: formIndexMode,
readmeMaxChars: formReadmeMaxChars,
incremental: true,
onRepoIndexed: (repoId) => {
// 用 getState() 获取最新 repo 数据,避免 stale closure 覆盖并发编辑
const repo = useAppStore.getState().repositories.find(r => r.id === repoId);
if (repo) {
updateRepository({ ...repo, vector_indexed_at: now });
}
},
});

const handleRebuildIndex = useCallback(() => runIndexAll(true), [runIndexAll]);
const handleIncrementalIndex = useCallback(() => runIndexAll(false), [runIndexAll]);
setVectorIndexingState({ result, isIndexing: false, phase: null });
const prevCount = useAppStore.getState().vectorSearchStatus.vectorCount || 0;
setVectorSearchStatus({
connected: true,
vectorCount: prevCount + result.indexed,
dimensions: formDimensions,
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
lastSyncAt: new Date().toISOString(),
});
} catch (err) {
if (err instanceof Error && err.message === 'Aborted') {
setVectorIndexingState({ isIndexing: false, phase: null, result: null });
} else {
setVectorIndexingState({ isIndexing: false, phase: null, result: { indexed: 0, skipped: 0, errors: repositories.length } });
}
} finally {
setAbortController(null);
}
}, [createClients, repositories, formIndexMode, formReadmeMaxChars, formDimensions, updateRepository, setVectorSearchStatus, setVectorIndexingState]);

const handleAbortIndexing = useCallback(() => {
abortController?.abort();
Expand Down Expand Up @@ -735,11 +813,16 @@ export const VectorSearchSettings: React.FC<VectorSearchSettingsProps> = ({ t })
</button>
<button
onClick={handleIncrementalIndex}
disabled={isIndexing || !isConfigComplete}
disabled={isIndexing || !isConfigComplete || unindexedCount === 0}
className="flex items-center gap-2 px-4 py-2 text-sm bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-md hover:bg-gray-300 dark:hover:bg-gray-600 disabled:opacity-50 disabled:cursor-not-allowed"
>
{isIndexing ? <Loader2 className="w-4 h-4 animate-spin" /> : <RefreshCw className="w-4 h-4" />}
{t('增量索引', 'Incremental Index')}
{unindexedCount > 0 && (
<span className="ml-1 px-1.5 py-0.5 text-xs bg-purple-500 text-white rounded-full">
{unindexedCount}
</span>
)}
</button>
{isIndexing && (
<button
Expand Down
59 changes: 43 additions & 16 deletions src/services/vectorSearchService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -351,8 +351,8 @@ export function buildEmbeddingText(repo: Repository, readmeContent?: string, max
}

/**
* 全量重建向量索引
* 遍历所有已分析仓库,分批生成 embedding 并 upsert 到 Worker
* 全量/增量重建向量索引
* 遍历已分析仓库,分批生成 embedding 并 upsert 到 Worker
* @param readmeFetcher 可选:获取仓库 README 内容的函数 (owner, repo) => content
*/
export interface IndexProgress {
Expand All @@ -372,37 +372,60 @@ export async function indexAllRepos(
readmeFetcher?: (owner: string, repo: string, signal?: AbortSignal) => Promise<string>;
indexMode?: 'description' | 'readme';
readmeMaxChars?: number;
incremental?: boolean;
onRepoIndexed?: (repoId: number) => void;
} = {}
): Promise<{ indexed: number; skipped: number; errors: number; error?: string }> {
const { batchSize = 100, onProgress, signal, readmeFetcher, indexMode = 'readme', readmeMaxChars = 6000 } = options;
): Promise<{ indexed: number; skipped: number; errors: number; error?: string; indexedRepoIds: number[] }> {
const { batchSize = 100, onProgress, signal, readmeFetcher, indexMode = 'readme', readmeMaxChars = 6000, incremental, onRepoIndexed } = options;

if (!Number.isInteger(batchSize) || batchSize <= 0) {
throw new Error('batchSize must be a positive integer');
}

// 只索引已分析且未失败的仓库
const indexable = repos.filter((r) => r.analyzed_at && !r.analysis_failed);
let indexable = repos.filter((r) => r.analyzed_at && !r.analysis_failed);
// In incremental mode, skip repositories that are already indexed and whose
// content has not changed since then.
if (incremental) {
indexable = indexable.filter((r) => {
if (!r.vector_indexed_at) return true; // never indexed
// Content updated after the last index must be re-indexed.
// NOTE(review): string comparison of timestamps — assumes both values use
// the same ISO-8601 format; confirm.
const contentTime = r.last_edited || r.analyzed_at || '';
return contentTime > r.vector_indexed_at;
});
}
let indexed = 0;
let errors = 0;
let lastError = '';
const indexedRepoIds: number[] = [];

// 仅在 readme 模式下获取 README 内容
const shouldFetchReadme = indexMode === 'readme' && readmeFetcher;
const readmeCache = new Map<string, string>();
if (shouldFetchReadme) {
for (let i = 0; i < indexable.length; i++) {
const repo = indexable[i];
const CONCURRENCY = 5;
let completed = 0;

for (let i = 0; i < indexable.length; i += CONCURRENCY) {
if (signal?.aborted) throw new Error('Aborted');
onProgress?.({ phase: 'readme', done: i, total: indexable.length });
try {
const [owner, name] = repo.full_name.split('/');
const readme = await readmeFetcher(owner, name, signal);
if (readme) readmeCache.set(repo.full_name, readme);
} catch {
// README 获取失败不影响索引

const batch = indexable.slice(i, i + CONCURRENCY);
const results = await Promise.allSettled(
batch.map(async (repo) => {
const [owner, name] = repo.full_name.split('/');
const readme = await readmeFetcher(owner, name, signal);
return { fullName: repo.full_name, readme };
})
);

for (const result of results) {
if (result.status === 'fulfilled' && result.value.readme) {
readmeCache.set(result.value.fullName, result.value.readme);
}
}

completed = Math.min(completed + batch.length, indexable.length);
onProgress?.({ phase: 'readme', done: completed, total: indexable.length });
}
onProgress?.({ phase: 'readme', done: indexable.length, total: indexable.length });
}

const totalBatches = Math.ceil(indexable.length / batchSize);
Expand Down Expand Up @@ -443,6 +466,10 @@ export async function indexAllRepos(
onProgress?.({ phase: 'uploading', done: currentBatch, total: totalBatches });
await vectorService.upsert(vectorizeVectors, signal);
indexed += batch.length;
for (const repo of batch) {
indexedRepoIds.push(repo.id);
onRepoIndexed?.(repo.id);
}
} catch (err) {
if (signal?.aborted || (err instanceof Error && err.message === 'Aborted')) {
throw new Error('Aborted');
Expand All @@ -456,5 +483,5 @@ export async function indexAllRepos(
onProgress?.({ phase: 'embedding', done: currentBatch, total: totalBatches });
}

return { indexed, skipped: repos.length - indexable.length, errors, error: lastError || undefined };
return { indexed, skipped: repos.length - indexable.length, errors, error: lastError || undefined, indexedRepoIds };
}
1 change: 1 addition & 0 deletions src/types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export interface Repository {
custom_category?: string;
category_locked?: boolean;
last_edited?: string;
vector_indexed_at?: string; // ISO timestamp of last successful vector indexing
last_release_fetch_time?: string; // ISO timestamp, for incremental sync
has_fetched_releases?: boolean; // whether this repo has been synced for releases
}
Expand Down
Loading
Loading