Skip to content

Commit 2700847

Browse files
committed
fix: retry invalid AI summary generation
1 parent df55321 commit 2700847

2 files changed

Lines changed: 219 additions & 34 deletions

File tree

src/components/settings/AIConfigPanel.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,7 @@ export const AIConfigPanel: React.FC<AIConfigPanelProps> = ({ t }) => {
303303
304304
要求:
305305
- summary:中文概述,说明仓库的主要功能和用途,不超过50字。
306+
禁止出现“我们被要求”“只输出JSON”“根据仓库信息”“summary/tags/platforms”等提示词复述。
306307
- tags:3-5个中文应用类型标签,请优先从提供的分类中选择。
307308
{CATEGORIES_INFO}
308309
- platforms:只能从 ["mac","windows","linux","ios","android","docker","web","cli"] 中选择;无法判断则为 []。
@@ -324,6 +325,7 @@ Dockerfile/docker-compose=docker;CLI/命令行/终端=cli;浏览器/前端/A
324325
325326
Requirements:
326327
- summary: A concise English overview explaining the main functionality and purpose, no more than 50 words.
328+
Do not include prompt restatements such as "asked to", "only output JSON", "based on repository information", or "summary/tags/platforms".
327329
- tags: 3-5 English application type tags, please prioritize from the provided categories.
328330
{CATEGORIES_INFO}
329331
- platforms: Must only choose from ["mac","windows","linux","ios","android","docker","web","cli"]; use [] if unable to determine.

src/services/aiService.ts

Lines changed: 217 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,17 @@ export interface ConnectionTestResult {
3434
message: string;
3535
}
3636

37+
/**
 * Analysis payload returned to callers of `analyzeRepository`:
 * a short summary, category tags and supported platform identifiers.
 */
type RepositoryAnalysisResult = {
  summary: string;
  tags: string[];
  platforms: string[];
};

/**
 * Outcome of parsing one raw AI reply.
 *
 * Modelled as a union discriminated on `isValid` so that a rejection reason
 * can only accompany a rejected reply:
 * - `isValid: true`  — the fields are usable as-is; no `invalidReason`.
 * - `isValid: false` — the reply must be retried; `invalidReason` (when set)
 *   says why and is fed back into the retry prompt.
 *
 * `invalidReason` stays readable on the un-narrowed type (as
 * `string | undefined`), so existing readers keep compiling.
 */
type ParsedAIResponse =
  | (RepositoryAnalysisResult & { isValid: true; invalidReason?: undefined })
  | (RepositoryAnalysisResult & { isValid: false; invalidReason?: string });
47+
3748
function getStatusCodeMeaning(statusCode: number, language: string): string {
3849
const meanings: Record<number, { zh: string; en: string }> = {
3950
400: { zh: '请求参数错误', en: 'Bad Request' },
@@ -61,6 +72,7 @@ function getErrorTypeFromStatus(statusCode: number): ConnectionTestResult['error
6172
export class AIService {
6273
private config: AIConfig;
6374
private language: string;
75+
private static readonly ANALYSIS_MAX_ATTEMPTS = 3;
6476

6577
constructor(config: AIConfig, language: string = 'zh') {
6678
this.config = config;
@@ -475,11 +487,7 @@ ${options.user}` : options.user;
475487
throw new Error('No content received from AI service');
476488
}
477489

478-
async analyzeRepository(repository: Repository, readmeContent: string, customCategories?: string[], signal?: AbortSignal): Promise<{
479-
summary: string;
480-
tags: string[];
481-
platforms: string[];
482-
}> {
490+
async analyzeRepository(repository: Repository, readmeContent: string, customCategories?: string[], signal?: AbortSignal): Promise<RepositoryAnalysisResult> {
483491
const startTime = Date.now();
484492
const configId = this.config.id;
485493
const { full_name } = repository;
@@ -493,27 +501,105 @@ ${options.user}` : options.user;
493501

494502
try {
495503
const system = this.language === 'zh'
496-
? '你是一个专业的GitHub仓库分析助手。请严格按照用户指定的语言进行分析,无论原始内容是什么语言。请用中文简洁地分析仓库,提供实用的概述、分类标签和支持的平台类型。只输出合法JSON,不要输出思考过程、Markdown、代码块标记或任何额外文本。'
497-
: 'You are a professional GitHub repository analysis assistant. Please strictly analyze in the language specified by the user, regardless of the original content language. Please analyze repositories concisely in English, providing practical overviews, category tags, and supported platform types. Only output valid JSON. Do not output thinking process, Markdown, code block markers, or any extra text.';
504+
? '你是一个专业的GitHub仓库分析助手。请严格按照用户指定的语言进行分析,无论原始内容是什么语言。请用中文简洁地分析仓库,提供实用的概述、分类标签和支持的平台类型。只输出合法JSON,不要输出思考过程、Markdown、代码块标记或任何额外文本。summary字段只能描述仓库功能,不得复述提示词、输出格式或“只输出JSON”等要求。'
505+
: 'You are a professional GitHub repository analysis assistant. Please strictly analyze in the language specified by the user, regardless of the original content language. Please analyze repositories concisely in English, providing practical overviews, category tags, and supported platform types. Only output valid JSON. Do not output thinking process, Markdown, code block markers, or any extra text. The summary field must describe repository functionality only; never restate the prompt, output format, or JSON-only requirements.';
498506

499-
const content = await this.requestText({
500-
system,
501-
user: prompt,
502-
temperature: 0.3,
503-
maxTokens: 1000,
504-
signal,
505-
});
507+
let lastContent = '';
508+
let lastInvalidReason = '';
509+
510+
for (let attempt = 1; attempt <= AIService.ANALYSIS_MAX_ATTEMPTS; attempt++) {
511+
const content = await this.requestText({
512+
system,
513+
user: attempt === 1
514+
? prompt
515+
: this.createAnalysisRetryPrompt(prompt, lastContent, lastInvalidReason),
516+
temperature: attempt === 1 ? 0.3 : 0.1,
517+
maxTokens: 1000,
518+
signal,
519+
});
520+
521+
const result = this.parseAIResponse(content);
522+
if (result.isValid) {
523+
logger.info('ai', 'AI analysis completed', {
524+
owner,
525+
repo,
526+
configId,
527+
attempts: attempt,
528+
durationMs: Date.now() - startTime,
529+
});
530+
return {
531+
summary: result.summary,
532+
tags: result.tags,
533+
platforms: result.platforms,
534+
};
535+
}
506536

507-
const result = this.parseAIResponse(content);
508-
logger.info('ai', 'AI analysis completed', { owner, repo, configId, durationMs: Date.now() - startTime });
509-
return result;
537+
lastContent = content;
538+
lastInvalidReason = result.invalidReason || (this.language === 'zh' ? '返回内容不符合要求' : 'Response did not meet requirements');
539+
540+
if (attempt < AIService.ANALYSIS_MAX_ATTEMPTS) {
541+
logger.warn('ai', 'AI analysis response invalid, retrying', {
542+
owner,
543+
repo,
544+
configId,
545+
attempt,
546+
invalidReason: lastInvalidReason,
547+
});
548+
}
549+
}
550+
551+
throw new Error(this.language === 'zh'
552+
? `AI返回内容不符合要求,已重试${AIService.ANALYSIS_MAX_ATTEMPTS - 1}次:${lastInvalidReason}`
553+
: `AI response did not meet requirements after ${AIService.ANALYSIS_MAX_ATTEMPTS - 1} retries: ${lastInvalidReason}`);
510554
} catch (error) {
511555
logger.errorFromError('ai', 'AI analysis failed', error, { configId, durationMs: Date.now() - startTime });
512556
// 抛出错误,让调用方处理失败状态
513557
throw error;
514558
}
515559
}
516560

561+
/**
 * Builds the user prompt for a retry after the previous AI reply was rejected.
 *
 * The prompt states the rejection reason, repeats the output rules, embeds the
 * original analysis task, and appends the rejected reply (passed through
 * `sanitizeForPrompt` and capped at 1200 characters) for correction only.
 * The wording is Chinese when `this.language` is `'zh'`, English otherwise.
 *
 * @param originalPrompt  The first-attempt analysis prompt, embedded unchanged.
 * @param previousContent Raw text of the rejected AI reply.
 * @param invalidReason   Why the previous reply was rejected.
 * @returns The retry prompt with surrounding whitespace trimmed.
 */
private createAnalysisRetryPrompt(originalPrompt: string, previousContent: string, invalidReason: string): string {
  // Cap the echoed reply so a long bad answer cannot dominate the retry prompt.
  const priorReply = this.sanitizeForPrompt(previousContent).slice(0, 1200);

  const retryPrompt = this.language === 'zh'
    ? `
上一次 AI 输出不符合要求,原因:${invalidReason}

请基于同一仓库信息重新生成结果。必须只输出一个合法 JSON 对象,不要 Markdown、代码块、解释或任何额外文本。

强制要求:
- summary 必须是仓库功能和用途的中文概述,不超过50字。
- summary 禁止复述提示词、输出格式、字段名或“只输出JSON”等要求。
- tags 必须是字符串数组。
- platforms 只能从 ["mac","windows","linux","ios","android","docker","web","cli"] 中选择。

原始分析任务:
${originalPrompt}

上一次错误输出(仅用于纠错,不要复述):
${priorReply}
`
    : `
The previous AI output did not meet the requirements. Reason: ${invalidReason}

Regenerate the result for the same repository information. Output exactly one valid JSON object. Do not output Markdown, code fences, explanations, or any extra text.

Mandatory requirements:
- summary must describe the repository functionality and purpose in no more than 50 words.
- summary must not restate the prompt, output format, field names, or JSON-only requirements.
- tags must be a string array.
- platforms must only use ["mac","windows","linux","ios","android","docker","web","cli"].

Original analysis task:
${originalPrompt}

Previous invalid output for correction only. Do not restate it:
${priorReply}
`;

  return retryPrompt.trim();
}
602+
517603
private createCustomAnalysisPrompt(repository: Repository, readmeContent: string, customCategories?: string[]): string {
518604
const repoInfo = `
519605
${this.language === 'zh' ? '仓库名称' : 'Repository Name'}: ${repository.full_name}
@@ -560,6 +646,7 @@ ${this.sanitizeForPrompt(readmeContent.substring(0, 2000))}
560646
561647
要求:
562648
- summary:中文概述,说明仓库的主要功能和用途,不超过50字。
649+
禁止出现“我们被要求”“只输出JSON”“根据仓库信息”“summary/tags/platforms”等提示词复述。
563650
- tags:3-5个中文应用类型标签${customCategories && customCategories.length > 0 ? ',请优先从上方的可用分类中选择' : ',类似应用商店的分类,如:开发工具、Web应用、移动应用、数据库、AI工具等'}${categoriesLine}
564651
- platforms:只能从 ["mac","windows","linux","ios","android","docker","web","cli"] 中选择;无法判断则为 []。
565652
@@ -585,6 +672,7 @@ Please analyze the following GitHub repository information and only output a val
585672
586673
Requirements:
587674
- summary: A concise English overview explaining the main functionality and purpose, no more than 50 words.
675+
Do not include prompt restatements such as "asked to", "only output JSON", "based on repository information", or "summary/tags/platforms".
588676
- tags: 3-5 English application type tags${customCategories && customCategories.length > 0 ? ', please prioritize from the available categories above' : ', similar to app store categories such as: development tools, web apps, mobile apps, database, AI tools, etc.'}.${categoriesLine}
589677
- platforms: Must only choose from ["mac","windows","linux","ios","android","docker","web","cli"]; use [] if unable to determine.
590678
@@ -606,7 +694,82 @@ ${repoInfo}
606694

607695
private static readonly VALID_PLATFORMS = ['mac', 'windows', 'linux', 'ios', 'android', 'docker', 'web', 'cli'];
608696

609-
private parseAIResponse(content: string): { summary: string; tags: string[]; platforms: string[] } {
697+
/**
 * Removes prompt-restatement ("meta") wording from a model-produced summary.
 *
 * Steps:
 * 1. Trim and strip wrapping ASCII quotes.
 * 2. If the text before the last colon (ASCII or full-width) is meta text,
 *    keep only what follows that colon.
 * 3. Repeatedly strip known lead-in phrases until the text stops changing.
 * 4. Reject (return `null`) when what is left still looks like meta text,
 *    is shorter than 3 characters, or consists of punctuation only.
 *
 * NOTE(review): the Chinese alternatives in these regular expressions are
 * reconstructed from the forbidden phrases listed in the analysis prompts
 * (“我们被要求”, “只输出JSON”, “根据仓库信息”, "summary/tags/platforms").
 * The available copy of this method had lost every CJK character inside its
 * regex literals, which left empty alternations that match any string and
 * `(?:?)` groups that do not compile. Confirm the exact phrase lists against
 * the repository before relying on them.
 *
 * @param raw Summary text taken from the parsed AI JSON.
 * @returns The cleaned summary, or `null` when it must be treated as invalid.
 */
private sanitizeSummary(raw: string): string | null {
  if (!raw) return null;

  let cleaned = raw
    .trim()
    .replace(/^["']+|["']+$/g, '')
    .trim();

  // Phrases that mark the text as a restatement of the prompt rather than a
  // description of the repository. Every pattern requires a distinctive
  // phrase; none may be satisfiable by the empty string.
  const hardMetaPatterns: RegExp[] = [
    /(?:我们|我)被要求/,
    /(?:只|仅)\s*(?:输出)?\s*JSON/i,
    // The prohibition word is mandatory: a bare "Markdown" is a legitimate
    // topic for a repository summary and must not be flagged.
    /(?:不要|不得|禁止)\s*(?:输出)?\s*(?:思考过程|Markdown|代码块|额外文本)/i,
    /(?:根据|基于)(?:该|此)?(?:仓库|项目|repo|repository)(?:的)?(?:信息|描述)/i,
    /(?:提供|输出)\s*summary[,,、\s]*(?:tags|platforms?)/i,
    /summary[,,、/\s]*(?:tags?)[,,、/\s]*(?:和|与|and)?\s*platforms?/i,
    // Leading \b keeps the pronoun from matching the tail of another word
    // (e.g. "CLI requested", "API told").
    /\b(?:I|we)\s*(?:(?:have been|was|am)\s*)?(?:asked|instructed|told|requested)\b/i,
    /(?:only\s+output|output\s+only)\s+(?:valid\s+)?json/i,
    /(?:do\s+not|don't)\s+output\s+(?:any\s+)?(?:thinking|markdown|code\s+block|explanation|extra\s+text)/i,
    /(?:based|according)\s+(?:on|to)\s+(?:the\s+)?(?:repository|repo|project)\s+(?:information|description)/i,
    /(?:need|required|asked)\s+to\s+provide\s+summary/i,
  ];

  const isMetaText = (value: string): boolean =>
    hardMetaPatterns.some((pattern) => pattern.test(value));

  // "<meta preamble>: <actual summary>" — keep only the part after the colon.
  const lastColon = Math.max(cleaned.lastIndexOf(':'), cleaned.lastIndexOf(':'));
  if (lastColon >= 0 && isMetaText(cleaned.slice(0, lastColon))) {
    cleaned = cleaned.slice(lastColon + 1).trim();
  }

  // Lead-in phrases that can be removed while keeping the rest of the text.
  const metaPrefixes: RegExp[] = [
    /^(?:我们|我)被要求[^。!?.!?]{0,80}[。!?.!?]\s*/,
    /^(?:只|仅)\s*输出\s*JSON[^。!?.!?]{0,80}[。!?.!?]\s*/i,
    /^(?:根据|基于)(?:该|此)?(?:仓库|项目|repo|repository)?(?:的)?(?:信息|描述)[,,::.。\s]*/i,
    /^(?:以下|下面)是(?:仓库|项目)?(?:的)?(?:分析|概述|总结|结果)[::,,.。\s]*/,
    /^(?:好的|当然)[,,.。!!\s]*/,
    /^(?:I|we)\s*(?:(?:have been|was|am)\s*)?(?:asked|instructed|told|requested)\s*(?:to\s+)?(?:analyze|evaluate|summarize|review)?\s*(?:the|this)?\s*(?:repository|repo|project)?[.,:;\s]*/i,
    /^(?:only\s+output|output\s+only)\s+(?:valid\s+)?json[^.!?]{0,80}[.!?]\s*/i,
    /^(?:based|according)\s+(?:on|to)\s+(?:the\s+)?(?:request|prompt|instruction|information|description)[.,:;\s]*/i,
    /^(?:here|this)\s+(?:is|are)\s+(?:the|my|a)\s+(?:analysis|summary|result|overview)[.,:;\s]*/i,
    /^(?:the|this)\s+(?:repository|repo|project)\s+(?:appears|seems|looks|is)\s+(?:to\s+be|like)?\s*(?:a\s+|an\s+)?[.,:;\s]*/i,
    /^(?:analysis|summary|overview)\s*(?:result|of)?[.:]\s*/i,
  ];

  // Stripping one prefix can expose another, so iterate to a fixed point.
  let previous = '';
  while (previous !== cleaned) {
    previous = cleaned;
    for (const pattern of metaPrefixes) {
      cleaned = cleaned.replace(pattern, '');
    }
    cleaned = cleaned.trim();
  }

  // Whatever survived must no longer read as prompt restatement.
  if (isMetaText(cleaned)) return null;

  // Too short to be a meaningful overview.
  if (cleaned.length < 3) return null;

  // Nothing but whitespace / punctuation left.
  if (/^[\s.,;:!?,。;:!?、]+$/.test(cleaned)) return null;

  return cleaned;
}
771+
772+
private parseAIResponse(content: string): ParsedAIResponse {
610773
try {
611774
// Strip thinking tags that some models embed in the content field (e.g. <think>...</think>)
612775
// Also handle truncated tags (dangling <think> without </think>) from token exhaustion
@@ -620,35 +783,55 @@ ${repoInfo}
620783

621784
const parsed = this.extractAndParseAIJson(cleaned);
622785
if (parsed) {
786+
const rawSummary = typeof parsed.summary === 'string' ? parsed.summary.trim() : '';
787+
const summary = this.sanitizeSummary(rawSummary);
788+
const tags = Array.isArray(parsed.tags) ? parsed.tags.filter((v) => typeof v === 'string').slice(0, 5) : [];
789+
const platforms = Array.isArray(parsed.platforms)
790+
? Array.from(
791+
new Set(
792+
parsed.platforms
793+
.filter((v): v is string => typeof v === 'string')
794+
.map((v) => v.trim().toLowerCase())
795+
.filter((v) => AIService.VALID_PLATFORMS.includes(v))
796+
)
797+
).slice(0, 8)
798+
: [];
799+
800+
if (!summary) {
801+
return {
802+
summary: '',
803+
tags,
804+
platforms,
805+
isValid: false,
806+
invalidReason: rawSummary
807+
? (this.language === 'zh' ? 'summary包含提示词复述或不是仓库概述' : 'summary contains prompt restatement or is not a repository overview')
808+
: (this.language === 'zh' ? 'summary缺失或为空' : 'summary is missing or empty'),
809+
};
810+
}
811+
623812
return {
624-
summary: typeof parsed.summary === 'string' && parsed.summary.trim()
625-
? parsed.summary.trim()
626-
: (this.language === 'zh' ? '无法生成概述' : 'Unable to generate summary'),
627-
tags: Array.isArray(parsed.tags) ? parsed.tags.filter((v) => typeof v === 'string').slice(0, 5) : [],
628-
platforms: Array.isArray(parsed.platforms)
629-
? Array.from(
630-
new Set(
631-
parsed.platforms
632-
.filter((v): v is string => typeof v === 'string')
633-
.map((v) => v.trim().toLowerCase())
634-
.filter((v) => AIService.VALID_PLATFORMS.includes(v))
635-
)
636-
).slice(0, 8)
637-
: [],
813+
summary,
814+
tags,
815+
platforms,
816+
isValid: true,
638817
};
639818
}
640819

641820
return {
642-
summary: cleaned.substring(0, 50) + (cleaned.length > 50 ? '...' : ''),
821+
summary: '',
643822
tags: [],
644823
platforms: [],
824+
isValid: false,
825+
invalidReason: this.language === 'zh' ? '未返回合法JSON对象' : 'No valid JSON object returned',
645826
};
646827
} catch (error) {
647828
logger.errorFromError('ai', 'Failed to parse AI response', error);
648829
return {
649-
summary: this.language === 'zh' ? '分析失败' : 'Analysis failed',
830+
summary: '',
650831
tags: [],
651832
platforms: [],
833+
isValid: false,
834+
invalidReason: this.language === 'zh' ? '解析AI返回失败' : 'Failed to parse AI response',
652835
};
653836
}
654837
}

0 commit comments

Comments
 (0)