@@ -34,6 +34,17 @@ export interface ConnectionTestResult {
3434 message : string ;
3535}
3636
/**
 * Analysis payload returned to callers of `analyzeRepository`:
 * a short summary plus tag and platform lists.
 */
type RepositoryAnalysisResult = {
  summary: string;
  tags: string[];
  platforms: string[];
};

/**
 * Result of parsing one raw AI reply, discriminated on `isValid`.
 *
 * - `isValid: true`  – the payload is usable and never carries an `invalidReason`.
 * - `isValid: false` – the payload was rejected; `invalidReason`, when present,
 *   is a human-readable explanation that can be fed into a retry prompt.
 *
 * Modelling this as a union (instead of a flag plus an optional field) lets the
 * compiler narrow on `isValid` and rules out "valid but with a failure reason".
 */
type ParsedAIResponse =
  | (RepositoryAnalysisResult & { isValid: true; invalidReason?: undefined })
  | (RepositoryAnalysisResult & { isValid: false; invalidReason?: string });
47+
3748function getStatusCodeMeaning ( statusCode : number , language : string ) : string {
3849 const meanings : Record < number , { zh : string ; en : string } > = {
3950 400 : { zh : '请求参数错误' , en : 'Bad Request' } ,
@@ -61,6 +72,7 @@ function getErrorTypeFromStatus(statusCode: number): ConnectionTestResult['error
6172export class AIService {
6273 private config : AIConfig ;
6374 private language : string ;
75+ private static readonly ANALYSIS_MAX_ATTEMPTS = 3 ;
6476
6577 constructor ( config : AIConfig , language : string = 'zh' ) {
6678 this . config = config ;
@@ -475,11 +487,7 @@ ${options.user}` : options.user;
475487 throw new Error ( 'No content received from AI service' ) ;
476488 }
477489
478- async analyzeRepository ( repository : Repository , readmeContent : string , customCategories ?: string [ ] , signal ?: AbortSignal ) : Promise < {
479- summary : string ;
480- tags : string [ ] ;
481- platforms : string [ ] ;
482- } > {
490+ async analyzeRepository ( repository : Repository , readmeContent : string , customCategories ?: string [ ] , signal ?: AbortSignal ) : Promise < RepositoryAnalysisResult > {
483491 const startTime = Date . now ( ) ;
484492 const configId = this . config . id ;
485493 const { full_name } = repository ;
@@ -493,27 +501,105 @@ ${options.user}` : options.user;
493501
494502 try {
495503 const system = this . language === 'zh'
496- ? '你是一个专业的GitHub仓库分析助手。请严格按照用户指定的语言进行分析,无论原始内容是什么语言。请用中文简洁地分析仓库,提供实用的概述、分类标签和支持的平台类型。只输出合法JSON,不要输出思考过程、Markdown、代码块标记或任何额外文本。'
497- : 'You are a professional GitHub repository analysis assistant. Please strictly analyze in the language specified by the user, regardless of the original content language. Please analyze repositories concisely in English, providing practical overviews, category tags, and supported platform types. Only output valid JSON. Do not output thinking process, Markdown, code block markers, or any extra text.' ;
504+ ? '你是一个专业的GitHub仓库分析助手。请严格按照用户指定的语言进行分析,无论原始内容是什么语言。请用中文简洁地分析仓库,提供实用的概述、分类标签和支持的平台类型。只输出合法JSON,不要输出思考过程、Markdown、代码块标记或任何额外文本。summary字段只能描述仓库功能,不得复述提示词、输出格式或“只输出JSON”等要求。 '
505+ : 'You are a professional GitHub repository analysis assistant. Please strictly analyze in the language specified by the user, regardless of the original content language. Please analyze repositories concisely in English, providing practical overviews, category tags, and supported platform types. Only output valid JSON. Do not output thinking process, Markdown, code block markers, or any extra text. The summary field must describe repository functionality only; never restate the prompt, output format, or JSON-only requirements. ' ;
498506
499- const content = await this . requestText ( {
500- system,
501- user : prompt ,
502- temperature : 0.3 ,
503- maxTokens : 1000 ,
504- signal,
505- } ) ;
507+ let lastContent = '' ;
508+ let lastInvalidReason = '' ;
509+
510+ for ( let attempt = 1 ; attempt <= AIService . ANALYSIS_MAX_ATTEMPTS ; attempt ++ ) {
511+ const content = await this . requestText ( {
512+ system,
513+ user : attempt === 1
514+ ? prompt
515+ : this . createAnalysisRetryPrompt ( prompt , lastContent , lastInvalidReason ) ,
516+ temperature : attempt === 1 ? 0.3 : 0.1 ,
517+ maxTokens : 1000 ,
518+ signal,
519+ } ) ;
520+
521+ const result = this . parseAIResponse ( content ) ;
522+ if ( result . isValid ) {
523+ logger . info ( 'ai' , 'AI analysis completed' , {
524+ owner,
525+ repo,
526+ configId,
527+ attempts : attempt ,
528+ durationMs : Date . now ( ) - startTime ,
529+ } ) ;
530+ return {
531+ summary : result . summary ,
532+ tags : result . tags ,
533+ platforms : result . platforms ,
534+ } ;
535+ }
506536
507- const result = this . parseAIResponse ( content ) ;
508- logger . info ( 'ai' , 'AI analysis completed' , { owner, repo, configId, durationMs : Date . now ( ) - startTime } ) ;
509- return result ;
537+ lastContent = content ;
538+ lastInvalidReason = result . invalidReason || ( this . language === 'zh' ? '返回内容不符合要求' : 'Response did not meet requirements' ) ;
539+
540+ if ( attempt < AIService . ANALYSIS_MAX_ATTEMPTS ) {
541+ logger . warn ( 'ai' , 'AI analysis response invalid, retrying' , {
542+ owner,
543+ repo,
544+ configId,
545+ attempt,
546+ invalidReason : lastInvalidReason ,
547+ } ) ;
548+ }
549+ }
550+
551+ throw new Error ( this . language === 'zh'
552+ ? `AI返回内容不符合要求,已重试${ AIService . ANALYSIS_MAX_ATTEMPTS - 1 } 次:${ lastInvalidReason } `
553+ : `AI response did not meet requirements after ${ AIService . ANALYSIS_MAX_ATTEMPTS - 1 } retries: ${ lastInvalidReason } ` ) ;
510554 } catch ( error ) {
511555 logger . errorFromError ( 'ai' , 'AI analysis failed' , error , { configId, durationMs : Date . now ( ) - startTime } ) ;
512556 // 抛出错误,让调用方处理失败状态
513557 throw error ;
514558 }
515559 }
516560
/**
 * Build the user prompt for a retry after the previous AI reply was rejected.
 *
 * The prompt states why the last reply was rejected, repeats the hard output
 * rules, re-sends the original task, and appends the rejected reply so the
 * model can correct it.
 *
 * @param originalPrompt  The prompt that was sent on the first attempt.
 * @param previousContent Raw text of the rejected reply; it is passed through
 *                        `sanitizeForPrompt` and cut to its first 1200 characters.
 * @param invalidReason   Explanation of why the previous reply was rejected.
 * @returns The retry prompt in Chinese when `this.language === 'zh'`, otherwise in English.
 */
private createAnalysisRetryPrompt(originalPrompt: string, previousContent: string, invalidReason: string): string {
  const previousOutput = this.sanitizeForPrompt(previousContent).slice(0, 1200);

  // Render the whitelist from the constant the parser validates against, so the
  // retry instructions cannot drift from what is actually accepted.
  const platformList = JSON.stringify(AIService.VALID_PLATFORMS);

  const lines = this.language === 'zh'
    ? [
        `上一次 AI 输出不符合要求,原因:${invalidReason}`,
        '',
        '请基于同一仓库信息重新生成结果。必须只输出一个合法 JSON 对象,不要 Markdown、代码块、解释或任何额外文本。',
        '',
        '强制要求:',
        '- summary 必须是仓库功能和用途的中文概述,不超过50字。',
        '- summary 禁止复述提示词、输出格式、字段名或“只输出JSON”等要求。',
        '- tags 必须是字符串数组。',
        `- platforms 只能从 ${platformList} 中选择。`,
        '',
        '原始分析任务:',
        originalPrompt,
        '',
        '上一次错误输出(仅用于纠错,不要复述):',
        previousOutput,
      ]
    : [
        `The previous AI output did not meet the requirements. Reason: ${invalidReason}`,
        '',
        'Regenerate the result for the same repository information. Output exactly one valid JSON object. Do not output Markdown, code fences, explanations, or any extra text.',
        '',
        'Mandatory requirements:',
        '- summary must describe the repository functionality and purpose in no more than 50 words.',
        '- summary must not restate the prompt, output format, field names, or JSON-only requirements.',
        '- tags must be a string array.',
        `- platforms must only use ${platformList}.`,
        '',
        'Original analysis task:',
        originalPrompt,
        '',
        'Previous invalid output for correction only. Do not restate it:',
        previousOutput,
      ];

  // trim() drops the trailing blank line left behind when the previous output is empty.
  return lines.join('\n').trim();
}
602+
517603 private createCustomAnalysisPrompt ( repository : Repository , readmeContent : string , customCategories ?: string [ ] ) : string {
518604 const repoInfo = `
519605${ this . language === 'zh' ? '仓库名称' : 'Repository Name' } : ${ repository . full_name }
@@ -560,6 +646,7 @@ ${this.sanitizeForPrompt(readmeContent.substring(0, 2000))}
560646
561647要求:
562648- summary:中文概述,说明仓库的主要功能和用途,不超过50字。
649+ 禁止出现“我们被要求”“只输出JSON”“根据仓库信息”“summary/tags/platforms”等提示词复述。
563650- tags:3-5个中文应用类型标签${ customCategories && customCategories . length > 0 ? ',请优先从上方的可用分类中选择' : ',类似应用商店的分类,如:开发工具、Web应用、移动应用、数据库、AI工具等' } 。${ categoriesLine }
564651- platforms:只能从 ["mac","windows","linux","ios","android","docker","web","cli"] 中选择;无法判断则为 []。
565652
@@ -585,6 +672,7 @@ Please analyze the following GitHub repository information and only output a val
585672
586673Requirements:
587674- summary: A concise English overview explaining the main functionality and purpose, no more than 50 words.
675+ Do not include prompt restatements such as "asked to", "only output JSON", "based on repository information", or "summary/tags/platforms".
588676- tags: 3-5 English application type tags${ customCategories && customCategories . length > 0 ? ', please prioritize from the available categories above' : ', similar to app store categories such as: development tools, web apps, mobile apps, database, AI tools, etc.' } .${ categoriesLine }
589677- platforms: Must only choose from ["mac","windows","linux","ios","android","docker","web","cli"]; use [] if unable to determine.
590678
@@ -606,7 +694,82 @@ ${repoInfo}
606694
607695 private static readonly VALID_PLATFORMS = [ 'mac' , 'windows' , 'linux' , 'ios' , 'android' , 'docker' , 'web' , 'cli' ] ;
608696
609- private parseAIResponse ( content : string ) : { summary : string ; tags : string [ ] ; platforms : string [ ] } {
/**
 * Patterns that mark a summary as prompt/meta narration instead of a
 * description of the repository. None of them use the `g` flag, so sharing
 * the instances across calls is safe.
 */
private static readonly SUMMARY_META_PATTERNS: readonly RegExp[] = [
  /(?:我们|我)被要求/,
  // NOTE(review): this also matches genuine descriptions of tools that emit JSON
  // (e.g. "输出 JSON 报告"); confirm that trade-off is intended.
  /(?:只输出|输出)\s*(?:合法)?\s*JSON/i,
  /不要(?:输出)?(?:任何)?(?:思考过程|Markdown|代码块|解释|额外文本)/i,
  /(?:根据|按照|基于)(?:给定的?)?(?:仓库|项目|repo|repository)(?:信息|描述)?[,\uFF0C。.\s]*(?:需要|分析|判断|提供)/i,
  /(?:需要|要求)提供\s*summary[、,\uFF0C\s]*(?:tags|platforms?)/i,
  /summary[、,\uFF0C\/\s]*(?:tags?)[、,\uFF0C\/\s]*(?:和|与|and)?\s*platforms?/i,
  // Leading \b keeps words that merely end in "i"/"we" (e.g. "API requested") from matching.
  /\b(?:I|we)\s*(?:(?:have been|was|am)\s*)?(?:asked|instructed|told|requested)\b/i,
  /(?:only\s+output|output\s+only)\s+(?:valid\s+)?json/i,
  /(?:do\s+not|don't)\s+output\s+(?:any\s+)?(?:thinking|markdown|code\s+block|explanation|extra\s+text)/i,
  /(?:based|according)\s+(?:on|to)\s+(?:the\s+)?(?:repository|repo|project)\s+(?:information|description)/i,
  /(?:need|required|asked)\s+to\s+provide\s+summary/i,
];

/**
 * Lead-in phrases that are stripped from the start of a summary.
 * Full-width punctuation is written as \uFFxx escapes so that it cannot be
 * silently folded into its ASCII twin by tooling.
 */
private static readonly SUMMARY_META_PREFIXES: readonly RegExp[] = [
  /^(?:我们|我)被要求[^。.!?\uFF01\uFF1F]{0,80}[。.!?\uFF01\uFF1F]\s*/i,
  /^(?:根据|按照|基于)(?:给定的?)?(?:仓库|项目|repo|repository)?(?:信息|描述|要求|提示|指示|需求|以上信息)[^。.!?\uFF01\uFF1F]{0,80}[。.!?\uFF01\uFF1F]\s*/i,
  /^(?:需要|要求)提供\s*summary[^。.!?\uFF01\uFF1F]{0,80}[。.!?\uFF01\uFF1F]\s*/i,
  /^我们被要求(?:分析|评估|总结|概述|介绍)?(?:该|这个|此)?(?:仓库|项目|repo)?[,\uFF0C。.\s]*/i,
  /^我被要求(?:分析|评估|总结|概述|介绍)?(?:该|这个|此)?(?:仓库|项目|repo)?[,\uFF0C。.\s]*/i,
  /^请分析(?:该|这个|此)?(?:仓库|项目|repo)?[:\uFF1A,\uFF0C。.\s]*/i,
  /^(?:根据|按照|基于)\s*(?:要求|提示|指示|需求|描述|信息|以上信息)[,\uFF0C。.\s]*/i,
  /^(?:以下是?|下面是?)\s*(?:对?|的?)\s*(?:分析|评估|总结|概述|介绍)?(?:结果)?[:\uFF1A,\uFF0C。.\s]*/i,
  /^(?:分析|评估|总结|概述|介绍)\s*(?:结果|如下)[:\uFF1A,\uFF0C。.\s]*/i,
  /^(?:该|这个|此)\s*(?:仓库|项目|repo|project)\s*(?:是|为)[:\uFF1A,\uFF0C。.\s]*/i,
  /^(?:仓库|项目)\s*(?:分析|概述|总结)[:\uFF1A,\uFF0C。.\s]*/i,
  /^根据以上信息[,\uFF0C。.\s]*/i,
  /^综上所述[,\uFF0C。.\s]*/i,
  /^(?:I|we)\s*(?:(?:have been|was|am)\s*)?(?:asked|instructed|told|requested)\s*(?:to\s+)?(?:analyze|evaluate|summarize|review)?\s*(?:the|this)?\s*(?:repository|repo|project)?[.,:;\s]*/i,
  /^(?:only\s+output|output\s+only)\s+(?:valid\s+)?json[^.!?]{0,80}[.!?]\s*/i,
  /^(?:based|according)\s+(?:on|to)\s+(?:the\s+)?(?:request|prompt|instruction|information|description)[.,:;\s]*/i,
  /^(?:here|this)\s+(?:is|are)\s+(?:the|my|a)\s+(?:analysis|summary|result|overview)[.,:;\s]*/i,
  /^(?:the|this)\s+(?:repository|repo|project)\s+(?:appears|seems|looks|is)\s+(?:to\s+be|like)?\s*(?:a\s+|an\s+)?[.,:;\s]*/i,
  /^(?:analysis|summary|overview)\s*(?:result|of)?[.:]\s*/i,
];

/**
 * Strip prompt restatements from a model-produced summary.
 *
 * Steps, in order:
 *  1. Trim and drop surrounding straight/curly quotes.
 *  2. If the text before the last colon (ASCII or full-width) is meta
 *     narration, keep only what follows that colon.
 *  3. Repeatedly remove known lead-in phrases until nothing changes.
 *  4. Reject the result if it still matches a meta pattern, is shorter than
 *     three characters, or consists only of whitespace/punctuation.
 *
 * @param raw Summary text as returned by the model.
 * @returns The cleaned summary, or `null` when no usable summary remains.
 */
private sanitizeSummary(raw: string): string | null {
  if (!raw) return null;

  const isMetaText = (value: string): boolean =>
    AIService.SUMMARY_META_PATTERNS.some((pattern) => pattern.test(value));

  let cleaned = raw
    .trim()
    .replace(/^["'\u201C\u201D]+|["'\u201C\u201D]+$/g, '')
    .trim();

  // Consider both ':' and the full-width colon (U+FF1A) used in Chinese text.
  const lastColon = Math.max(cleaned.lastIndexOf(':'), cleaned.lastIndexOf('\uFF1A'));
  if (lastColon >= 0 && isMetaText(cleaned.slice(0, lastColon))) {
    cleaned = cleaned.slice(lastColon + 1).trim();
  }

  // Removing one lead-in can expose another, so iterate to a fixed point.
  let previous: string;
  do {
    previous = cleaned;
    for (const pattern of AIService.SUMMARY_META_PREFIXES) {
      cleaned = cleaned.replace(pattern, '');
    }
    cleaned = cleaned.trim();
  } while (previous !== cleaned);

  if (isMetaText(cleaned)) return null;

  if (cleaned.length < 3) return null;

  // Nothing but whitespace and ASCII/full-width punctuation is not a summary.
  if (/^[\s.,;:!?\uFF0C。\uFF1B\uFF1A\uFF01\uFF1F、]+$/.test(cleaned)) return null;

  return cleaned;
}
771+
772+ private parseAIResponse ( content : string ) : ParsedAIResponse {
610773 try {
611774 // Strip thinking tags that some models embed in the content field (e.g. <think>...</think>)
612775 // Also handle truncated tags (dangling <think> without </think>) from token exhaustion
@@ -620,35 +783,55 @@ ${repoInfo}
620783
621784 const parsed = this . extractAndParseAIJson ( cleaned ) ;
622785 if ( parsed ) {
786+ const rawSummary = typeof parsed . summary === 'string' ? parsed . summary . trim ( ) : '' ;
787+ const summary = this . sanitizeSummary ( rawSummary ) ;
788+ const tags = Array . isArray ( parsed . tags ) ? parsed . tags . filter ( ( v ) => typeof v === 'string' ) . slice ( 0 , 5 ) : [ ] ;
789+ const platforms = Array . isArray ( parsed . platforms )
790+ ? Array . from (
791+ new Set (
792+ parsed . platforms
793+ . filter ( ( v ) : v is string => typeof v === 'string' )
794+ . map ( ( v ) => v . trim ( ) . toLowerCase ( ) )
795+ . filter ( ( v ) => AIService . VALID_PLATFORMS . includes ( v ) )
796+ )
797+ ) . slice ( 0 , 8 )
798+ : [ ] ;
799+
800+ if ( ! summary ) {
801+ return {
802+ summary : '' ,
803+ tags,
804+ platforms,
805+ isValid : false ,
806+ invalidReason : rawSummary
807+ ? ( this . language === 'zh' ? 'summary包含提示词复述或不是仓库概述' : 'summary contains prompt restatement or is not a repository overview' )
808+ : ( this . language === 'zh' ? 'summary缺失或为空' : 'summary is missing or empty' ) ,
809+ } ;
810+ }
811+
623812 return {
624- summary : typeof parsed . summary === 'string' && parsed . summary . trim ( )
625- ? parsed . summary . trim ( )
626- : ( this . language === 'zh' ? '无法生成概述' : 'Unable to generate summary' ) ,
627- tags : Array . isArray ( parsed . tags ) ? parsed . tags . filter ( ( v ) => typeof v === 'string' ) . slice ( 0 , 5 ) : [ ] ,
628- platforms : Array . isArray ( parsed . platforms )
629- ? Array . from (
630- new Set (
631- parsed . platforms
632- . filter ( ( v ) : v is string => typeof v === 'string' )
633- . map ( ( v ) => v . trim ( ) . toLowerCase ( ) )
634- . filter ( ( v ) => AIService . VALID_PLATFORMS . includes ( v ) )
635- )
636- ) . slice ( 0 , 8 )
637- : [ ] ,
813+ summary,
814+ tags,
815+ platforms,
816+ isValid : true ,
638817 } ;
639818 }
640819
641820 return {
642- summary : cleaned . substring ( 0 , 50 ) + ( cleaned . length > 50 ? '...' : '' ) ,
821+ summary : '' ,
643822 tags : [ ] ,
644823 platforms : [ ] ,
824+ isValid : false ,
825+ invalidReason : this . language === 'zh' ? '未返回合法JSON对象' : 'No valid JSON object returned' ,
645826 } ;
646827 } catch ( error ) {
647828 logger . errorFromError ( 'ai' , 'Failed to parse AI response' , error ) ;
648829 return {
649- summary : this . language === 'zh' ? '分析失败' : 'Analysis failed ',
830+ summary : ' ',
650831 tags : [ ] ,
651832 platforms : [ ] ,
833+ isValid : false ,
834+ invalidReason : this . language === 'zh' ? '解析AI返回失败' : 'Failed to parse AI response' ,
652835 } ;
653836 }
654837 }
0 commit comments