From 229e7a18bc3e35aabbd0fe6f2bf4aae89875451c Mon Sep 17 00:00:00 2001 From: twwu Date: Thu, 26 Dec 2024 14:07:47 +0800 Subject: [PATCH 01/19] feat: add invalid document handling and improve modal close functionality --- .../create/embedding-process/index.tsx | 3 +++ .../documents/detail/completed/index.tsx | 24 ++++++++++++------- .../components/datasets/documents/list.tsx | 20 ++++++++-------- web/service/knowledge/use-document.ts | 4 ++++ 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/web/app/components/datasets/create/embedding-process/index.tsx b/web/app/components/datasets/create/embedding-process/index.tsx index 201333ffce4cbe..ead593d272c64c 100644 --- a/web/app/components/datasets/create/embedding-process/index.tsx +++ b/web/app/components/datasets/create/embedding-process/index.tsx @@ -30,6 +30,7 @@ import { useProviderContext } from '@/context/provider-context' import { sleep } from '@/utils' import { RETRIEVE_METHOD } from '@/types/app' import Tooltip from '@/app/components/base/tooltip' +import { useInvalidDocumentList } from '@/service/knowledge/use-document' type Props = { datasetId: string @@ -207,7 +208,9 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index }) const router = useRouter() + const invalidDocumentList = useInvalidDocumentList() const navToDocumentList = () => { + invalidDocumentList() router.push(`/datasets/${datasetId}/documents`) } const navToApiDocs = () => { diff --git a/web/app/components/datasets/documents/detail/completed/index.tsx b/web/app/components/datasets/documents/detail/completed/index.tsx index 8385bde04b26f5..1185c97e0ff80f 100644 --- a/web/app/components/datasets/documents/detail/completed/index.tsx +++ b/web/app/components/datasets/documents/detail/completed/index.tsx @@ -232,6 +232,16 @@ const Completed: FC = ({ setFullScreen(false) }, []) + const onCloseNewSegmentModal = useCallback(() => { + onNewSegmentModalChange(false) + setFullScreen(false) + }, [onNewSegmentModalChange]) + + const onCloseNewChildChunkModal = useCallback(() => { + setShowNewChildSegmentModal(false) + setFullScreen(false) + }, []) + const { mutateAsync: enableSegment } = useEnableSegment() const { mutateAsync: disableSegment } = useDisableSegment() @@ -623,6 +633,7 @@ const Completed: FC = ({ = ({ { - onNewSegmentModalChange(false) - setFullScreen(false) - }} + onCancel={onCloseNewSegmentModal} onSave={resetList} viewNewlyAddedChunk={viewNewlyAddedChunk} /> @@ -651,6 +660,7 @@ const Completed: FC = ({ = ({ { - setShowNewChildSegmentModal(false) - setFullScreen(false) - }} + onCancel={onCloseNewChildChunkModal} onSave={onSaveNewChildChunk} viewNewlyAddedChildChunk={viewNewlyAddedChildChunk} /> diff --git a/web/app/components/datasets/documents/list.tsx b/web/app/components/datasets/documents/list.tsx index 26c26414df675e..19d31fc32a51cc 100644 --- a/web/app/components/datasets/documents/list.tsx +++ b/web/app/components/datasets/documents/list.tsx @@ -133,6 +133,16 @@ export const StatusItem: FC<{ {DOC_INDEX_STATUS_MAP[localStatus]?.text} + { + errorMessage && ( + {errorMessage} + } + triggerClassName='ml-1 w-4 h-4' + /> + ) + } { scene === 'detail' && (
@@ -152,16 +162,6 @@ export const StatusItem: FC<{
) } - { - errorMessage && ( - {errorMessage} - } - triggerClassName='ml-1 w-4 h-4' - /> - ) - } } diff --git a/web/service/knowledge/use-document.ts b/web/service/knowledge/use-document.ts index 2b9981f22f32eb..f4828e120139c2 100644 --- a/web/service/knowledge/use-document.ts +++ b/web/service/knowledge/use-document.ts @@ -29,6 +29,10 @@ export const useDocumentList = (payload: { }) } +export const useInvalidDocumentList = () => { + return useInvalid(useDocumentListKey) +} + const useAutoDisabledDocumentKey = [NAME_SPACE, 'autoDisabledDocument'] export const useAutoDisabledDocuments = (datasetId: string) => { return useQuery({ From 9214cc840beb86f050734520d1fd41db0a155a78 Mon Sep 17 00:00:00 2001 From: NFish Date: Fri, 27 Dec 2024 11:51:20 +0800 Subject: [PATCH 02/19] fix: disabled retrieval setting in document settings page --- .../economical-retrieval-method-config/index.tsx | 5 ++++- .../common/retrieval-method-config/index.tsx | 8 +++++--- .../components/datasets/create/step-two/index.tsx | 14 ++++++++------ .../datasets/create/step-two/option-card.tsx | 4 ++-- .../model-selector/model-trigger.tsx | 1 + 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx b/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx index 9236858ae4c906..5183b7a94e33ce 100644 --- a/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx +++ b/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx @@ -10,11 +10,13 @@ import { RETRIEVE_METHOD } from '@/types/app' import type { RetrievalConfig } from '@/types/app' type Props = { + disabled?: boolean value: RetrievalConfig onChange: (value: RetrievalConfig) => void } const EconomicalRetrievalMethodConfig: FC = ({ + disabled = false, value, onChange, }) => { @@ -22,7 +24,8 @@ const EconomicalRetrievalMethodConfig: FC = ({ return (
- } + } title={t('dataset.retrieval.invertedIndex.title')} description={t('dataset.retrieval.invertedIndex.description')} isActive activeHeaderClassName='bg-dataset-option-card-purple-gradient' diff --git a/web/app/components/datasets/common/retrieval-method-config/index.tsx b/web/app/components/datasets/common/retrieval-method-config/index.tsx index 9ab157571b5b66..e9e4347069de56 100644 --- a/web/app/components/datasets/common/retrieval-method-config/index.tsx +++ b/web/app/components/datasets/common/retrieval-method-config/index.tsx @@ -20,11 +20,13 @@ import { import Badge from '@/app/components/base/badge' type Props = { + disabled?: boolean value: RetrievalConfig onChange: (value: RetrievalConfig) => void } const RetrievalMethodConfig: FC = ({ + disabled = false, value: passValue, onChange, }) => { @@ -58,7 +60,7 @@ const RetrievalMethodConfig: FC = ({ return (
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( - } + } title={t('dataset.retrieval.semantic_search.title')} description={t('dataset.retrieval.semantic_search.description')} isActive={ @@ -79,7 +81,7 @@ const RetrievalMethodConfig: FC = ({ )} {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( - } + } title={t('dataset.retrieval.full_text_search.title')} description={t('dataset.retrieval.full_text_search.description')} isActive={ @@ -100,7 +102,7 @@ const RetrievalMethodConfig: FC = ({ )} {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( - } + } title={
{t('dataset.retrieval.hybrid_search.title')}
diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 0d7202967a5e9f..176405c1130655 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -984,12 +984,14 @@ const StepTwo = ({ getIndexing_technique() === IndexingType.QUALIFIED ? ( ) : ( @@ -1010,7 +1012,7 @@ const StepTwo = ({ ) : (
- + {!datasetId && }
)} @@ -1081,11 +1083,11 @@ const StepTwo = ({ } { currentDocForm !== ChunkingMode.qa - && + && }
} diff --git a/web/app/components/datasets/create/step-two/option-card.tsx b/web/app/components/datasets/create/step-two/option-card.tsx index d0efdaabb1cdb8..719e7d4e7bfbd9 100644 --- a/web/app/components/datasets/create/step-two/option-card.tsx +++ b/web/app/components/datasets/create/step-two/option-card.tsx @@ -4,7 +4,7 @@ import classNames from '@/utils/classnames' const TriangleArrow: FC> = props => ( - + ) @@ -63,7 +63,7 @@ export const OptionCard: FC = forwardRef((props, ref) => { (isActive && !noHighlight) ? 'border-[1.5px] border-components-option-card-option-selected-border' : 'border border-components-option-card-option-border', - disabled && 'opacity-50', + disabled && 'opacity-50 pointer-events-none', className, )} style={{ diff --git a/web/app/components/header/account-setting/model-provider-page/model-selector/model-trigger.tsx b/web/app/components/header/account-setting/model-provider-page/model-selector/model-trigger.tsx index 556a2ef66f33a3..aba7ff5f64599f 100644 --- a/web/app/components/header/account-setting/model-provider-page/model-selector/model-trigger.tsx +++ b/web/app/components/header/account-setting/model-provider-page/model-selector/model-trigger.tsx @@ -36,6 +36,7 @@ const ModelTrigger: FC = ({ className={classNames( 'group flex items-center px-2 h-8 rounded-lg bg-components-input-bg-normal', !readonly && 'hover:bg-components-input-bg-hover cursor-pointer', + !!readonly && 'opacity-50', className, open && '!bg-components-input-bg-hover', model.status !== ModelStatusEnum.active && '!bg-[#FFFAEB]', From 12b67df8760cb9a92968c970e3e3e6d6e743559b Mon Sep 17 00:00:00 2001 From: NFish Date: Fri, 27 Dec 2024 14:54:43 +0800 Subject: [PATCH 03/19] fix: retrieval test trunk preview style update --- .../datasets/hit-testing/components/child-chunks-item.tsx | 2 +- .../components/datasets/hit-testing/components/result-item.tsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/web/app/components/datasets/hit-testing/components/child-chunks-item.tsx b/web/app/components/datasets/hit-testing/components/child-chunks-item.tsx index 043aa3cea7c948..3c01e3d0b3d083 100644 --- a/web/app/components/datasets/hit-testing/components/child-chunks-item.tsx +++ b/web/app/components/datasets/hit-testing/components/child-chunks-item.tsx @@ -17,7 +17,7 @@ const ChildChunks: FC = ({ const { id, score, content, position } = payload return (
C-{position}
diff --git a/web/app/components/datasets/hit-testing/components/result-item.tsx b/web/app/components/datasets/hit-testing/components/result-item.tsx index 36ee541161c964..0f9d52ec9f9685 100644 --- a/web/app/components/datasets/hit-testing/components/result-item.tsx +++ b/web/app/components/datasets/hit-testing/components/result-item.tsx @@ -66,7 +66,7 @@ const ResultItem: FC = ({ {/* Main */}
-
{content}
+
{content}
{isParentChildRetrieval && (
From beb96db350511ac61046364bedc76da7ec5be04f Mon Sep 17 00:00:00 2001 From: twwu Date: Fri, 27 Dec 2024 15:27:04 +0800 Subject: [PATCH 04/19] fix: update rerank model error messages for clarity and consistency --- .../datasets/common/check-rerank-model.ts | 8 +++ .../common/retrieval-method-config/index.tsx | 55 +++++++------- .../common/retrieval-param-config/index.tsx | 71 +++++++++---------- .../datasets/create/step-two/index.tsx | 62 ++++++---------- .../components/datasets/documents/list.tsx | 18 ++--- .../hit-testing/modify-retrieval-modal.tsx | 12 +--- .../datasets/settings/form/index.tsx | 22 ++---- web/i18n/en-US/app-debug.ts | 2 +- web/i18n/en-US/workflow.ts | 2 +- web/i18n/zh-Hans/app-debug.ts | 2 +- web/i18n/zh-Hans/workflow.ts | 2 +- 11 files changed, 115 insertions(+), 141 deletions(-) diff --git a/web/app/components/datasets/common/check-rerank-model.ts b/web/app/components/datasets/common/check-rerank-model.ts index 581c2bb69ac8b9..c78ffa02971b98 100644 --- a/web/app/components/datasets/common/check-rerank-model.ts +++ b/web/app/components/datasets/common/check-rerank-model.ts @@ -31,6 +31,14 @@ export const isReRankModelSelected = ({ return false })() + if ( + indexMethod === 'high_quality' + && ([RETRIEVE_METHOD.semantic, RETRIEVE_METHOD.fullText].includes(retrievalConfig.search_method)) + && retrievalConfig.reranking_enable + && !rerankModelSelected + ) + return false + if ( indexMethod === 'high_quality' && (retrievalConfig.search_method === RETRIEVE_METHOD.hybrid && retrievalConfig.reranking_mode !== RerankingModeEnum.WeightedScore) diff --git a/web/app/components/datasets/common/retrieval-method-config/index.tsx b/web/app/components/datasets/common/retrieval-method-config/index.tsx index 9ab157571b5b66..30be48476b7280 100644 --- a/web/app/components/datasets/common/retrieval-method-config/index.tsx +++ b/web/app/components/datasets/common/retrieval-method-config/index.tsx @@ -1,6 +1,6 @@ 'use client' import type { FC } from 'react' -import React from 'react' +import React, { useMemo } from 'react' import { useTranslation } from 'react-i18next' import Image from 'next/image' import RetrievalParamConfig from '../retrieval-param-config' @@ -31,30 +31,35 @@ const RetrievalMethodConfig: FC = ({ const { t } = useTranslation() const { supportRetrievalMethods } = useProviderContext() const { data: rerankDefaultModel } = useDefaultModel(ModelTypeEnum.rerank) - const value = (() => { - if (!passValue.reranking_model.reranking_model_name) { - return { - ...passValue, - reranking_model: { - reranking_provider_name: rerankDefaultModel?.provider.provider || '', - reranking_model_name: rerankDefaultModel?.model || '', - }, - reranking_mode: passValue.reranking_mode || (rerankDefaultModel ? RerankingModeEnum.RerankingModel : RerankingModeEnum.WeightedScore), - weights: passValue.weights || { - weight_type: WeightedScoreEnum.Customized, - vector_setting: { - vector_weight: DEFAULT_WEIGHTED_SCORE.other.semantic, - embedding_provider_name: '', - embedding_model_name: '', + const value = useMemo(() => { + return { + ...passValue, + ...(!passValue.reranking_model.reranking_model_name + ? { + reranking_model: { + reranking_provider_name: rerankDefaultModel?.provider.provider || '', + reranking_model_name: rerankDefaultModel?.model || '', }, - keyword_setting: { - keyword_weight: DEFAULT_WEIGHTED_SCORE.other.keyword, + } + : {}), + ...(passValue.search_method === RETRIEVE_METHOD.hybrid + ? { + reranking_mode: passValue.reranking_mode || (rerankDefaultModel ? RerankingModeEnum.RerankingModel : RerankingModeEnum.WeightedScore), + weights: passValue.weights || { + weight_type: WeightedScoreEnum.Customized, + vector_setting: { + vector_weight: DEFAULT_WEIGHTED_SCORE.other.semantic, + embedding_provider_name: '', + embedding_model_name: '', + }, + keyword_setting: { + keyword_weight: DEFAULT_WEIGHTED_SCORE.other.keyword, + }, }, - }, - } + } + : {}), } - return passValue - })() + }, [passValue, rerankDefaultModel]) return (
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( @@ -67,6 +72,7 @@ const RetrievalMethodConfig: FC = ({ onSwitched={() => onChange({ ...value, search_method: RETRIEVE_METHOD.semantic, + reranking_enable: false, })} effectImg={Effect.src} activeHeaderClassName='bg-dataset-option-card-purple-gradient' @@ -78,7 +84,7 @@ const RetrievalMethodConfig: FC = ({ /> )} - {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( + {supportRetrievalMethods.includes(RETRIEVE_METHOD.fullText) && ( } title={t('dataset.retrieval.full_text_search.title')} description={t('dataset.retrieval.full_text_search.description')} @@ -88,6 +94,7 @@ const RetrievalMethodConfig: FC = ({ onSwitched={() => onChange({ ...value, search_method: RETRIEVE_METHOD.fullText, + reranking_enable: false, })} effectImg={Effect.src} activeHeaderClassName='bg-dataset-option-card-purple-gradient' @@ -99,7 +106,7 @@ const RetrievalMethodConfig: FC = ({ /> )} - {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( + {supportRetrievalMethods.includes(RETRIEVE_METHOD.hybrid) && ( } title={
diff --git a/web/app/components/datasets/common/retrieval-param-config/index.tsx b/web/app/components/datasets/common/retrieval-param-config/index.tsx index 5136ac1659159d..e75e7e028aa10e 100644 --- a/web/app/components/datasets/common/retrieval-param-config/index.tsx +++ b/web/app/components/datasets/common/retrieval-param-config/index.tsx @@ -2,6 +2,7 @@ import type { FC } from 'react' import React, { useCallback } from 'react' import { useTranslation } from 'react-i18next' +import { useBoolean } from 'ahooks' import Image from 'next/image' import ProgressIndicator from '../../create/assets/progress-indicator.svg' @@ -39,6 +40,7 @@ const RetrievalParamConfig: FC = ({ const { t } = useTranslation() const canToggleRerankModalEnable = type !== RETRIEVE_METHOD.hybrid const isEconomical = type === RETRIEVE_METHOD.invertedIndex + const isHybridSearch = type === RETRIEVE_METHOD.hybrid const { defaultModel: rerankDefaultModel, modelList: rerankModelList, @@ -56,12 +58,18 @@ const RetrievalParamConfig: FC = ({ : undefined, ) - const handleDisabledSwitchClick = useCallback(() => { - if (!currentModel) - Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') }) - }, [currentModel, rerankDefaultModel, t]) + const [rerankingEnable, { toggle: toggleRerankingEnable }] = useBoolean((isHybridSearch || currentModel) ? value.reranking_enable : false) - const isHybridSearch = type === RETRIEVE_METHOD.hybrid + const handleDisabledSwitchClick = useCallback((enable: boolean) => { + toggleRerankingEnable() + if (enable && !currentModel) + Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') }) + onChange({ + ...value, + reranking_enable: enable, + }) + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [currentModel, rerankDefaultModel, onChange, value]) const rerankModel = (() => { if (value.reranking_model) { @@ -122,22 +130,11 @@ const RetrievalParamConfig: FC = ({
{canToggleRerankModalEnable && ( -
- { - onChange({ - ...value, - reranking_enable: v, - }) - }} - disabled={!currentModel} - /> -
+ )}
{t('common.modelProvider.rerankModel.key')} @@ -148,21 +145,23 @@ const RetrievalParamConfig: FC = ({ />
- { - onChange({ - ...value, - reranking_model: { - reranking_provider_name: v.provider, - reranking_model_name: v.model, - }, - }) - }} - /> + { + rerankingEnable && ( + { + onChange({ + ...value, + reranking_model: { + reranking_provider_name: v.provider, + reranking_model_name: v.model, + }, + }) + }} + /> + ) + }
)} { diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 0d7202967a5e9f..0742646b2f87bd 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -31,17 +31,17 @@ import LanguageSelect from './language-select' import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs' import cn from '@/utils/classnames' import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, DocumentItem, FullDocumentDetail, ParentMode, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' +import { ChunkingMode, DataSourceType, ProcessMode } from '@/models/datasets' import Button from '@/app/components/base/button' import FloatRightContainer from '@/app/components/base/float-right-container' import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config' import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config' import { type RetrievalConfig } from '@/types/app' -import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model' +import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model' import Toast from '@/app/components/base/toast' import type { NotionPage } from '@/models/common' import { DataSourceProvider } from '@/models/common' -import { ChunkingMode, DataSourceType, RerankingModeEnum } from '@/models/datasets' import { useDatasetDetailContext } from '@/context/dataset-detail' import I18n from '@/context/i18n' import { RETRIEVE_METHOD } from '@/types/app' @@ -90,17 +90,13 @@ type StepTwoProps = { onCancel?: () => void } -export enum SegmentType { - AUTO = 'automatic', - CUSTOM = 'custom', -} export enum IndexingType { QUALIFIED = 'high_quality', ECONOMICAL = 'economy', } const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n' -const DEFAULT_MAXMIMUM_CHUNK_LENGTH = 500 +const DEFAULT_MAXIMUM_CHUNK_LENGTH = 500 const DEFAULT_OVERLAP = 50 type ParentChildConfig = { @@ -162,12 +158,12 @@ const StepTwo = ({ const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type) const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type - const [segmentationType, setSegmentationType] = useState(SegmentType.CUSTOM) + const [segmentationType, setSegmentationType] = useState(ProcessMode.general) const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER) const setSegmentIdentifier = useCallback((value: string, canEmpty?: boolean) => { doSetSegmentIdentifier(value ? escape(value) : (canEmpty ? '' : DEFAULT_SEGMENT_IDENTIFIER)) }, []) - const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXMIMUM_CHUNK_LENGTH) // default chunk length + const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXIMUM_CHUNK_LENGTH) // default chunk length const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(4000) const [overlap, setOverlap] = useState(DEFAULT_OVERLAP) const [rules, setRules] = useState([]) @@ -198,7 +194,6 @@ const StepTwo = ({ ) // QA Related - const [isLanguageSelectDisabled, _setIsLanguageSelectDisabled] = useState(false) const [isQAConfirmDialogOpen, setIsQAConfirmDialogOpen] = useState(false) const [docForm, setDocForm] = useState( (datasetId && documentDetail) ? documentDetail.doc_form as ChunkingMode : ChunkingMode.text, @@ -348,7 +343,7 @@ const StepTwo = ({ } const updatePreview = () => { - if (segmentationType === SegmentType.CUSTOM && maxChunkLength > 4000) { + if (segmentationType === ProcessMode.general && maxChunkLength > 4000) { Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') }) return } @@ -373,13 +368,25 @@ const StepTwo = ({ model: defaultEmbeddingModel?.model || '', }, ) + const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || { + search_method: RETRIEVE_METHOD.semantic, + reranking_enable: false, + reranking_model: { + reranking_provider_name: rerankDefaultModel?.provider.provider, + reranking_model_name: rerankDefaultModel?.model, + }, + top_k: 3, + score_threshold_enabled: false, + score_threshold: 0.5, + } as RetrievalConfig) + const getCreationParams = () => { let params - if (segmentationType === SegmentType.CUSTOM && overlap > maxChunkLength) { + if (segmentationType === ProcessMode.general && overlap > maxChunkLength) { Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.overlapCheck') }) return } - if (segmentationType === SegmentType.CUSTOM && maxChunkLength > limitMaxChunkLength) { + if (segmentationType === ProcessMode.general && maxChunkLength > limitMaxChunkLength) { Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) }) return } @@ -389,7 +396,6 @@ const StepTwo = ({ doc_form: currentDocForm, doc_language: docLanguage, process_rule: getProcessRule(), - // eslint-disable-next-line @typescript-eslint/no-use-before-define retrieval_model: retrievalConfig, // Readonly. If want to changed, just go to settings page. embedding_model: embeddingModel.model, // Readonly embedding_model_provider: embeddingModel.provider, // Readonly @@ -403,7 +409,6 @@ const StepTwo = ({ rerankDefaultModel, isRerankDefaultModelValid: !!isRerankDefaultModelValid, rerankModelList, - // eslint-disable-next-line @typescript-eslint/no-use-before-define retrievalConfig, indexMethod: indexMethod as string, }) @@ -411,16 +416,6 @@ const StepTwo = ({ Toast.notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') }) return } - const postRetrievalConfig = ensureRerankModelSelected({ - rerankDefaultModel: rerankDefaultModel!, - retrievalConfig: { - // eslint-disable-next-line @typescript-eslint/no-use-before-define - ...retrievalConfig, - // eslint-disable-next-line @typescript-eslint/no-use-before-define - reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel, - }, - indexMethod: indexMethod as string, - }) params = { data_source: { type: dataSourceType, @@ -432,8 +427,7 @@ const StepTwo = ({ process_rule: getProcessRule(), doc_form: currentDocForm, doc_language: docLanguage, - - retrieval_model: postRetrievalConfig, + retrieval_model: retrievalConfig, embedding_model: embeddingModel.model, embedding_model_provider: embeddingModel.provider, } as CreateDocumentReq @@ -490,7 +484,6 @@ const StepTwo = ({ const getDefaultMode = () => { if (documentDetail) - // @ts-expect-error fix after api refactored setSegmentationType(documentDetail.dataset_process_rule.mode) } @@ -525,7 +518,6 @@ const StepTwo = ({ onSuccess(data) { updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) updateResultCache && updateResultCache(data) - // eslint-disable-next-line @typescript-eslint/no-use-before-define updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string) }, }, @@ -574,18 +566,6 @@ const StepTwo = ({ setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL) }, [isAPIKeySet, indexingType, datasetId]) - const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || { - search_method: RETRIEVE_METHOD.semantic, - reranking_enable: false, - reranking_model: { - reranking_provider_name: rerankDefaultModel?.provider.provider, - reranking_model_name: rerankDefaultModel?.model, - }, - top_k: 3, - score_threshold_enabled: false, - score_threshold: 0.5, - } as RetrievalConfig) - const economyDomRef = useRef(null) const isHoveringEconomy = useHover(economyDomRef) diff --git a/web/app/components/datasets/documents/list.tsx b/web/app/components/datasets/documents/list.tsx index 19d31fc32a51cc..00ccdfddce8c08 100644 --- a/web/app/components/datasets/documents/list.tsx +++ b/web/app/components/datasets/documents/list.tsx @@ -561,18 +561,14 @@ const DocumentList: FC = ({
-
- - {doc?.data_source_type === DataSourceType.NOTION && - } +
+
+ {doc?.data_source_type === DataSourceType.NOTION && } {doc?.data_source_type === DataSourceType.FILE && } - {doc?.data_source_type === DataSourceType.WEB && - } - { - doc.name - } - -
+ {doc?.data_source_type === DataSourceType.WEB && } +
+ {doc.name} +
diff --git a/web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx b/web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx index 2790ea4d8b9207..ba733c7faedcab 100644 --- a/web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx +++ b/web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx @@ -9,9 +9,8 @@ import type { RetrievalConfig } from '@/types/app' import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config' import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config' import Button from '@/app/components/base/button' -import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model' +import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model' import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks' -import { RerankingModeEnum } from '@/models/datasets' type Props = { indexMethod: string @@ -56,14 +55,7 @@ const ModifyRetrievalModal: FC = ({ Toast.notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') }) return } - onSave(ensureRerankModelSelected({ - rerankDefaultModel: rerankDefaultModel!, - retrievalConfig: { - ...retrievalConfig, - reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel, - }, - indexMethod, - })) + onSave(retrievalConfig) } if (!isShow) diff --git a/web/app/components/datasets/settings/form/index.tsx b/web/app/components/datasets/settings/form/index.tsx index 5f6fc00eb7e89e..7b641253345e71 100644 --- a/web/app/components/datasets/settings/form/index.tsx +++ b/web/app/components/datasets/settings/form/index.tsx @@ -17,11 +17,11 @@ import Input from '@/app/components/base/input' import Textarea from '@/app/components/base/textarea' import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development' import { updateDatasetSetting } from '@/service/datasets' -import { type DataSetListResponse, RerankingModeEnum } from '@/models/datasets' +import { type DataSetListResponse } from '@/models/datasets' import DatasetDetailContext from '@/context/dataset-detail' import { type RetrievalConfig } from '@/types/app' import { useAppContext } from '@/context/app-context' -import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model' +import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model' import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector' import { useModelList, @@ -119,17 +119,9 @@ const Form = () => { notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') }) return } - const postRetrievalConfig = ensureRerankModelSelected({ - rerankDefaultModel: rerankDefaultModel!, - retrievalConfig: { - ...retrievalConfig, - reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel, - }, - indexMethod, - }) - if (postRetrievalConfig.weights) { - postRetrievalConfig.weights.vector_setting.embedding_provider_name = currentDataset?.embedding_model_provider || '' - postRetrievalConfig.weights.vector_setting.embedding_model_name = currentDataset?.embedding_model || '' + if (retrievalConfig.weights) { + retrievalConfig.weights.vector_setting.embedding_provider_name = currentDataset?.embedding_model_provider || '' + retrievalConfig.weights.vector_setting.embedding_model_name = currentDataset?.embedding_model || '' } try { setLoading(true) @@ -141,8 +133,8 @@ const Form = () => { permission, indexing_technique: indexMethod, retrieval_model: { - ...postRetrievalConfig, - score_threshold: postRetrievalConfig.score_threshold_enabled ? postRetrievalConfig.score_threshold : 0, + ...retrievalConfig, + score_threshold: retrievalConfig.score_threshold_enabled ? retrievalConfig.score_threshold : 0, }, embedding_model: embeddingModel.model, embedding_model_provider: embeddingModel.provider, diff --git a/web/i18n/en-US/app-debug.ts b/web/i18n/en-US/app-debug.ts index 266da820a0dad2..9c1b8838715bb6 100644 --- a/web/i18n/en-US/app-debug.ts +++ b/web/i18n/en-US/app-debug.ts @@ -483,7 +483,7 @@ const translation = { title: 'Multi-path retrieval', description: 'Based on user intent, queries across all Knowledge, retrieves relevant text from multi-sources, and selects the best results matching the user query after reranking. ', }, - rerankModelRequired: 'Rerank model is required', + rerankModelRequired: 'A configured Rerank Model is required', params: 'Params', top_k: 'Top K', top_kTip: 'Used to filter chunks that are most similar to user questions. The system will also dynamically adjust the value of Top K, according to max_tokens of the selected model.', diff --git a/web/i18n/en-US/workflow.ts b/web/i18n/en-US/workflow.ts index fab25fa50958e2..e8332804c1d63e 100644 --- a/web/i18n/en-US/workflow.ts +++ b/web/i18n/en-US/workflow.ts @@ -181,7 +181,7 @@ const translation = { }, errorMsg: { fieldRequired: '{{field}} is required', - rerankModelRequired: 'Before turning on the Rerank Model, please confirm that the model has been successfully configured in the settings.', + rerankModelRequired: 'A configured Rerank Model is required', authRequired: 'Authorization is required', invalidJson: '{{field}} is invalid JSON', fields: { diff --git a/web/i18n/zh-Hans/app-debug.ts b/web/i18n/zh-Hans/app-debug.ts index 4e3f18ad7f2ab8..ca75b26a4f74fd 100644 --- a/web/i18n/zh-Hans/app-debug.ts +++ b/web/i18n/zh-Hans/app-debug.ts @@ -475,7 +475,7 @@ const translation = { title: '多路召回', description: '根据用户意图同时匹配所有知识库,从多路知识库查询相关文本片段,经过重排序步骤,从多路查询结果中选择匹配用户问题的最佳结果。', }, - rerankModelRequired: '请选择 Rerank 模型', + rerankModelRequired: '请选择可用的 Rerank 模型', params: '参数设置', top_k: 'Top K', top_kTip: '用于筛选与用户问题相似度最高的文本片段。系统同时会根据选用模型上下文窗口大小动态调整分段数量。', diff --git a/web/i18n/zh-Hans/workflow.ts b/web/i18n/zh-Hans/workflow.ts index dfad9208e73f4b..a1bbdca71318d7 100644 --- a/web/i18n/zh-Hans/workflow.ts +++ b/web/i18n/zh-Hans/workflow.ts @@ -181,7 +181,7 @@ const translation = { }, errorMsg: { fieldRequired: '{{field}} 不能为空', - rerankModelRequired: '开启 Rerank 模型前,请务必确认模型已在设置中成功配置。', + rerankModelRequired: '请选择可用的 Rerank 模型', authRequired: '请先授权', invalidJson: '{{field}} 是非法的 JSON', fields: { From 5b150a071ba8db535ba1bccd0faad421dc7cd009 Mon Sep 17 00:00:00 2001 From: NFish Date: Fri, 27 Dec 2024 15:42:19 +0800 Subject: [PATCH 05/19] fix: update Score component, return null if score is unavailable --- .../datasets/hit-testing/components/chunk-detail-modal.tsx | 2 +- .../components/datasets/hit-testing/components/score.tsx | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/web/app/components/datasets/hit-testing/components/chunk-detail-modal.tsx b/web/app/components/datasets/hit-testing/components/chunk-detail-modal.tsx index 8d49cef3d0ef09..fe2f2b8f36093f 100644 --- a/web/app/components/datasets/hit-testing/components/chunk-detail-modal.tsx +++ b/web/app/components/datasets/hit-testing/components/chunk-detail-modal.tsx @@ -56,7 +56,7 @@ const ChunkDetailModal: FC = ({
-
+
{content}
{!isParentChildRetrieval && keywords && keywords.length > 0 && ( diff --git a/web/app/components/datasets/hit-testing/components/score.tsx b/web/app/components/datasets/hit-testing/components/score.tsx index 115141eaaa0406..c77ba23c2aa889 100644 --- a/web/app/components/datasets/hit-testing/components/score.tsx +++ b/web/app/components/datasets/hit-testing/components/score.tsx @@ -4,7 +4,7 @@ import React from 'react' import cn from '@/utils/classnames' type Props = { - value: number + value: number | null besideChunkName?: boolean } @@ -12,12 +12,14 @@ const Score: FC = ({ value, besideChunkName, }) => { + if (!value || isNaN(value)) + return null return (
score
-
{value.toFixed(2)}
+
{value?.toFixed(2)}
) From c09b46d4e9dae4a7f6f0af72189f8415bd2aefd2 Mon Sep 17 00:00:00 2001 From: NFish Date: Fri, 27 Dec 2024 16:04:16 +0800 Subject: [PATCH 06/19] Update build-push.yml add fix/parent-child-retrieval --- .github/workflows/build-push.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml index 8e5279fb67659b..0c6ed8a7fb61dd 100644 --- a/.github/workflows/build-push.yml +++ b/.github/workflows/build-push.yml @@ -5,6 +5,7 @@ on: branches: - "main" - "deploy/dev" + - "fix/parent-child-retrieval" release: types: [published] From b0e302c79c41464d91c90d072f3065d2fa170557 Mon Sep 17 00:00:00 2001 From: NFish Date: Fri, 27 Dec 2024 16:06:06 +0800 Subject: [PATCH 07/19] test:format code, trigger build --- web/app/components/datasets/hit-testing/components/score.tsx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web/app/components/datasets/hit-testing/components/score.tsx b/web/app/components/datasets/hit-testing/components/score.tsx index c77ba23c2aa889..76914318e01141 100644 --- a/web/app/components/datasets/hit-testing/components/score.tsx +++ b/web/app/components/datasets/hit-testing/components/score.tsx @@ -15,7 +15,8 @@ const Score: FC = ({ if (!value || isNaN(value)) return null return ( -
+
score
From d1bb8eab824d60489573c7bd3fbc64da12cacf42 Mon Sep 17 00:00:00 2001 From: nite-knite Date: Sat, 28 Dec 2024 21:50:13 +0800 Subject: [PATCH 08/19] feat: add parent-child related parameters to dataset API doc --- .../datasets/template/template.en.mdx | 35 ++++++++++++++++- .../datasets/template/template.zh.mdx | 39 +++++++++++++++++-- 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx index d3dcfc4b24d598..f2db83e47ec727 100644 --- a/web/app/(commonLayout)/datasets/template/template.en.mdx +++ b/web/app/(commonLayout)/datasets/template/template.en.mdx @@ -52,6 +52,15 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - high_quality High quality: embedding using embedding model, built as vector database index - economy Economy: Build using inverted index of keyword table index + + Format of indexed content + - text_model Text documents are directly embedded; `economy` mode defaults to using this form + - hierarchical_model Parent-child mode + - qa_model Q&A Mode: Generates Q&A pairs for segmented documents and then embeds the questions + + + In Q&A mode, specify the language of the document, for example: English, Chinese + Processing rules - mode (string) Cleaning, segmentation mode, automatic / custom @@ -65,6 +74,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - segmentation (object) Segmentation rules - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n - max_tokens Maximum length (token) defaults to 1000 + - parent_mode Retrieval mode of parent chunks: full-doc full text retrieval / paragraph paragraph retrieval + - subchunk_segmentation (object) Child chunk rules + - separator Segmentation identifier. Currently, only one delimiter is allowed. The default is *** + - max_tokens The maximum length (tokens) must be validated to be shorter than the length of the parent chunk @@ -155,6 +168,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - high_quality High quality: embedding using embedding model, built as vector database index - economy Economy: Build using inverted index of keyword table index + - doc_form Format of indexed content + - text_model Text documents are directly embedded; `economy` mode defaults to using this form + - hierarchical_model Parent-child mode + - qa_model Q&A Mode: Generates Q&A pairs for segmented documents and then embeds the questions + + - doc_language In Q&A mode, specify the language of the document, for example: English, Chinese + - process_rule Processing rules - mode (string) Cleaning, segmentation mode, automatic / custom - rules (object) Custom rules (in automatic mode, this field is empty) @@ -167,6 +187,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - segmentation (object) Segmentation rules - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n - max_tokens Maximum length (token) defaults to 1000 + - parent_mode Retrieval mode of parent chunks: full-doc full text retrieval / paragraph paragraph retrieval + - subchunk_segmentation (object) Child chunk rules + - separator Segmentation identifier. Currently, only one delimiter is allowed. The default is *** + - max_tokens The maximum length (tokens) must be validated to be shorter than the length of the parent chunk Files that need to be uploaded. @@ -449,6 +473,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - segmentation (object) Segmentation rules - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n - max_tokens Maximum length (token) defaults to 1000 + - parent_mode Retrieval mode of parent chunks: full-doc full text retrieval / paragraph paragraph retrieval + - subchunk_segmentation (object) Child chunk rules + - separator Segmentation identifier. Currently, only one delimiter is allowed. The default is *** + - max_tokens The maximum length (tokens) must be validated to be shorter than the length of the parent chunk @@ -546,6 +574,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - segmentation (object) Segmentation rules - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n - max_tokens Maximum length (token) defaults to 1000 + - parent_mode Retrieval mode of parent chunks: full-doc full text retrieval / paragraph paragraph retrieval + - subchunk_segmentation (object) Child chunk rules + - separator Segmentation identifier. Currently, only one delimiter is allowed. The default is *** + - max_tokens The maximum length (tokens) must be validated to be shorter than the length of the parent chunk @@ -984,7 +1016,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from @@ -1009,6 +1041,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - answer (text) Answer content, passed if the knowledge is in Q&A mode (optional) - keywords (list) Keyword (optional) - enabled (bool) False / true (optional) + - regenerate_child_chunks (bool) Whether to regenerate child chunks (optional) diff --git a/web/app/(commonLayout)/datasets/template/template.zh.mdx b/web/app/(commonLayout)/datasets/template/template.zh.mdx index db15ede9fcabf2..24418dea579cfb 100644 --- a/web/app/(commonLayout)/datasets/template/template.zh.mdx +++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx @@ -52,6 +52,15 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - high_quality 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 - economy 经济:使用 keyword table index 的倒排索引进行构建 + + 索引内容的形式 + - text_model text 文档直接 embedding,经济模式默认为该模式 + - hierarchical_model parent-child 模式 + - qa_model Q&A 模式:为分片文档生成 Q&A 对,然后对问题进行 embedding + + + 在 Q&A 模式下,指定文档的语言,例如:EnglishChinese + 处理规则 - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义 @@ -63,8 +72,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - remove_urls_emails 删除 URL、电子邮件地址 - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 - segmentation (object) 分段规则 - - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n + - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - max_tokens 最大长度(token)默认为 1000 + - parent_mode 父分段的召回模式 full-doc 全文召回 / paragraph 段落召回 + - subchunk_segmentation (object) 子分段规则 + - separator 分段标识符,目前仅允许设置一个分隔符。默认为 *** + - max_tokens 最大长度 (token) 需要校验小于父级的长度 @@ -155,6 +168,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - high_quality 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 - economy 经济:使用 keyword table index 的倒排索引进行构建 + - doc_form 索引内容的形式 + - text_model text 文档直接 embedding,经济模式默认为该模式 + - hierarchical_model parent-child 模式 + - qa_model Q&A 模式:为分片文档生成 Q&A 对,然后对问题进行 embedding + + - doc_language 在 Q&A 模式下,指定文档的语言,例如:EnglishChinese + - process_rule 处理规则 - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义 - rules (object) 自定义规则(自动模式下,该字段为空) @@ -167,6 +187,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - segmentation (object) 分段规则 - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - max_tokens 最大长度(token)默认为 1000 + - parent_mode 父分段的召回模式 full-doc 全文召回 / paragraph 段落召回 + - subchunk_segmentation (object) 子分段规则 + - separator 分段标识符,目前仅允许设置一个分隔符。默认为 *** + - max_tokens 最大长度 (token) 需要校验小于父级的长度 需要上传的文件。 @@ -411,7 +435,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from @@ -449,6 +473,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - segmentation (object) 分段规则 - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - max_tokens 最大长度(token)默认为 1000 + - parent_mode 父分段的召回模式 full-doc 全文召回 / paragraph 段落召回 + - subchunk_segmentation (object) 子分段规则 + - separator 分段标识符,目前仅允许设置一个分隔符。默认为 *** + - max_tokens 最大长度 (token) 需要校验小于父级的长度 @@ -508,7 +536,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from @@ -546,6 +574,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - segmentation (object) 分段规则 - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - max_tokens 最大长度(token)默认为 1000 + - parent_mode 父分段的召回模式 full-doc 全文召回 / paragraph 段落召回 + - subchunk_segmentation (object) 子分段规则 + - separator 分段标识符,目前仅允许设置一个分隔符。默认为 *** + - max_tokens 最大长度 (token) 需要校验小于父级的长度 @@ -1009,6 +1041,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - answer (text) 答案内容,非必填,如果知识库的模式为 Q&A 模式则传值 - keywords (list) 关键字,非必填 - enabled (bool) false/true,非必填 + - regenerate_child_chunks (bool) 是否重新生成子分段,非必填 From 91e814d427c16d2d589befa2de5fdd5127c61bb5 Mon Sep 17 00:00:00 2001 From: twwu Date: Mon, 30 Dec 2024 09:56:22 +0800 Subject: [PATCH 09/19] refactor: modify retrieval model configuration in settings modal --- .../dataset-config/settings-modal/index.tsx | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx index 7a347a1899511f..2879e34790bc6e 100644 --- a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx +++ b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx @@ -12,7 +12,7 @@ import Divider from '@/app/components/base/divider' import Button from '@/app/components/base/button' import Input from '@/app/components/base/input' import Textarea from '@/app/components/base/textarea' -import { type DataSet, RerankingModeEnum } from '@/models/datasets' +import { type DataSet } from '@/models/datasets' import { useToastContext } from '@/app/components/base/toast' import { updateDatasetSetting } from '@/service/datasets' import { useAppContext } from '@/context/app-context' @@ -21,7 +21,7 @@ import type { RetrievalConfig } from '@/types/app' import RetrievalSettings from '@/app/components/datasets/external-knowledge-base/create/RetrievalSettings' import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config' import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config' -import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model' +import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model' import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback' import PermissionSelector from '@/app/components/datasets/settings/permission-selector' import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector' @@ -109,14 +109,6 @@ const SettingsModal: FC = ({ notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') }) return } - const postRetrievalConfig = ensureRerankModelSelected({ - rerankDefaultModel: rerankDefaultModel!, - retrievalConfig: { - ...retrievalConfig, - reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel, - }, - indexMethod, - }) try { setLoading(true) const { id, name, description, permission } = localeCurrentDataset @@ -128,8 +120,8 @@ const SettingsModal: FC = ({ permission, indexing_technique: indexMethod, retrieval_model: { - ...postRetrievalConfig, - score_threshold: postRetrievalConfig.score_threshold_enabled ? postRetrievalConfig.score_threshold : 0, + ...retrievalConfig, + score_threshold: retrievalConfig.score_threshold_enabled ? retrievalConfig.score_threshold : 0, }, embedding_model: localeCurrentDataset.embedding_model, embedding_model_provider: localeCurrentDataset.embedding_model_provider, @@ -157,7 +149,7 @@ const SettingsModal: FC = ({ onSave({ ...localeCurrentDataset, indexing_technique: indexMethod, - retrieval_model_dict: postRetrievalConfig, + retrieval_model_dict: retrievalConfig, }) } catch (e) { From d876773c11fb7510e711adb210b5b01f1763930e Mon Sep 17 00:00:00 2001 From: twwu Date: Mon, 30 Dec 2024 12:00:33 +0800 Subject: [PATCH 10/19] fix: enhance rerank model validation and configuration --- .../params-config/config-content.tsx | 89 ++++++++----------- .../components/retrieval-config.tsx | 9 +- web/i18n/en-US/workflow.ts | 2 +- web/service/knowledge/use-document.ts | 4 +- 4 files changed, 47 insertions(+), 57 deletions(-) diff --git a/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx b/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx index dcb2b1a3fd5e46..fdd45ff9cedd02 100644 --- a/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx +++ b/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx @@ -3,6 +3,7 @@ import { memo, useCallback, useEffect, useMemo } from 'react' import type { FC } from 'react' import { useTranslation } from 'react-i18next' +import { useBoolean } from 'ahooks' import WeightedScore from './weighted-score' import TopKItem from '@/app/components/base/param-item/top-k-item' import ScoreThresholdItem from '@/app/components/base/param-item/score-threshold-item' @@ -60,7 +61,6 @@ const ConfigContent: FC = ({ const { modelList: rerankModelList, defaultModel: rerankDefaultModel, - currentModel: isRerankDefaultModelValid, } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank) const { @@ -162,31 +162,29 @@ const ConfigContent: FC = ({ const canManuallyToggleRerank = useMemo(() => { return (selectedDatasetsMode.allInternal && selectedDatasetsMode.allEconomic) - || selectedDatasetsMode.allExternal + || selectedDatasetsMode.allExternal }, [selectedDatasetsMode.allEconomic, selectedDatasetsMode.allExternal, selectedDatasetsMode.allInternal]) const showRerankModel = useMemo(() => { if (!canManuallyToggleRerank) return true - else if (canManuallyToggleRerank && !isRerankDefaultModelValid) - return false return datasetConfigs.reranking_enable - }, [canManuallyToggleRerank, datasetConfigs.reranking_enable, isRerankDefaultModelValid]) + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [canManuallyToggleRerank]) - const handleDisabledSwitchClick = useCallback(() => { - if (!currentRerankModel && !showRerankModel) - Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') }) - }, [currentRerankModel, showRerankModel, t]) + const [rerankingEnable, { toggle: toggleRerankingEnable }] = useBoolean(showRerankModel) - useEffect(() => { - if (canManuallyToggleRerank && showRerankModel !== datasetConfigs.reranking_enable) { - onChange({ - ...datasetConfigs, - reranking_enable: showRerankModel, - }) - } - }, [canManuallyToggleRerank, showRerankModel, datasetConfigs, onChange]) + const handleDisabledSwitchClick = useCallback((enable: boolean) => { + toggleRerankingEnable() + if (!currentRerankModel && enable) + Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') }) + onChange({ + ...datasetConfigs, + reranking_enable: enable, + }) + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [currentRerankModel, datasetConfigs, onChange]) return (
@@ -267,24 +265,12 @@ const ConfigContent: FC = ({
{ selectedDatasetsMode.allEconomic && !selectedDatasetsMode.mixtureInternalAndExternal && ( -
- { - if (canManuallyToggleRerank) { - onChange({ - ...datasetConfigs, - reranking_enable: v, - }) - } - }} - /> -
+ ) }
{t('common.modelProvider.rerankModel.key')}
@@ -298,21 +284,24 @@ const ConfigContent: FC = ({ triggerClassName='ml-1 w-4 h-4' />
-
- { - onChange({ - ...datasetConfigs, - reranking_model: { - reranking_provider_name: v.provider, - reranking_model_name: v.model, - }, - }) - }} - modelList={rerankModelList} - /> -
+ { + rerankingEnable && ( +
+ { + onChange({ + ...datasetConfigs, + reranking_model: { + reranking_provider_name: v.provider, + reranking_model_name: v.model, + }, + }) + }} + modelList={rerankModelList} + /> +
+ )}
) } diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx index b335b62e339e7d..d3e20797335264 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx +++ b/web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx @@ -59,7 +59,8 @@ const RetrievalConfig: FC = ({ }, [onOpenFromPropsChange]) const { - defaultModel: rerankDefaultModel, + currentProvider: validRerankDefaultProvider, + currentModel: validRerankDefaultModel, } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank) const { multiple_retrieval_config } = payload @@ -75,8 +76,8 @@ const RetrievalConfig: FC = ({ ? undefined : (!configs.reranking_model?.reranking_provider_name ? { - provider: rerankDefaultModel?.provider?.provider || '', - model: rerankDefaultModel?.model || '', + provider: validRerankDefaultProvider?.provider || '', + model: validRerankDefaultModel?.model || '', } : { provider: configs.reranking_model?.reranking_provider_name, @@ -86,7 +87,7 @@ const RetrievalConfig: FC = ({ weights: configs.weights as any, reranking_enable: configs.reranking_enable, }) - }, [onMultipleRetrievalConfigChange, payload.retrieval_mode, rerankDefaultModel?.provider?.provider, rerankDefaultModel?.model, onRetrievalModeChange]) + }, [onMultipleRetrievalConfigChange, payload.retrieval_mode, validRerankDefaultProvider, validRerankDefaultModel, onRetrievalModeChange]) return ( { }) } -const useDocumentDetailKey = [NAME_SPACE, 'documentDetail'] +const useDocumentDetailKey = [NAME_SPACE, 'documentDetail', 'withoutMetaData'] export const useDocumentDetail = (payload: { datasetId: string documentId: string @@ -118,7 +118,7 @@ export const useDocumentMetadata = (payload: { }) => { const { datasetId, documentId, params } = payload return useQuery({ - queryKey: [...useDocumentDetailKey, 'withMetaData', datasetId, documentId], + queryKey: [...useDocumentDetailKey, 'onlyMetaData', datasetId, documentId], queryFn: () => get(`/datasets/${datasetId}/documents/${documentId}`, { params }), }) } From 14d34e6b44cc4e2c2eaea267ef02a446575fa757 Mon Sep 17 00:00:00 2001 From: twwu Date: Mon, 30 Dec 2024 12:38:22 +0800 Subject: [PATCH 11/19] fix: simplify click handler in ResultItem component --- .../datasets/hit-testing/components/result-item.tsx | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/web/app/components/datasets/hit-testing/components/result-item.tsx b/web/app/components/datasets/hit-testing/components/result-item.tsx index 0f9d52ec9f9685..3c8c146d530356 100644 --- a/web/app/components/datasets/hit-testing/components/result-item.tsx +++ b/web/app/components/datasets/hit-testing/components/result-item.tsx @@ -43,13 +43,8 @@ const ResultItem: FC = ({ setFalse: hideDetailModal, }] = useBoolean(false) - const handleClickCard = () => { - if (!isParentChildRetrieval) - showDetailModal() - } - return ( -
+
{/* Meta info */}
From 5c45ff7967081fa4a76eb79eaffa89556e13126d Mon Sep 17 00:00:00 2001 From: twwu Date: Mon, 30 Dec 2024 16:46:26 +0800 Subject: [PATCH 12/19] fix: fix layout responsiveness issue with flex box --- .../datasets/documents/detail/completed/segment-list.tsx | 2 +- web/app/components/datasets/documents/detail/index.tsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/web/app/components/datasets/documents/detail/completed/segment-list.tsx b/web/app/components/datasets/documents/detail/completed/segment-list.tsx index c31345ff3be977..885db49db86909 100644 --- a/web/app/components/datasets/documents/detail/completed/segment-list.tsx +++ b/web/app/components/datasets/documents/detail/completed/segment-list.tsx @@ -80,7 +80,7 @@ ref: ForwardedRef, checked={selectedSegmentIds.includes(segItem.id)} onCheck={() => onSelected(segItem.id)} /> -
+
= ({ datasetId, documentId }) => {
{isDetailLoading ? - :
{embedding From 6f1a7fdf540e6b27827897b6297d4ab14f03a69a Mon Sep 17 00:00:00 2001 From: twwu Date: Tue, 31 Dec 2024 10:13:19 +0800 Subject: [PATCH 13/19] fix: update translation for rerank model requirement and clean up SWR hook --- web/app/components/datasets/documents/index.tsx | 2 +- web/i18n/zh-Hans/app-debug.ts | 2 +- web/i18n/zh-Hans/workflow.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/web/app/components/datasets/documents/index.tsx b/web/app/components/datasets/documents/index.tsx index 7365ff98507acc..d5d8f329b0c6f1 100644 --- a/web/app/components/datasets/documents/index.tsx +++ b/web/app/components/datasets/documents/index.tsx @@ -99,7 +99,7 @@ const Documents: FC = ({ datasetId }) => { return { page: currPage + 1, limit, keyword: debouncedSearchValue, fetch: isDataSourceNotion ? true : '' } }, [currPage, debouncedSearchValue, isDataSourceNotion, limit]) - const { data: documentsRes, error, mutate, isLoading: isListLoading } = useSWR( + const { data: documentsRes, mutate, isLoading: isListLoading } = useSWR( { action: 'fetchDocuments', datasetId, diff --git a/web/i18n/zh-Hans/app-debug.ts b/web/i18n/zh-Hans/app-debug.ts index ca75b26a4f74fd..14f1358dd64c7a 100644 --- a/web/i18n/zh-Hans/app-debug.ts +++ b/web/i18n/zh-Hans/app-debug.ts @@ -475,7 +475,7 @@ const translation = { title: '多路召回', description: '根据用户意图同时匹配所有知识库,从多路知识库查询相关文本片段,经过重排序步骤,从多路查询结果中选择匹配用户问题的最佳结果。', }, - rerankModelRequired: '请选择可用的 Rerank 模型', + rerankModelRequired: '未配置 Rerank 模型', params: '参数设置', top_k: 'Top K', top_kTip: '用于筛选与用户问题相似度最高的文本片段。系统同时会根据选用模型上下文窗口大小动态调整分段数量。', diff --git a/web/i18n/zh-Hans/workflow.ts b/web/i18n/zh-Hans/workflow.ts index f95b6d48be9393..93ebda4ce9aca7 100644 --- a/web/i18n/zh-Hans/workflow.ts +++ b/web/i18n/zh-Hans/workflow.ts @@ -183,7 +183,7 @@ const translation = { }, errorMsg: { fieldRequired: '{{field}} 不能为空', - rerankModelRequired: '请选择可用的 Rerank 模型', + rerankModelRequired: '未配置 Rerank 模型', authRequired: '请先授权', invalidJson: '{{field}} 是非法的 JSON', fields: { From 796a3278f452b323bd0b44dd4ea8e90cfacce518 Mon Sep 17 00:00:00 2001 From: twwu Date: Thu, 2 Jan 2025 10:29:41 +0800 Subject: [PATCH 14/19] fix: remove unused rerank model variables and improve retrieval config handling --- .../params-config/config-content.tsx | 46 +++----- .../dataset-config/params-config/index.tsx | 31 ++--- .../dataset-config/settings-modal/index.tsx | 2 - .../datasets/common/check-rerank-model.ts | 7 -- .../common/retrieval-method-config/index.tsx | 107 ++++++++++-------- .../common/retrieval-param-config/index.tsx | 43 +++---- .../datasets/create/step-two/index.tsx | 32 +++--- .../components/datasets/hit-testing/index.tsx | 2 +- .../hit-testing/modify-retrieval-modal.tsx | 4 - .../datasets/settings/form/index.tsx | 2 - .../nodes/knowledge-retrieval/use-config.ts | 2 +- .../nodes/knowledge-retrieval/utils.ts | 21 ++-- 12 files changed, 143 insertions(+), 156 deletions(-) diff --git a/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx b/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx index fdd45ff9cedd02..3744c6a56b2787 100644 --- a/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx +++ b/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx @@ -3,7 +3,6 @@ import { memo, useCallback, useEffect, useMemo } from 'react' import type { FC } from 'react' import { useTranslation } from 'react-i18next' -import { useBoolean } from 'ahooks' import WeightedScore from './weighted-score' import TopKItem from '@/app/components/base/param-item/top-k-item' import ScoreThresholdItem from '@/app/components/base/param-item/score-threshold-item' @@ -60,35 +59,24 @@ const ConfigContent: FC = ({ const { modelList: rerankModelList, - defaultModel: rerankDefaultModel, } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank) const { currentModel: currentRerankModel, } = useCurrentProviderAndModel( rerankModelList, - rerankDefaultModel - ? { - ...rerankDefaultModel, - provider: rerankDefaultModel.provider.provider, - } - : undefined, + { + provider: datasetConfigs.reranking_model?.reranking_provider_name, + model: datasetConfigs.reranking_model?.reranking_model_name, + }, ) - const rerankModel = (() => { - if (datasetConfigs.reranking_model?.reranking_provider_name) { - return { - provider_name: datasetConfigs.reranking_model.reranking_provider_name, - model_name: datasetConfigs.reranking_model.reranking_model_name, - } + const rerankModel = useMemo(() => { + return { + provider_name: datasetConfigs?.reranking_model?.reranking_provider_name ?? '', + model_name: datasetConfigs?.reranking_model?.reranking_model_name ?? '', } - else if (rerankDefaultModel) { - return { - provider_name: rerankDefaultModel.provider.provider, - model_name: rerankDefaultModel.model, - } - } - })() + }, [datasetConfigs.reranking_model]) const handleParamChange = (key: string, value: number) => { if (key === 'top_k') { @@ -133,6 +121,12 @@ const ConfigContent: FC = ({ } const handleRerankModeChange = (mode: RerankingModeEnum) => { + if (mode === datasetConfigs.reranking_mode) + return + + if (mode === RerankingModeEnum.RerankingModel && !currentRerankModel) + Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') }) + onChange({ ...datasetConfigs, reranking_mode: mode, @@ -170,13 +164,9 @@ const ConfigContent: FC = ({ return true return datasetConfigs.reranking_enable - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [canManuallyToggleRerank]) - - const [rerankingEnable, { toggle: toggleRerankingEnable }] = useBoolean(showRerankModel) + }, [datasetConfigs.reranking_enable, canManuallyToggleRerank]) const handleDisabledSwitchClick = useCallback((enable: boolean) => { - toggleRerankingEnable() if (!currentRerankModel && enable) Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') }) onChange({ @@ -267,7 +257,7 @@ const ConfigContent: FC = ({ selectedDatasetsMode.allEconomic && !selectedDatasetsMode.mixtureInternalAndExternal && ( @@ -285,7 +275,7 @@ const ConfigContent: FC = ({ />
{ - rerankingEnable && ( + showRerankModel && (
{ let errMsg = '' if (tempDataSetConfigs.retrieval_model === RETRIEVE_TYPE.multiWay) { if (tempDataSetConfigs.reranking_enable && tempDataSetConfigs.reranking_mode === RerankingModeEnum.RerankingModel - && !isRerankDefaultModelValid + && !isCurrentRerankModelValid ) errMsg = t('appDebug.datasetConfig.rerankModelRequired') } @@ -66,16 +76,7 @@ const ParamsConfig = ({ const handleSave = () => { if (!isValid()) return - const config = { ...tempDataSetConfigs } - if (config.retrieval_model === RETRIEVE_TYPE.multiWay - && config.reranking_mode === RerankingModeEnum.RerankingModel - && !config.reranking_model) { - config.reranking_model = { - reranking_provider_name: rerankDefaultModel?.provider?.provider, - reranking_model_name: rerankDefaultModel?.model, - } as any - } - setDatasetConfigs(config) + setDatasetConfigs(tempDataSetConfigs) setRerankSettingModalOpen(false) } @@ -94,7 +95,7 @@ const ParamsConfig = ({ reranking_enable: restConfigs.reranking_enable, }, selectedDatasets, selectedDatasets, { provider: rerankDefaultProvider?.provider, - model: isRerankDefaultModelValid?.model, + model: rerankDefaultModel?.model, }) setTempDataSetConfigs({ diff --git a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx index 2879e34790bc6e..506406cfe08c2a 100644 --- a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx +++ b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx @@ -99,8 +99,6 @@ const SettingsModal: FC = ({ } if ( !isReRankModelSelected({ - rerankDefaultModel, - isRerankDefaultModelValid: !!isRerankDefaultModelValid, rerankModelList, retrievalConfig, indexMethod, diff --git a/web/app/components/datasets/common/check-rerank-model.ts b/web/app/components/datasets/common/check-rerank-model.ts index c78ffa02971b98..ccb8c45a094d67 100644 --- a/web/app/components/datasets/common/check-rerank-model.ts +++ b/web/app/components/datasets/common/check-rerank-model.ts @@ -6,14 +6,10 @@ import type { import { RerankingModeEnum } from '@/models/datasets' export const isReRankModelSelected = ({ - rerankDefaultModel, - isRerankDefaultModelValid, retrievalConfig, rerankModelList, indexMethod, }: { - rerankDefaultModel?: DefaultModelResponse - isRerankDefaultModelValid: boolean retrievalConfig: RetrievalConfig rerankModelList: Model[] indexMethod?: string @@ -25,9 +21,6 @@ export const isReRankModelSelected = ({ return provider?.models.find(({ model }) => model === retrievalConfig.reranking_model?.reranking_model_name) } - if (isRerankDefaultModelValid) - return !!rerankDefaultModel - return false })() diff --git a/web/app/components/datasets/common/retrieval-method-config/index.tsx b/web/app/components/datasets/common/retrieval-method-config/index.tsx index 7f9bdaa1712043..aee978cc53f621 100644 --- a/web/app/components/datasets/common/retrieval-method-config/index.tsx +++ b/web/app/components/datasets/common/retrieval-method-config/index.tsx @@ -1,6 +1,6 @@ 'use client' import type { FC } from 'react' -import React, { useMemo } from 'react' +import React, { useCallback } from 'react' import { useTranslation } from 'react-i18next' import Image from 'next/image' import RetrievalParamConfig from '../retrieval-param-config' @@ -10,7 +10,7 @@ import { retrievalIcon } from '../../create/icons' import type { RetrievalConfig } from '@/types/app' import { RETRIEVE_METHOD } from '@/types/app' import { useProviderContext } from '@/context/provider-context' -import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks' +import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks' import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' import { DEFAULT_WEIGHTED_SCORE, @@ -27,41 +27,70 @@ type Props = { const RetrievalMethodConfig: FC = ({ disabled = false, - value: passValue, + value, onChange, }) => { const { t } = useTranslation() const { supportRetrievalMethods } = useProviderContext() - const { data: rerankDefaultModel } = useDefaultModel(ModelTypeEnum.rerank) - const value = useMemo(() => { - return { - ...passValue, - ...(!passValue.reranking_model.reranking_model_name - ? { - reranking_model: { - reranking_provider_name: rerankDefaultModel?.provider.provider || '', - reranking_model_name: rerankDefaultModel?.model || '', - }, - } - : {}), - ...(passValue.search_method === RETRIEVE_METHOD.hybrid - ? { - reranking_mode: passValue.reranking_mode || (rerankDefaultModel ? RerankingModeEnum.RerankingModel : RerankingModeEnum.WeightedScore), - weights: passValue.weights || { - weight_type: WeightedScoreEnum.Customized, - vector_setting: { - vector_weight: DEFAULT_WEIGHTED_SCORE.other.semantic, - embedding_provider_name: '', - embedding_model_name: '', + const { + defaultModel: rerankDefaultModel, + currentModel: isRerankDefaultModelValid, + } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank) + + const onSwitch = useCallback((retrieveMethod: RETRIEVE_METHOD) => { + if ([RETRIEVE_METHOD.semantic, RETRIEVE_METHOD.fullText].includes(retrieveMethod)) { + onChange({ + ...value, + search_method: retrieveMethod, + ...(!value.reranking_model.reranking_model_name + ? { + reranking_model: { + reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider?.provider ?? '' : '', + reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '', + }, + reranking_enable: !!isRerankDefaultModelValid, + } + : { + reranking_enable: true, + }), + }) + } + if (retrieveMethod === RETRIEVE_METHOD.hybrid) { + onChange({ + ...value, + search_method: retrieveMethod, + ...(!value.reranking_model.reranking_model_name + ? { + reranking_model: { + reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider?.provider ?? '' : '', + reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '', }, - keyword_setting: { - keyword_weight: DEFAULT_WEIGHTED_SCORE.other.keyword, + reranking_enable: !!isRerankDefaultModelValid, + reranking_mode: isRerankDefaultModelValid ? RerankingModeEnum.RerankingModel : RerankingModeEnum.WeightedScore, + } + : { + reranking_enable: true, + reranking_mode: RerankingModeEnum.RerankingModel, + }), + ...(!value.weights + ? { + weights: { + weight_type: WeightedScoreEnum.Customized, + vector_setting: { + vector_weight: DEFAULT_WEIGHTED_SCORE.other.semantic, + embedding_provider_name: '', + embedding_model_name: '', + }, + keyword_setting: { + keyword_weight: DEFAULT_WEIGHTED_SCORE.other.keyword, + }, }, - }, - } - : {}), + } + : {}), + }) } - }, [passValue, rerankDefaultModel]) + }, [value, rerankDefaultModel, isRerankDefaultModelValid, onChange]) + return (
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( @@ -71,11 +100,7 @@ const RetrievalMethodConfig: FC = ({ isActive={ value.search_method === RETRIEVE_METHOD.semantic } - onSwitched={() => onChange({ - ...value, - search_method: RETRIEVE_METHOD.semantic, - reranking_enable: false, - })} + onSwitched={() => onSwitch(RETRIEVE_METHOD.semantic)} effectImg={Effect.src} activeHeaderClassName='bg-dataset-option-card-purple-gradient' > @@ -93,11 +118,7 @@ const RetrievalMethodConfig: FC = ({ isActive={ value.search_method === RETRIEVE_METHOD.fullText } - onSwitched={() => onChange({ - ...value, - search_method: RETRIEVE_METHOD.fullText, - reranking_enable: false, - })} + onSwitched={() => onSwitch(RETRIEVE_METHOD.fullText)} effectImg={Effect.src} activeHeaderClassName='bg-dataset-option-card-purple-gradient' > @@ -119,11 +140,7 @@ const RetrievalMethodConfig: FC = ({ description={t('dataset.retrieval.hybrid_search.description')} isActive={ value.search_method === RETRIEVE_METHOD.hybrid } - onSwitched={() => onChange({ - ...value, - search_method: RETRIEVE_METHOD.hybrid, - reranking_enable: true, - })} + onSwitched={() => onSwitch(RETRIEVE_METHOD.hybrid)} effectImg={Effect.src} activeHeaderClassName='bg-dataset-option-card-purple-gradient' > diff --git a/web/app/components/datasets/common/retrieval-param-config/index.tsx b/web/app/components/datasets/common/retrieval-param-config/index.tsx index e75e7e028aa10e..6ee87b3a1969a3 100644 --- a/web/app/components/datasets/common/retrieval-param-config/index.tsx +++ b/web/app/components/datasets/common/retrieval-param-config/index.tsx @@ -1,8 +1,7 @@ 'use client' import type { FC } from 'react' -import React, { useCallback } from 'react' +import React, { useCallback, useMemo } from 'react' import { useTranslation } from 'react-i18next' -import { useBoolean } from 'ahooks' import Image from 'next/image' import ProgressIndicator from '../../create/assets/progress-indicator.svg' @@ -42,7 +41,6 @@ const RetrievalParamConfig: FC = ({ const isEconomical = type === RETRIEVE_METHOD.invertedIndex const isHybridSearch = type === RETRIEVE_METHOD.hybrid const { - defaultModel: rerankDefaultModel, modelList: rerankModelList, } = useModelListAndDefaultModel(ModelTypeEnum.rerank) @@ -50,18 +48,13 @@ const RetrievalParamConfig: FC = ({ currentModel, } = useCurrentProviderAndModel( rerankModelList, - rerankDefaultModel - ? { - ...rerankDefaultModel, - provider: rerankDefaultModel.provider.provider, - } - : undefined, + { + provider: value.reranking_model?.reranking_provider_name ?? '', + model: value.reranking_model?.reranking_model_name ?? '', + }, ) - const [rerankingEnable, { toggle: toggleRerankingEnable }] = useBoolean((isHybridSearch || currentModel) ? value.reranking_enable : false) - const handleDisabledSwitchClick = useCallback((enable: boolean) => { - toggleRerankingEnable() if (enable && !currentModel) Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') }) onChange({ @@ -69,22 +62,14 @@ const RetrievalParamConfig: FC = ({ reranking_enable: enable, }) // eslint-disable-next-line react-hooks/exhaustive-deps - }, [currentModel, rerankDefaultModel, onChange, value]) + }, [currentModel, onChange, value]) - const rerankModel = (() => { - if (value.reranking_model) { - return { - provider_name: value.reranking_model.reranking_provider_name, - model_name: value.reranking_model.reranking_model_name, - } - } - else if (rerankDefaultModel) { - return { - provider_name: rerankDefaultModel.provider.provider, - model_name: rerankDefaultModel.model, - } + const rerankModel = useMemo(() => { + return { + provider_name: value.reranking_model.reranking_provider_name, + model_name: value.reranking_model.reranking_model_name, } - })() + }, [value.reranking_model]) const handleChangeRerankMode = (v: RerankingModeEnum) => { if (v === value.reranking_mode) @@ -108,6 +93,8 @@ const RetrievalParamConfig: FC = ({ }, } } + if (v === RerankingModeEnum.RerankingModel && !currentModel) + Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') }) onChange(result) } @@ -132,7 +119,7 @@ const RetrievalParamConfig: FC = ({ {canToggleRerankModalEnable && ( )} @@ -146,7 +133,7 @@ const RetrievalParamConfig: FC = ({
{ - rerankingEnable && ( + value.reranking_enable && ( { + if (currentDataset?.retrieval_model_dict) + return + setRetrievalConfig({ + search_method: RETRIEVE_METHOD.semantic, + reranking_enable: !!isRerankDefaultModelValid, + reranking_model: { + reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider.provider ?? '' : '', + reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '', + }, + top_k: 3, + score_threshold_enabled: false, + score_threshold: 0.5, + }) + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [rerankDefaultModel, isRerankDefaultModelValid]) + const getCreationParams = () => { let params if (segmentationType === ProcessMode.general && overlap > maxChunkLength) { @@ -406,8 +422,6 @@ const StepTwo = ({ const indexMethod = getIndexing_technique() if ( !isReRankModelSelected({ - rerankDefaultModel, - isRerankDefaultModelValid: !!isRerankDefaultModelValid, rerankModelList, retrievalConfig, indexMethod: indexMethod as string, @@ -537,14 +551,6 @@ const StepTwo = ({ isSetting && onSave && onSave() } - const changeToEconomicalType = () => { - if (docForm !== ChunkingMode.text) - return - - if (!hasSetIndexType) - setIndexType(IndexingType.ECONOMICAL) - } - useEffect(() => { // fetch rules if (!isSetting) { diff --git a/web/app/components/datasets/hit-testing/index.tsx b/web/app/components/datasets/hit-testing/index.tsx index 30be6fb7e71ec0..ccc200bbe6be87 100644 --- a/web/app/components/datasets/hit-testing/index.tsx +++ b/web/app/components/datasets/hit-testing/index.tsx @@ -192,7 +192,7 @@ const HitTesting: FC = ({ datasetId }: Props) => { }
- setIsShowModifyRetrievalModal(false)} footer={null} mask={isMobile} panelClassname='mt-16 mx-2 sm:mr-2 mb-3 !p-0 !max-w-[640px] rounded-xl'> + setIsShowModifyRetrievalModal(false)} footer={null} mask={isMobile} panelClassname='mt-16 mx-2 sm:mr-2 mb-3 !p-0 !max-w-[640px] rounded-xl'> = ({ const { modelList: rerankModelList, - defaultModel: rerankDefaultModel, - currentModel: isRerankDefaultModelValid, } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank) const handleSave = () => { if ( !isReRankModelSelected({ - rerankDefaultModel, - isRerankDefaultModelValid: !!isRerankDefaultModelValid, rerankModelList, retrievalConfig, indexMethod, diff --git a/web/app/components/datasets/settings/form/index.tsx b/web/app/components/datasets/settings/form/index.tsx index 7b641253345e71..f14ffc671ac068 100644 --- a/web/app/components/datasets/settings/form/index.tsx +++ b/web/app/components/datasets/settings/form/index.tsx @@ -109,8 +109,6 @@ const Form = () => { } if ( !isReRankModelSelected({ - rerankDefaultModel, - isRerankDefaultModelValid: !!isRerankDefaultModelValid, rerankModelList, retrievalConfig, indexMethod, diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts b/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts index e90fe2c2ff26b2..6b09c611f820a3 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts +++ b/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts @@ -156,7 +156,7 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => { }) setInputs(newInput) // eslint-disable-next-line react-hooks/exhaustive-deps - }, [currentProvider?.provider, currentModel, rerankDefaultModel]) + }, [currentProvider?.provider, currentModel, currentRerankModel, rerankDefaultModel]) const [selectedDatasets, setSelectedDatasets] = useState([]) const [rerankModelOpen, setRerankModelOpen] = useState(false) const handleRetrievalModeChange = useCallback((newMode: RETRIEVE_TYPE) => { diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/utils.ts b/web/app/components/workflow/nodes/knowledge-retrieval/utils.ts index 794fcbca4aa216..e9c857f393b94e 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/utils.ts +++ b/web/app/components/workflow/nodes/knowledge-retrieval/utils.ts @@ -126,7 +126,7 @@ export const getMultipleRetrievalConfig = ( reranking_mode, reranking_model, weights, - reranking_enable: ((allInternal && allEconomic) || allExternal) ? reranking_enable : true, + reranking_enable: ((allInternal && allEconomic) || allExternal) ? reranking_enable : (selectedDatasets ?? []).length > 0, } const setDefaultWeights = () => { @@ -152,16 +152,16 @@ export const getMultipleRetrievalConfig = ( if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel || allExternal || mixtureInternalAndExternal) { result.reranking_mode = RerankingModeEnum.RerankingModel - - if (rerankModelIsValid) { - result.reranking_mode = RerankingModeEnum.RerankingModel - result.reranking_model = { - provider: validRerankModel?.provider || '', - model: validRerankModel?.model || '', + if (!result.reranking_model?.provider || !result.reranking_model?.model) { + if (rerankModelIsValid) { + result.reranking_model = { + provider: validRerankModel?.provider || '', + model: validRerankModel?.model || '', + } + } + else { + result.reranking_model = undefined } - } - else { - result.reranking_model = undefined } } @@ -201,6 +201,7 @@ export const getMultipleRetrievalConfig = ( } } + console.log('🚀 ~ result:', result) return result } From fa60a3dea2841e3120183ccb67a39ae07be9419a Mon Sep 17 00:00:00 2001 From: twwu Date: Thu, 2 Jan 2025 11:52:53 +0800 Subject: [PATCH 15/19] fix: simplify model selector logic in retrieval parameter configuration --- .../components/datasets/common/retrieval-param-config/index.tsx | 2 -- 1 file changed, 2 deletions(-) diff --git a/web/app/components/datasets/common/retrieval-param-config/index.tsx b/web/app/components/datasets/common/retrieval-param-config/index.tsx index 6ee87b3a1969a3..93d555a34d6b0f 100644 --- a/web/app/components/datasets/common/retrieval-param-config/index.tsx +++ b/web/app/components/datasets/common/retrieval-param-config/index.tsx @@ -241,10 +241,8 @@ const RetrievalParamConfig: FC = ({ { value.reranking_mode !== RerankingModeEnum.WeightedScore && ( { onChange({ ...value, From 3432c87b26e69b1e562d419e89bdc8b3804b9212 Mon Sep 17 00:00:00 2001 From: twwu Date: Thu, 2 Jan 2025 14:45:12 +0800 Subject: [PATCH 16/19] fix: update reranking model structure for consistency in retrieval configuration --- .../dataset-config/params-config/index.tsx | 9 +---- .../components/app/configuration/index.tsx | 9 +---- .../datasets/settings/form/index.tsx | 2 - .../components/retrieval-config.tsx | 14 +++---- .../nodes/knowledge-retrieval/types.ts | 4 +- .../nodes/knowledge-retrieval/use-config.ts | 2 +- .../nodes/knowledge-retrieval/utils.ts | 39 +++++++++++++------ 7 files changed, 39 insertions(+), 40 deletions(-) diff --git a/web/app/components/app/configuration/dataset-config/params-config/index.tsx b/web/app/components/app/configuration/dataset-config/params-config/index.tsx index 52d485d85f0172..3e8a3e3cdd413a 100644 --- a/web/app/components/app/configuration/dataset-config/params-config/index.tsx +++ b/web/app/components/app/configuration/dataset-config/params-config/index.tsx @@ -86,10 +86,7 @@ const ParamsConfig = ({ const retrievalConfig = getMultipleRetrievalConfig({ top_k: restConfigs.top_k, score_threshold: restConfigs.score_threshold, - reranking_model: restConfigs.reranking_model && { - provider: restConfigs.reranking_model.reranking_provider_name, - model: restConfigs.reranking_model.reranking_model_name, - }, + reranking_model: restConfigs.reranking_model, reranking_mode: restConfigs.reranking_mode, weights: restConfigs.weights, reranking_enable: restConfigs.reranking_enable, @@ -100,10 +97,6 @@ const ParamsConfig = ({ setTempDataSetConfigs({ ...retrievalConfig, - reranking_model: restConfigs.reranking_model && { - reranking_provider_name: restConfigs.reranking_model.reranking_provider_name, - reranking_model_name: restConfigs.reranking_model.reranking_model_name, - }, retrieval_model, score_threshold_enabled, datasets, diff --git a/web/app/components/app/configuration/index.tsx b/web/app/components/app/configuration/index.tsx index d3719a7696f753..f52b224a506311 100644 --- a/web/app/components/app/configuration/index.tsx +++ b/web/app/components/app/configuration/index.tsx @@ -273,10 +273,7 @@ const Configuration: FC = () => { const retrievalConfig = getMultipleRetrievalConfig({ top_k: restConfigs.top_k, score_threshold: restConfigs.score_threshold, - reranking_model: restConfigs.reranking_model && { - provider: restConfigs.reranking_model.reranking_provider_name, - model: restConfigs.reranking_model.reranking_model_name, - }, + reranking_model: restConfigs.reranking_model, reranking_mode: restConfigs.reranking_mode, weights: restConfigs.weights, reranking_enable: restConfigs.reranking_enable, @@ -287,10 +284,6 @@ const Configuration: FC = () => { setDatasetConfigs({ ...retrievalConfig, - reranking_model: restConfigs.reranking_model && { - reranking_provider_name: restConfigs.reranking_model.reranking_provider_name, - reranking_model_name: restConfigs.reranking_model.reranking_model_name, - }, retrieval_model, score_threshold_enabled, datasets, diff --git a/web/app/components/datasets/settings/form/index.tsx b/web/app/components/datasets/settings/form/index.tsx index f14ffc671ac068..760954d6cbf4fa 100644 --- a/web/app/components/datasets/settings/form/index.tsx +++ b/web/app/components/datasets/settings/form/index.tsx @@ -74,8 +74,6 @@ const Form = () => { ) const { modelList: rerankModelList, - defaultModel: rerankDefaultModel, - currentModel: isRerankDefaultModelValid, } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank) const { data: embeddingModelList } = useModelList(ModelTypeEnum.textEmbedding) diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx index d3e20797335264..c12f7fef756f0e 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx +++ b/web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx @@ -76,12 +76,12 @@ const RetrievalConfig: FC = ({ ? undefined : (!configs.reranking_model?.reranking_provider_name ? { - provider: validRerankDefaultProvider?.provider || '', - model: validRerankDefaultModel?.model || '', + reranking_provider_name: validRerankDefaultProvider?.provider || '', + reranking_model_name: validRerankDefaultModel?.model || '', } : { - provider: configs.reranking_model?.reranking_provider_name, - model: configs.reranking_model?.reranking_model_name, + reranking_provider_name: configs.reranking_model?.reranking_provider_name, + reranking_model_name: configs.reranking_model?.reranking_model_name, }), reranking_mode: configs.reranking_mode, weights: configs.weights as any, @@ -121,10 +121,10 @@ const RetrievalConfig: FC = ({ datasetConfigs={ { retrieval_model: payload.retrieval_mode, - reranking_model: multiple_retrieval_config?.reranking_model?.provider + reranking_model: multiple_retrieval_config?.reranking_model?.reranking_model_name ? { - reranking_provider_name: multiple_retrieval_config.reranking_model?.provider, - reranking_model_name: multiple_retrieval_config.reranking_model?.model, + reranking_provider_name: multiple_retrieval_config.reranking_model?.reranking_provider_name, + reranking_model_name: multiple_retrieval_config.reranking_model?.reranking_model_name, } : { reranking_provider_name: '', diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/types.ts b/web/app/components/workflow/nodes/knowledge-retrieval/types.ts index 1b85bfc0b51b46..2a9d9f9aa8387c 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/types.ts +++ b/web/app/components/workflow/nodes/knowledge-retrieval/types.ts @@ -9,8 +9,8 @@ export type MultipleRetrievalConfig = { top_k: number score_threshold: number | null | undefined reranking_model?: { - provider: string - model: string + reranking_provider_name: string + reranking_model_name: string } reranking_mode?: RerankingModeEnum weights?: { diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts b/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts index 6b09c611f820a3..54a9aa53565c96 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts +++ b/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts @@ -122,7 +122,7 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => { // set defaults models useEffect(() => { const inputs = inputRef.current - if (inputs.retrieval_mode === RETRIEVE_TYPE.multiWay && inputs.multiple_retrieval_config?.reranking_model?.provider && currentRerankModel && rerankDefaultModel) + if (inputs.retrieval_mode === RETRIEVE_TYPE.multiWay && inputs.multiple_retrieval_config?.reranking_model?.reranking_provider_name && currentRerankModel && rerankDefaultModel) return if (inputs.retrieval_mode === RETRIEVE_TYPE.oneWay && inputs.single_retrieval_config?.model?.provider) diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/utils.ts b/web/app/components/workflow/nodes/knowledge-retrieval/utils.ts index e9c857f393b94e..3926c8206caf6b 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/utils.ts +++ b/web/app/components/workflow/nodes/knowledge-retrieval/utils.ts @@ -124,9 +124,12 @@ export const getMultipleRetrievalConfig = ( top_k, score_threshold, reranking_mode, - reranking_model, + reranking_model: { + reranking_provider_name: reranking_model?.reranking_provider_name || '', + reranking_model_name: reranking_model?.reranking_model_name || '', + }, weights, - reranking_enable: ((allInternal && allEconomic) || allExternal) ? reranking_enable : (selectedDatasets ?? []).length > 0, + reranking_enable: ((allInternal && allEconomic) || allExternal) ? reranking_enable : shouldSetWeightDefaultValue, } const setDefaultWeights = () => { @@ -152,15 +155,19 @@ export const getMultipleRetrievalConfig = ( if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel || allExternal || mixtureInternalAndExternal) { result.reranking_mode = RerankingModeEnum.RerankingModel - if (!result.reranking_model?.provider || !result.reranking_model?.model) { + if (!result.reranking_model?.reranking_provider_name || !result.reranking_model?.reranking_model_name) { if (rerankModelIsValid) { + result.reranking_enable = true result.reranking_model = { - provider: validRerankModel?.provider || '', - model: validRerankModel?.model || '', + reranking_provider_name: validRerankModel?.provider || '', + reranking_model_name: validRerankModel?.model || '', } } else { - result.reranking_model = undefined + result.reranking_model = { + reranking_provider_name: '', + reranking_model_name: '', + } } } } @@ -169,9 +176,10 @@ export const getMultipleRetrievalConfig = ( if (!reranking_mode) { if (validRerankModel?.provider && validRerankModel?.model) { result.reranking_mode = RerankingModeEnum.RerankingModel + result.reranking_enable = true result.reranking_model = { - provider: validRerankModel.provider, - model: validRerankModel.model, + reranking_provider_name: validRerankModel.provider, + reranking_model_name: validRerankModel.model, } } else { @@ -186,9 +194,10 @@ export const getMultipleRetrievalConfig = ( if (reranking_mode === RerankingModeEnum.WeightedScore && weights && shouldSetWeightDefaultValue) { if (rerankModelIsValid) { result.reranking_mode = RerankingModeEnum.RerankingModel + result.reranking_enable = true result.reranking_model = { - provider: validRerankModel.provider || '', - model: validRerankModel.model || '', + reranking_provider_name: validRerankModel.provider || '', + reranking_model_name: validRerankModel.model || '', } } else { @@ -199,9 +208,15 @@ export const getMultipleRetrievalConfig = ( result.reranking_mode = RerankingModeEnum.WeightedScore setDefaultWeights() } + if (reranking_mode === RerankingModeEnum.RerankingModel && rerankModelIsValid) { + result.reranking_enable = true + result.reranking_model = { + reranking_provider_name: validRerankModel.provider || '', + reranking_model_name: validRerankModel.model || '', + } + } } - console.log('🚀 ~ result:', result) return result } @@ -223,7 +238,7 @@ export const checkoutRerankModelConfigedInRetrievalSettings = ( reranking_model, } = multipleRetrievalConfig - if (reranking_mode === RerankingModeEnum.RerankingModel && (!reranking_model?.provider || !reranking_model?.model)) { + if (reranking_mode === RerankingModeEnum.RerankingModel && (!reranking_model?.reranking_provider_name || !reranking_model?.reranking_model_name)) { if ((allEconomic || allExternal) && !reranking_enable) return true From f01d0c2a1f902f6750975f4370e8e9959be0d9a5 Mon Sep 17 00:00:00 2001 From: twwu Date: Thu, 2 Jan 2025 14:57:57 +0800 Subject: [PATCH 17/19] fix: enhance document detail handling with invalidation logic for segments --- .../datasets/documents/detail/index.tsx | 16 +++++++++++++--- web/app/components/datasets/documents/index.tsx | 16 +++++++++++++++- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/web/app/components/datasets/documents/detail/index.tsx b/web/app/components/datasets/documents/detail/index.tsx index 3fdfccdaa44b28..2b65c195fb4d9f 100644 --- a/web/app/components/datasets/documents/detail/index.tsx +++ b/web/app/components/datasets/documents/detail/index.tsx @@ -22,8 +22,9 @@ import { useDatasetDetailContext } from '@/context/dataset-detail' import FloatRightContainer from '@/app/components/base/float-right-container' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' import { LayoutRight2LineMod } from '@/app/components/base/icons/src/public/knowledge' -import { useCheckSegmentBatchImportProgress, useSegmentBatchImport } from '@/service/knowledge/use-segment' +import { useCheckSegmentBatchImportProgress, useChildSegmentListKey, useSegmentBatchImport, useSegmentListKey } from '@/service/knowledge/use-segment' import { useDocumentDetail, useDocumentMetadata } from '@/service/knowledge/use-document' +import { useInvalid } from '@/service/use-base' type DocumentContextValue = { datasetId?: string @@ -149,11 +150,20 @@ const DocumentDetail: FC = ({ datasetId, documentId }) => { const embedding = ['queuing', 'indexing', 'paused'].includes((documentDetail?.display_status || '').toLowerCase()) + const invalidChunkList = useInvalid(useSegmentListKey) + const invalidChildChunkList = useInvalid(useChildSegmentListKey) + const handleOperate = (operateName?: string) => { - if (operateName === 'delete') + if (operateName === 'delete') { backToPrev() - else + } + else { detailMutate() + setTimeout(() => { + invalidChunkList() + invalidChildChunkList() + }, 5000) + } } const mode = useMemo(() => { diff --git a/web/app/components/datasets/documents/index.tsx b/web/app/components/datasets/documents/index.tsx index d5d8f329b0c6f1..c9df2f28e243fc 100644 --- a/web/app/components/datasets/documents/index.tsx +++ b/web/app/components/datasets/documents/index.tsx @@ -24,6 +24,10 @@ import { DataSourceType } from '@/models/datasets' import IndexFailed from '@/app/components/datasets/common/document-status-with-action/index-failed' import { useProviderContext } from '@/context/provider-context' import cn from '@/utils/classnames' +import { useInvalidDocumentDetailKey } from '@/service/knowledge/use-document' +import { useInvalid } from '@/service/use-base' +import { useChildSegmentListKey, useSegmentListKey } from '@/service/knowledge/use-segment' + const FolderPlusIcon = ({ className }: React.SVGProps) => { return @@ -115,10 +119,20 @@ const Documents: FC = ({ datasetId }) => { setIsMuting(false) }, [isListLoading, isMuting]) + const invalidDocumentDetail = useInvalidDocumentDetailKey() + const invalidChunkList = useInvalid(useSegmentListKey) + const invalidChildChunkList = useInvalid(useChildSegmentListKey) + const handleUpdate = useCallback(() => { setIsMuting(true) mutate() - }, [mutate]) + invalidDocumentDetail() + setTimeout(() => { + invalidChunkList() + invalidChildChunkList() + }, 5000) + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []) const documentsWithProgress = useMemo(() => { let completedNum = 0 From 3443fec46e43c9d7b1b79f26786ec54735a12d12 Mon Sep 17 00:00:00 2001 From: twwu Date: Thu, 2 Jan 2025 15:08:12 +0800 Subject: [PATCH 18/19] fix: remove 'fix/parent-child-retrieval' branch from workflow triggers --- .github/workflows/build-push.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml index 0c6ed8a7fb61dd..8e5279fb67659b 100644 --- a/.github/workflows/build-push.yml +++ b/.github/workflows/build-push.yml @@ -5,7 +5,6 @@ on: branches: - "main" - "deploy/dev" - - "fix/parent-child-retrieval" release: types: [published] From 89a8866444afce43edb5c8da33a5908371c48ede Mon Sep 17 00:00:00 2001 From: twwu Date: Thu, 2 Jan 2025 15:46:20 +0800 Subject: [PATCH 19/19] fix: update reranking model structure for improved clarity and consistency --- .../dataset-config/params-config/index.tsx | 9 +++++- .../components/app/configuration/index.tsx | 9 +++++- .../components/retrieval-config.tsx | 14 ++++----- .../nodes/knowledge-retrieval/types.ts | 4 +-- .../nodes/knowledge-retrieval/use-config.ts | 2 +- .../nodes/knowledge-retrieval/utils.ts | 29 +++++++++---------- 6 files changed, 39 insertions(+), 28 deletions(-) diff --git a/web/app/components/app/configuration/dataset-config/params-config/index.tsx b/web/app/components/app/configuration/dataset-config/params-config/index.tsx index 3e8a3e3cdd413a..acd19559433e7f 100644 --- a/web/app/components/app/configuration/dataset-config/params-config/index.tsx +++ b/web/app/components/app/configuration/dataset-config/params-config/index.tsx @@ -86,7 +86,10 @@ const ParamsConfig = ({ const retrievalConfig = getMultipleRetrievalConfig({ top_k: restConfigs.top_k, score_threshold: restConfigs.score_threshold, - reranking_model: restConfigs.reranking_model, + reranking_model: restConfigs.reranking_model && { + provider: restConfigs.reranking_model.reranking_provider_name, + model: restConfigs.reranking_model.reranking_model_name, + }, reranking_mode: restConfigs.reranking_mode, weights: restConfigs.weights, reranking_enable: restConfigs.reranking_enable, @@ -97,6 +100,10 @@ const ParamsConfig = ({ setTempDataSetConfigs({ ...retrievalConfig, + reranking_model: { + reranking_provider_name: retrievalConfig.reranking_model?.provider || '', + reranking_model_name: retrievalConfig.reranking_model?.model || '', + }, retrieval_model, score_threshold_enabled, datasets, diff --git a/web/app/components/app/configuration/index.tsx b/web/app/components/app/configuration/index.tsx index f52b224a506311..b4289a105af03a 100644 --- a/web/app/components/app/configuration/index.tsx +++ b/web/app/components/app/configuration/index.tsx @@ -273,7 +273,10 @@ const Configuration: FC = () => { const retrievalConfig = getMultipleRetrievalConfig({ top_k: restConfigs.top_k, score_threshold: restConfigs.score_threshold, - reranking_model: restConfigs.reranking_model, + reranking_model: restConfigs.reranking_model && { + provider: restConfigs.reranking_model.reranking_provider_name, + model: restConfigs.reranking_model.reranking_model_name, + }, reranking_mode: restConfigs.reranking_mode, weights: restConfigs.weights, reranking_enable: restConfigs.reranking_enable, @@ -284,6 +287,10 @@ const Configuration: FC = () => { setDatasetConfigs({ ...retrievalConfig, + reranking_model: { + reranking_provider_name: retrievalConfig?.reranking_model?.provider || '', + reranking_model_name: retrievalConfig?.reranking_model?.model || '', + }, retrieval_model, score_threshold_enabled, datasets, diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx b/web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx index c12f7fef756f0e..d3e20797335264 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx +++ b/web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx @@ -76,12 +76,12 @@ const RetrievalConfig: FC = ({ ? undefined : (!configs.reranking_model?.reranking_provider_name ? { - reranking_provider_name: validRerankDefaultProvider?.provider || '', - reranking_model_name: validRerankDefaultModel?.model || '', + provider: validRerankDefaultProvider?.provider || '', + model: validRerankDefaultModel?.model || '', } : { - reranking_provider_name: configs.reranking_model?.reranking_provider_name, - reranking_model_name: configs.reranking_model?.reranking_model_name, + provider: configs.reranking_model?.reranking_provider_name, + model: configs.reranking_model?.reranking_model_name, }), reranking_mode: configs.reranking_mode, weights: configs.weights as any, @@ -121,10 +121,10 @@ const RetrievalConfig: FC = ({ datasetConfigs={ { retrieval_model: payload.retrieval_mode, - reranking_model: multiple_retrieval_config?.reranking_model?.reranking_model_name + reranking_model: multiple_retrieval_config?.reranking_model?.provider ? { - reranking_provider_name: multiple_retrieval_config.reranking_model?.reranking_provider_name, - reranking_model_name: multiple_retrieval_config.reranking_model?.reranking_model_name, + reranking_provider_name: multiple_retrieval_config.reranking_model?.provider, + reranking_model_name: multiple_retrieval_config.reranking_model?.model, } : { reranking_provider_name: '', diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/types.ts b/web/app/components/workflow/nodes/knowledge-retrieval/types.ts index 2a9d9f9aa8387c..1b85bfc0b51b46 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/types.ts +++ b/web/app/components/workflow/nodes/knowledge-retrieval/types.ts @@ -9,8 +9,8 @@ export type MultipleRetrievalConfig = { top_k: number score_threshold: number | null | undefined reranking_model?: { - reranking_provider_name: string - reranking_model_name: string + provider: string + model: string } reranking_mode?: RerankingModeEnum weights?: { diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts b/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts index 54a9aa53565c96..6b09c611f820a3 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts +++ b/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts @@ -122,7 +122,7 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => { // set defaults models useEffect(() => { const inputs = inputRef.current - if (inputs.retrieval_mode === RETRIEVE_TYPE.multiWay && inputs.multiple_retrieval_config?.reranking_model?.reranking_provider_name && currentRerankModel && rerankDefaultModel) + if (inputs.retrieval_mode === RETRIEVE_TYPE.multiWay && inputs.multiple_retrieval_config?.reranking_model?.provider && currentRerankModel && rerankDefaultModel) return if (inputs.retrieval_mode === RETRIEVE_TYPE.oneWay && inputs.single_retrieval_config?.model?.provider) diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/utils.ts b/web/app/components/workflow/nodes/knowledge-retrieval/utils.ts index 3926c8206caf6b..c7b48c1eaaba6a 100644 --- a/web/app/components/workflow/nodes/knowledge-retrieval/utils.ts +++ b/web/app/components/workflow/nodes/knowledge-retrieval/utils.ts @@ -124,10 +124,7 @@ export const getMultipleRetrievalConfig = ( top_k, score_threshold, reranking_mode, - reranking_model: { - reranking_provider_name: reranking_model?.reranking_provider_name || '', - reranking_model_name: reranking_model?.reranking_model_name || '', - }, + reranking_model, weights, reranking_enable: ((allInternal && allEconomic) || allExternal) ? reranking_enable : shouldSetWeightDefaultValue, } @@ -155,18 +152,18 @@ export const getMultipleRetrievalConfig = ( if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel || allExternal || mixtureInternalAndExternal) { result.reranking_mode = RerankingModeEnum.RerankingModel - if (!result.reranking_model?.reranking_provider_name || !result.reranking_model?.reranking_model_name) { + if (!result.reranking_model?.provider || !result.reranking_model?.model) { if (rerankModelIsValid) { result.reranking_enable = true result.reranking_model = { - reranking_provider_name: validRerankModel?.provider || '', - reranking_model_name: validRerankModel?.model || '', + provider: validRerankModel?.provider || '', + model: validRerankModel?.model || '', } } else { result.reranking_model = { - reranking_provider_name: '', - reranking_model_name: '', + provider: '', + model: '', } } } @@ -178,8 +175,8 @@ export const getMultipleRetrievalConfig = ( result.reranking_mode = RerankingModeEnum.RerankingModel result.reranking_enable = true result.reranking_model = { - reranking_provider_name: validRerankModel.provider, - reranking_model_name: validRerankModel.model, + provider: validRerankModel.provider, + model: validRerankModel.model, } } else { @@ -196,8 +193,8 @@ export const getMultipleRetrievalConfig = ( result.reranking_mode = RerankingModeEnum.RerankingModel result.reranking_enable = true result.reranking_model = { - reranking_provider_name: validRerankModel.provider || '', - reranking_model_name: validRerankModel.model || '', + provider: validRerankModel.provider || '', + model: validRerankModel.model || '', } } else { @@ -211,8 +208,8 @@ export const getMultipleRetrievalConfig = ( if (reranking_mode === RerankingModeEnum.RerankingModel && rerankModelIsValid) { result.reranking_enable = true result.reranking_model = { - reranking_provider_name: validRerankModel.provider || '', - reranking_model_name: validRerankModel.model || '', + provider: validRerankModel.provider || '', + model: validRerankModel.model || '', } } } @@ -238,7 +235,7 @@ export const checkoutRerankModelConfigedInRetrievalSettings = ( reranking_model, } = multipleRetrievalConfig - if (reranking_mode === RerankingModeEnum.RerankingModel && (!reranking_model?.reranking_provider_name || !reranking_model?.reranking_model_name)) { + if (reranking_mode === RerankingModeEnum.RerankingModel && (!reranking_model?.provider || !reranking_model?.model)) { if ((allEconomic || allExternal) && !reranking_enable) return true