diff --git a/depot.json b/depot.json new file mode 100644 index 0000000000..75167fca4e --- /dev/null +++ b/depot.json @@ -0,0 +1 @@ +{"id":"du7O4b0e8P"} diff --git a/packages/backend/src/app/chatbot/embedings/embedings-settings.ts b/packages/backend/src/app/chatbot/embedings/embedings-settings.ts new file mode 100644 index 0000000000..56b592b84a --- /dev/null +++ b/packages/backend/src/app/chatbot/embedings/embedings-settings.ts @@ -0,0 +1,2 @@ + +export const EMBEDINGS_SEARCH_RESULT = 3; diff --git a/packages/backend/src/app/chatbot/embedings/fais-embeddings.ts b/packages/backend/src/app/chatbot/embedings/fais-embeddings.ts index a88117f201..ec00d90f19 100644 --- a/packages/backend/src/app/chatbot/embedings/fais-embeddings.ts +++ b/packages/backend/src/app/chatbot/embedings/fais-embeddings.ts @@ -5,6 +5,7 @@ import path from 'path' import os from 'os' import fs from 'fs/promises' import { localFileStore } from '../../helper/store' +import { EMBEDINGS_SEARCH_RESULT } from './embedings-settings' const storeCache: Record = {} @@ -42,7 +43,7 @@ export const faissEmbedding = ({ openAIApiKey, botId }: { botId: string, openAIA if (store.docstore._docs.size === 0) { return [] } - const similarDocuments = await store.similaritySearch(input, 5, botId) + const similarDocuments = await store.similaritySearch(input, EMBEDINGS_SEARCH_RESULT, botId) return similarDocuments.map((doc) => doc.pageContent) }, async addDocuments({ diff --git a/packages/backend/src/app/chatbot/framework/datasource.ts b/packages/backend/src/app/chatbot/framework/datasource.ts index 78c17bfd73..dbf3b11f16 100644 --- a/packages/backend/src/app/chatbot/framework/datasource.ts +++ b/packages/backend/src/app/chatbot/framework/datasource.ts @@ -8,7 +8,9 @@ export const datasources = { }): Promise { const blob = new Blob([buffer]) const pdfLoader = new PDFLoader(blob, { splitPages: true }) - const splitter = new RecursiveCharacterTextSplitter({}) + const splitter = new RecursiveCharacterTextSplitter({ + chunkSize: 1300, + }) const documents = await pdfLoader.loadAndSplit(splitter) return documents },