Skip to content

Commit 7981b61

Browse files
authored
perf: yuque dataset (#5040)
* perf: yuque dataset
* doc
1 parent 450d0a5 commit 7981b61

File tree

9 files changed

+139
-53
lines changed

9 files changed

+139
-53
lines changed

docSite/content/zh-cn/docs/development/upgrading/4912.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ weight: 788
1414
3. 问题分类和内容提取,提示词中自动加入上一轮结果进行额外引导。
1515
4. 判断器支持变量引用。
1616
5. 商业版支持知识库分块时,LLM 进行自动分段识别。
17+
6. Admin 管理员数据看板。
18+
7. 新增豆包 1.6 系列模型,更新 qwen 模型配置。
1719

1820
## ⚙️ 优化
1921

@@ -25,6 +27,7 @@ weight: 788
2527
6. MCP 工具调用,使用 Raw schema 进行工具调用,保障完整性。
2628
7. 删除知识库文件时,如果文件不存在,不会阻断删除。
2729
8. 升级 MCP SDK,兼容最新的 HTTPStreamable。
30+
9. 语雀文档库,递归获取文档类型目录下的数据。
2831

2932
## 🐛 修复
3033

packages/service/core/ai/config/provider/Doubao.json

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,72 @@
11
{
22
"provider": "Doubao",
33
"list": [
4+
{
5+
"model": "Doubao-Seed-1.6",
6+
"name": "Doubao-Seed-1.6",
7+
"maxContext": 220000,
8+
"maxResponse": 16000,
9+
"quoteMaxToken": 220000,
10+
"maxTemperature": 1,
11+
"showTopP": true,
12+
"showStopSign": true,
13+
"vision": true,
14+
"toolChoice": true,
15+
"functionCall": false,
16+
"defaultSystemChatPrompt": "",
17+
"datasetProcess": true,
18+
"usedInClassify": true,
19+
"usedInExtractFields": true,
20+
"usedInQueryExtension": true,
21+
"usedInToolCall": true,
22+
"defaultConfig": {},
23+
"fieldMap": {},
24+
"type": "llm"
25+
},
26+
{
27+
"model": "Doubao-Seed-1.6-thinking",
28+
"name": "Doubao-Seed-1.6-thinking",
29+
"maxContext": 220000,
30+
"maxResponse": 16000,
31+
"quoteMaxToken": 220000,
32+
"maxTemperature": 1,
33+
"showTopP": true,
34+
"showStopSign": true,
35+
"vision": true,
36+
"toolChoice": true,
37+
"functionCall": false,
38+
"defaultSystemChatPrompt": "",
39+
"datasetProcess": true,
40+
"usedInClassify": true,
41+
"usedInExtractFields": true,
42+
"usedInQueryExtension": true,
43+
"usedInToolCall": true,
44+
"defaultConfig": {},
45+
"fieldMap": {},
46+
"type": "llm"
47+
},
48+
{
49+
"model": "Doubao-Seed-1.6-flash",
50+
"name": "Doubao-Seed-1.6-flash",
51+
"maxContext": 220000,
52+
"maxResponse": 16000,
53+
"quoteMaxToken": 220000,
54+
"maxTemperature": 1,
55+
"showTopP": true,
56+
"showStopSign": true,
57+
"vision": true,
58+
"toolChoice": true,
59+
"functionCall": false,
60+
"defaultSystemChatPrompt": "",
61+
"datasetProcess": true,
62+
"usedInClassify": true,
63+
"usedInExtractFields": true,
64+
"usedInQueryExtension": true,
65+
"usedInToolCall": true,
66+
"defaultConfig": {},
67+
"fieldMap": {},
68+
"type": "llm"
69+
},
470
{
571
"model": "Doubao-1.5-lite-32k",
672
"name": "Doubao-1.5-lite-32k",

packages/service/core/ai/config/provider/Qwen.json

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
{
55
"model": "qwen-max",
66
"name": "Qwen-max",
7-
"maxContext": 32000,
8-
"maxResponse": 4000,
9-
"quoteMaxToken": 6000,
7+
"maxContext": 128000,
8+
"maxResponse": 8000,
9+
"quoteMaxToken": 120000,
1010
"maxTemperature": 1,
1111
"vision": false,
1212
"toolChoice": true,
@@ -27,10 +27,10 @@
2727
{
2828
"model": "qwen-vl-max",
2929
"name": "qwen-vl-max",
30-
"maxContext": 32000,
31-
"maxResponse": 2000,
32-
"quoteMaxToken": 20000,
33-
"maxTemperature": 1.2,
30+
"maxContext": 128000,
31+
"maxResponse": 8000,
32+
"quoteMaxToken": 120000,
33+
"maxTemperature": 1,
3434
"vision": true,
3535
"toolChoice": false,
3636
"functionCall": false,
@@ -49,9 +49,9 @@
4949
{
5050
"model": "qwen-plus",
5151
"name": "Qwen-plus",
52-
"maxContext": 64000,
52+
"maxContext": 128000,
5353
"maxResponse": 8000,
54-
"quoteMaxToken": 60000,
54+
"quoteMaxToken": 120000,
5555
"maxTemperature": 1,
5656
"vision": false,
5757
"toolChoice": true,
@@ -72,10 +72,10 @@
7272
{
7373
"model": "qwen-vl-plus",
7474
"name": "qwen-vl-plus",
75-
"maxContext": 32000,
76-
"maxResponse": 2000,
77-
"quoteMaxToken": 20000,
78-
"maxTemperature": 1.2,
75+
"maxContext": 128000,
76+
"maxResponse": 8000,
77+
"quoteMaxToken": 120000,
78+
"maxTemperature": 1,
7979
"vision": true,
8080
"toolChoice": false,
8181
"functionCall": false,
@@ -92,9 +92,9 @@
9292
{
9393
"model": "qwen-turbo",
9494
"name": "Qwen-turbo",
95-
"maxContext": 128000,
95+
"maxContext": 1000000,
9696
"maxResponse": 8000,
97-
"quoteMaxToken": 100000,
97+
"quoteMaxToken": 1000000,
9898
"maxTemperature": 1,
9999
"vision": false,
100100
"toolChoice": true,
@@ -487,9 +487,9 @@
487487
{
488488
"model": "qwen-long",
489489
"name": "qwen-long",
490-
"maxContext": 100000,
490+
"maxContext": 10000000,
491491
"maxResponse": 6000,
492-
"quoteMaxToken": 10000,
492+
"quoteMaxToken": 10000000,
493493
"maxTemperature": 1,
494494
"vision": false,
495495
"toolChoice": false,

packages/service/core/dataset/apiDataset/custom/api.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
106106

107107
const formattedFiles = files.map((file) => ({
108108
...file,
109-
hasChild: file.type === 'folder'
109+
hasChild: file.hasChild ?? file.type === 'folder'
110110
}));
111111

112112
return formattedFiles;

packages/service/core/dataset/apiDataset/yuqueDataset/api.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
198198
}: {
199199
apiFileId: string;
200200
}): Promise<ApiFileReadContentResponse> => {
201+
if (typeof apiFileId !== 'string') return Promise.reject('Invalid file id');
201202
const [parentId, fileId] = apiFileId.split(/-(.*?)-(.*)/);
202203

203204
const data = await request<{ title: string; body: string }>(

packages/service/core/dataset/read.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ export const readApiServerFileContent = async ({
167167
};
168168

169169
export const rawText2Chunks = async ({
170-
rawText,
170+
rawText = '',
171171
chunkTriggerType = ChunkTriggerConfigTypeEnum.minSize,
172172
chunkTriggerMinSize = 1000,
173173
backupParse,

projects/app/src/pageComponents/account/model/ModelDashboard/DataTableComponent.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ const DataTableComponent = ({
140140
model: item.model,
141141
totalCalls: item.totalCalls,
142142
errorCalls: item.errorCalls,
143-
totalCost: item.totalCost,
143+
totalCost: Math.floor(item.totalCost),
144144
avgResponseTime: successCalls > 0 ? item.totalResponseTime / successCalls / 1000 : 0,
145145
avgTtfb: successCalls > 0 ? item.totalTtfb / successCalls / 1000 : 0
146146
});
@@ -201,7 +201,7 @@ const DataTableComponent = ({
201201
model: modelName,
202202
totalCalls: item.totalCalls,
203203
errorCalls: item.errorCalls,
204-
totalCost: item.totalCost,
204+
totalCost: Math.floor(item.totalCost),
205205
avgResponseTime: successCalls > 0 ? item.totalResponseTime / successCalls / 1000 : 0,
206206
avgTtfb: successCalls > 0 ? item.totalTtfb / successCalls / 1000 : 0
207207
});

projects/app/src/pageComponents/account/model/ModelDashboard/index.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ const ModelDashboard = ({ Tab }: { Tab: React.ReactNode }) => {
349349
inputTokens,
350350
outputTokens,
351351
totalTokens,
352-
totalCost,
352+
totalCost: Math.floor(totalCost),
353353
avgResponseTime: Math.round(avgResponseTime * 100) / 100,
354354
avgTtfb: Math.round(avgTtfb * 100) / 100,
355355
maxRpm,

projects/app/src/pageComponents/dataset/detail/Import/diffSource/APIDataset.tsx

Lines changed: 47 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,10 @@ const CustomAPIFileInput = () => {
7070
}
7171
);
7272

73-
const { data: existIdList = [] } = useRequest2(
74-
() => getApiDatasetFileListExistId({ datasetId: datasetDetail._id }),
73+
const { data: existIdList = new Set() } = useRequest2(
74+
async () => {
75+
return new Set<string>(await getApiDatasetFileListExistId({ datasetId: datasetDetail._id }));
76+
},
7577
{
7678
manual: false
7779
}
@@ -89,35 +91,41 @@ const CustomAPIFileInput = () => {
8991
const allFiles: APIFileItem[] = [];
9092

9193
for (const file of files) {
92-
if (file.type === 'folder') {
94+
if (sources.some((item) => item.apiFileId === file.id)) {
95+
allFiles.push(file);
96+
continue;
97+
}
98+
99+
if (file.hasChild) {
93100
const folderFiles = await getApiDatasetFileList({
94101
datasetId: datasetDetail._id,
95102
parentId: file?.id
96103
});
97104

98105
const subFiles = await getFilesRecursively(folderFiles);
99106
allFiles.push(...subFiles);
100-
} else {
101-
allFiles.push(file);
102107
}
108+
allFiles.push(file);
103109
}
104110

105111
return allFiles;
106112
};
107113

108114
const allFiles = await getFilesRecursively(selectFiles);
115+
const uniqueFiles = allFiles.filter(
116+
(item, index, array) =>
117+
!existIdList.has(item.id) && array.findIndex((file) => file.id === item.id) === index
118+
);
109119

110120
setSources(
111-
allFiles
112-
.filter((item) => !existIdList.includes(item.id))
113-
.map((item) => ({
114-
id: item.id,
115-
apiFileId: item.id,
116-
apiFile: item,
117-
createStatus: 'waiting',
118-
sourceName: item.name,
119-
icon: getSourceNameIcon({ sourceName: item.name }) as any
120-
}))
121+
uniqueFiles.map((item) => ({
122+
id: item.id,
123+
apiFileId: item.id,
124+
apiFile: item,
125+
createStatus: 'waiting',
126+
sourceName: item.name,
127+
icon: getSourceNameIcon({ sourceName: item.name }) as any
128+
}))
121129
);
122130
},
123131
{
@@ -147,15 +155,24 @@ const CustomAPIFileInput = () => {
147155
[selectFiles]
148156
);
149157

150-
const handleSelectAll = useCallback(() => {
151-
const isAllSelected = fileList.length === selectFiles.length;
158+
const isAllSelected = useMemo(() => {
159+
return fileList.every(
160+
(item) => existIdList.has(item.id) || selectFiles.some((file) => file.id === item.id)
161+
);
162+
}, [fileList, selectFiles, existIdList]);
152163

164+
const handleSelectAll = useCallback(() => {
153165
if (isAllSelected) {
154-
setSelectFiles([]);
166+
setSelectFiles((state) =>
167+
state.filter((file) => !fileList.find((item) => item.id === file.id))
168+
);
155169
} else {
156-
setSelectFiles(fileList);
170+
setSelectFiles((state) => [
171+
...state.filter((file) => !fileList.find((item) => item.id === file.id)),
172+
...fileList.filter((item) => !existIdList.has(item.id))
173+
]);
157174
}
158-
}, [fileList, selectFiles]);
175+
}, [isAllSelected, fileList, existIdList]);
159176

160177
return (
161178
<MyBox isLoading={loading} position="relative" h="full">
@@ -193,23 +210,22 @@ const CustomAPIFileInput = () => {
193210
fontSize={'sm'}
194211
fontWeight={'medium'}
195212
color={'myGray.900'}
196-
onClick={(e) => {
197-
if (!(e.target as HTMLElement).closest('.checkbox')) {
198-
handleSelectAll();
199-
}
200-
}}
213+
// onClick={(e) => {
214+
// if (!(e.target as HTMLElement).closest('.checkbox')) {
215+
// handleSelectAll();
216+
// }
217+
// }}
201218
>
202219
<Checkbox
203220
className="checkbox"
204221
mr={2}
205-
isChecked={fileList.length === selectFiles.length}
222+
isChecked={isAllSelected}
206223
onChange={handleSelectAll}
207224
/>
208225
{t('common:Select_all')}
209226
</Flex>
210227
{fileList.map((item) => {
211-
const isFolder = item.type === 'folder';
212-
const isExists = existIdList.includes(item.id);
228+
const isExists = existIdList.has(item.id);
213229
const isChecked = isExists || selectFiles.some((file) => file.id === item.id);
214230

215231
return (
@@ -243,9 +259,9 @@ const CustomAPIFileInput = () => {
243259
/>
244260
<MyIcon
245261
name={
246-
!isFolder
247-
? (getSourceNameIcon({ sourceName: item.name }) as any)
248-
: 'common/folderFill'
262+
item.type === 'folder'
263+
? 'common/folderFill'
264+
: (getSourceNameIcon({ sourceName: item.name }) as any)
249265
}
250266
w={'18px'}
251267
mr={1.5}

0 commit comments

Comments
 (0)