Skip to content

Commit 31a91d5

Browse files
committed
feat: aggregate all measure columns in sql
1 parent dae947c commit 31a91d5

File tree

3 files changed

+67
-20
lines changed

3 files changed

+67
-20
lines changed

packages/vmind/src/common/dataProcess/dataQuery.ts

+7-18
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ import {
99
replaceInvalidWords,
1010
swapMap,
1111
replaceBlankSpace,
12-
replaceString
12+
replaceString,
13+
sumAllMeasureFields,
14+
convertGroupByToString
1315
} from './utils';
1416
import alasql from 'alasql';
1517

@@ -25,48 +27,35 @@ export const VMIND_DATA_SOURCE = 'VMind_data_source';
2527
*/
2628
export const queryDataset = (sql: string, sourceDataset: DataItem[], fieldInfo: SimpleFieldInfo[]) => {
2729
const fieldNames = fieldInfo.map(field => field.fieldName);
28-
console.log(sql);
2930
const { validStr, sqlReplaceMap, columnReplaceMap } = replaceInvalidWords(sql, fieldNames);
30-
console.log(validStr);
31-
32-
//const { validStr: sqlWithNoASCII, replaceMap: ASCIIReplaceMap } = replaceNonASCIICharacters(sqlWithNoOperator);
33-
//console.log(sqlWithNoASCII);
34-
35-
//const replaceMap = mergeMap(ASCIIReplaceMap, operatorReplaceMap);
36-
console.log(columnReplaceMap);
37-
console.log(sqlReplaceMap);
3831

3932
//replace field names according to replaceMap
4033
const validColumnDataset = replaceDataset(sourceDataset, columnReplaceMap, true);
41-
console.log(validColumnDataset);
4234

4335
//replace field names and data values according to replaceMap
4436
const validDataset = replaceDataset(validColumnDataset, sqlReplaceMap, false);
45-
console.log(validDataset);
4637

4738
//replace blank spaces in column name
4839
const replacedFieldNames = fieldNames
4940
.map(field => replaceString(field, columnReplaceMap))
5041
.map(field => replaceString(field, sqlReplaceMap));
51-
const finalSql = replaceBlankSpace(validStr, replacedFieldNames as string[]);
52-
console.log(finalSql);
42+
const validSql = replaceBlankSpace(validStr, replacedFieldNames as string[]);
43+
44+
const finalSql = sumAllMeasureFields(validSql, fieldInfo, columnReplaceMap, sqlReplaceMap);
45+
//convertGroupByToString(finalSql, validDataset)
5346

5447
//replace VMIND_DATA_SOURCE with placeholder "?"
5548
const sqlParts = (finalSql + ' ').split(VMIND_DATA_SOURCE);
5649
const sqlCount = sqlParts.length - 1;
5750
const alasqlQuery = sqlParts.join('?');
5851
//do the query
5952
const alasqlDataset = alasql(alasqlQuery, new Array(sqlCount).fill(validDataset));
60-
console.log(alasqlDataset);
6153

6254
//restore the dataset
6355
const columnReversedMap = swapMap(columnReplaceMap);
6456
const columnRestoredDataset = replaceDataset(alasqlDataset, columnReversedMap, true);
65-
console.log(columnRestoredDataset);
6657
const sqlReversedMap = swapMap(sqlReplaceMap);
6758
const sqlRestoredDataset = replaceDataset(columnRestoredDataset, sqlReversedMap, false);
6859

69-
console.log(sqlRestoredDataset);
70-
7160
return sqlRestoredDataset;
7261
};

packages/vmind/src/common/dataProcess/utils.ts

+59-1
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,6 @@ export const replaceBlankSpace = (sql: string, fieldNames: string[]) => {
347347
const matchedFieldName = fieldNames.find(field => matchColumnName(column, field));
348348
return matchedFieldName ?? column;
349349
});
350-
console.log(columnsInSql, validColumnNames);
351350

352351
const finalSql = columnsInSql.reduce((prev, _cur, index) => {
353352
const originColumnName = columnsInSql[index];
@@ -360,3 +359,62 @@ export const replaceBlankSpace = (sql: string, fieldNames: string[]) => {
360359
}, sql);
361360
return finalSql;
362361
};
362+
363+
/**
 * Wrap measure columns that the SQL selects without aggregating in SUM().
 *
 * Sometimes the model returns a SQL statement in which some measure fields are
 * neither aggregated nor listed in GROUP BY, which leaves an empty field in the
 * queried dataset. Those columns are rewritten to "SUM(`col`) as col" (or
 * "SUM(`col`) AS alias" when the column already carries an explicit alias).
 *
 * Only bare column references inside the SELECT list are rewritten, so
 * occurrences in WHERE / GROUP BY / ORDER BY / HAVING and occurrences that are
 * already arguments of a function call are left untouched.
 *
 * @param sql - SQL statement whose field names have already been replaced.
 * @param fieldInfo - Field descriptions; only fields whose role is ROLE.MEASURE are considered.
 * @param columnReplaceMap - First replacement map applied to each field name.
 * @param sqlReplaceMap - Second replacement map applied to each field name.
 * @returns The patched SQL, or the input SQL unchanged when nothing needs aggregating.
 */
export const sumAllMeasureFields = (
  sql: string,
  fieldInfo: SimpleFieldInfo[],
  columnReplaceMap: Map<string, string>,
  sqlReplaceMap: Map<string, string>
) => {
  // Escape regex metacharacters so a column name is always matched literally.
  const escapeRegExp = (text: string) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Measure field names as they appear in the sql after both replacement passes.
  const measureFieldsInSql = fieldInfo
    .filter(field => field.role === ROLE.MEASURE)
    .map(field => {
      const { fieldName } = field;
      const replacedName1 = replaceString(fieldName, columnReplaceMap);
      const replacedName2 = replaceString(replacedName1, sqlReplaceMap);

      return replacedName2;
    });

  const ast: any = alasql.parse(sql);
  const statement: any = ast?.statements?.[0];
  // A statement without a GROUP BY clause (or a non-SELECT statement) may lack these nodes.
  const selectedColumns: any[] = statement?.columns ?? [];
  const groupedColumns: any[] = statement?.group ?? [];

  const nonAggregatedColumns: string[] = selectedColumns
    .filter((column: any) => !column.aggregatorid)
    .map((column: any) => column.columnid);
  const groupByColumns: string[] = groupedColumns.map((column: any) => column.columnid);

  // Measure columns that are neither aggregated nor grouped by. Deduplicated so a
  // column selected twice is not rewritten a second time on already patched text.
  const needAggregateColumns = Array.from(
    new Set(
      nonAggregatedColumns
        .filter(column => measureFieldsInSql.includes(column))
        .filter(column => !groupByColumns.includes(column))
    )
  );

  if (needAggregateColumns.length === 0) {
    return sql;
  }

  // Limit the rewrite to the SELECT list (text between the leading SELECT and the
  // first FROM). If the statement does not have that shape, fall back to the whole sql.
  const selectParts = /^(\s*SELECT\s+)([\s\S]*?)(\s+FROM\s[\s\S]*)$/i.exec(sql);
  const head = selectParts ? selectParts[1] : '';
  const selectList = selectParts ? selectParts[2] : sql;
  const tail = selectParts ? selectParts[3] : '';

  const patchedSelectList = needAggregateColumns.reduce((prev, column) => {
    const escaped = escapeRegExp(column);
    // Match the column only as a standalone select item: preceded by start/whitespace/comma,
    // followed by end/whitespace/comma, optionally carrying an explicit "AS alias".
    const regex = new RegExp(
      `(^|[\\s,])(?:\`${escaped}\`|${escaped})(\\s+[aA][sS]\\s+[^\\s,]+)?(?=$|[\\s,])`,
      'g'
    );
    // A replacer function is used so "$" in a column name is never treated as a pattern.
    return prev.replace(regex, (_match: string, prefix: string, alias: string | undefined) =>
      alias ? `${prefix}SUM(\`${column}\`)${alias}` : `${prefix}SUM(\`${column}\`) as ${column}`
    );
  }, selectList);

  return head + patchedSelectList + tail;
};
408+
409+
/**
 * Convert the values of every GROUP BY column of the sql to strings.
 *
 * The dataset is modified in place; nothing is returned. Statements without a
 * GROUP BY clause leave the dataset untouched, and null/undefined cell values
 * are kept as they are instead of being stringified.
 *
 * @param sql - SQL statement whose GROUP BY columns decide which fields are converted.
 * @param dataset - Rows to convert; each row is mutated.
 */
export const convertGroupByToString = (sql: string, dataset: DataItem[]) => {
  const ast: any = alasql.parse(sql);
  // The group node may be missing when the statement has no GROUP BY clause.
  const groupNodes: any[] = ast?.statements?.[0]?.group ?? [];
  // Skip GROUP BY expressions that are not plain column references (no columnid).
  const groupByColumns: string[] = groupNodes
    .map((column: any) => column.columnid)
    .filter((columnId: unknown): columnId is string => typeof columnId === 'string');

  if (groupByColumns.length === 0) {
    return;
  }

  dataset.forEach(item => {
    groupByColumns.forEach(column => {
      const value = item[column];
      // Calling toString() on null/undefined would throw, so such cells are skipped.
      if (value !== undefined && value !== null) {
        item[column] = value.toString();
      }
    });
  });
};

packages/vmind/src/skylark/dataProcess/query/prompts.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ export const getQueryDatasetPrompt = (
1818
1. 从用户的指令中提取与数据相关的部分。忽略其他与数据无关的部分。
1919
2. 根据列的名称和类型,推断${VMIND_DATA_SOURCE}中与用户指令有关的列,并将其添加到SELECT中。尽可能多地选择相关列,不要遗漏一些关键的列,比如日期类型的维度等。你只能使用Column Information中提到的列,不要假设不存在的列。如果现有的列不能满足用户的命令,选择Column Information中最相关的列。
2020
3. 不论用户指定了哪种图表类型,将所选择的度量列使用聚合函数聚合,即使你推断它们不适合被聚合,即使用户没有要求你这样做。如果你不确定使用哪个聚合函数,使用SUM()。不要使用不支持的聚合函数。
21-
4. 使用你所选择的维度列对数据进行分组,将其添加到GROUP BY中。没有被聚合的度量列也必须被放到GROUP BY中
21+
4. 使用维度列对数据进行分组
2222
5. 在您的sql中,如有必要,您也可以使用WHERE, HAVING, ORDER BY, LIMIT。使用支持的操作符完成WHERE和HAVING。只能使用如columnA = value1,sum_b > 0的二元表达式。在您的表达式中,只能使用在维度列的domain中出现的维度值。
2323
2424
让我们一步一步思考。不要忘了将所有度量列聚合。

0 commit comments

Comments
 (0)