Skip to content

Commit 81398df

Browse files
authored
Merge pull request #48 from VisActor/feat-dataAggregation-skylark
Feat data aggregation skylark
2 parents a8d510d + b21450f commit 81398df

File tree

14 files changed

+678
-391
lines changed

14 files changed

+678
-391
lines changed

packages/chart-advisor/src/index.ts

+3-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ import { scorer as defaultScorer } from './score';
1212
import * as dataUtils from './dataUtil';
1313
import { isNil, isNaN } from 'lodash';
1414

15+
export { fold } from './fieldUtils';
16+
export { FOLD_NAME, FOLD_VALUE } from './constant';
17+
1518
export function chartAdvisor(params: AdviserParams): AdviseResult {
1619
const {
1720
originDataset,
@@ -66,7 +69,6 @@ export function chartAdvisor(params: AdviserParams): AdviseResult {
6669
};
6770
dimensionSet.uniqueID = uniqueID;
6871
originDataset.forEach(row => {
69-
//the backend version can read the un-flattened dataSource directly, so this check is unnecessary
7072
dimensionSet.data.push(row[uniqueID]);
7173
});
7274
dimensionSet.dataType = dimension.type;

packages/vmind/src/common/dataProcess/utils.ts

+14-8
Original file line numberDiff line numberDiff line change
@@ -383,17 +383,23 @@ export const sumAllMeasureFields = (
383383
});
384384

385385
const ast: any = alasql.parse(sql);
386-
const nonAggregatedColumns: string[] = ast.statements[0].columns
386+
const selectedColumns = ast.statements[0].columns;
387+
const nonAggregatedColumns: string[] = selectedColumns
387388
.filter((column: any) => !column.aggregatorid)
388389
.map((column: any) => column.columnid);
389-
const groupByColumns: string[] = ast.statements[0].group.map((column: any) => column.columnid);
390390

391-
//aggregate columns that is not in group by statement
392-
const needAggregateColumns = nonAggregatedColumns
393-
//filter all the measure fields
394-
.filter(column => measureFieldsInSql.includes(column))
395-
//filter measure fields that is not in groupby
396-
.filter(column => !groupByColumns.includes(column));
391+
const groupByColumns: string[] = (ast.statements[0].group ?? []).map((column: any) => column.columnid);
392+
393+
//if the sql contains at least one aggregated column and a GROUP BY statement, aggregate the remaining non-aggregated measure columns
394+
let needAggregateColumns: string[] = [];
395+
if (groupByColumns.length > 0 && nonAggregatedColumns.length !== selectedColumns.length) {
396+
//aggregate columns that are not in the group by statement
397+
needAggregateColumns = nonAggregatedColumns
398+
//filter all the measure fields
399+
.filter(column => measureFieldsInSql.includes(column))
400+
//keep only measure fields that are not in the group by statement
401+
.filter(column => !groupByColumns.includes(column));
402+
}
397403

398404
const patchedFields = needAggregateColumns.map(column => `SUM(\`${column}\`) as ${column}`);
399405

packages/vmind/src/common/utils.ts

+10
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,13 @@ export const calculateTokenUsage = (usageList: any[]) => {
1111
});
1212
return totalUsage;
1313
};
14+
15+
export const execPipeline = <PipelineContext>(
16+
src: any,
17+
pipes: ((src: any, context: PipelineContext) => any)[],
18+
context: PipelineContext
19+
) =>
20+
pipes.reduce((pre: any, pipe: (src: any, context: PipelineContext) => any) => {
21+
const result = pipe(pre, context);
22+
return result;
23+
}, src);

packages/vmind/src/common/vizDataToSpec/utils.ts

+22
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import { Cell, DataItem, DataType, ROLE, SimpleFieldInfo } from 'src/typings';
12
import { VIDEO_LENGTH_BY_CHART_TYPE, DEFAULT_VIDEO_LENGTH } from './constants';
3+
import { FOLD_NAME, FOLD_VALUE, fold } from '@visactor/chart-advisor';
24

35
export const detectAxesType = (values: any[], field: string) => {
46
const isNumber = values.every(d => !d[field] || !isNaN(Number(d[field])));
@@ -43,3 +45,23 @@ export const estimateVideoTime = (chartType: string, spec: any, parsedTime?: num
4345
frameArr: []
4446
};
4547
};
48+
49+
export const getRemainedFields = (cell: Cell, fieldInfo: SimpleFieldInfo[]) => {
50+
const usedFields = Object.values(cell).flat();
51+
const remainedFields = fieldInfo.filter(f => !usedFields.includes(f.fieldName));
52+
return remainedFields;
53+
};
54+
55+
export const getFieldByRole = (fields: SimpleFieldInfo[], role: ROLE) => {
56+
return fields.find(f => f.role === role);
57+
};
58+
59+
export const getFieldByDataType = (fields: SimpleFieldInfo[], dataTypeList: DataType[]) => {
60+
return fields.find(f => dataTypeList.includes(f.type));
61+
};
62+
63+
export const foldDatasetByYField = (dataset: DataItem[], yFieldList: string[], fieldInfo: SimpleFieldInfo[]) => {
64+
const aliasMap = Object.fromEntries(fieldInfo.map(d => [d.fieldName, d.fieldName]));
65+
66+
return fold(dataset as any, yFieldList, FOLD_NAME, FOLD_VALUE, aliasMap, false);
67+
};

packages/vmind/src/common/vizDataToSpec/vizDataToSpec.ts

+4-9
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,10 @@ import {
4545
boxPlotField,
4646
boxPlotStyle
4747
} from './pipes';
48-
import { Cell, ChartType, Context, FieldInfo, Pipe, SimpleFieldInfo } from '../../typings';
49-
import { CARTESIAN_CHART_LIST, detectAxesType } from './utils';
48+
import { Cell, ChartType, Context, SimpleFieldInfo } from '../../typings';
5049
import { isArray } from 'lodash';
50+
import { execPipeline } from '../utils';
51+
5152
export const vizDataToSpec = (
5253
dataset: any[],
5354
chartType: ChartType,
@@ -56,7 +57,7 @@ export const vizDataToSpec = (
5657
totalTime?: number
5758
) => {
5859
const pipelines = pipelineMap[chartType];
59-
const spec = execPipeline({}, pipelines, {
60+
const spec = execPipeline<Context>({}, pipelines, {
6061
chartType,
6162
dataset,
6263
cell,
@@ -160,9 +161,3 @@ export const pipelineMap: { [chartType: string]: any } = {
160161
'WATERFALL CHART': pipelineWaterfall,
161162
'BOX PLOT': pipelineBoxPlot
162163
};
163-
164-
export const execPipeline = (src: any, pipes: Pipe[], context: Context) =>
165-
pipes.reduce((pre: any, pipe: Pipe) => {
166-
const result = pipe(pre, context);
167-
return result;
168-
}, src);

packages/vmind/src/gpt/chart-generation/NLToChart.ts

+4-2
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,15 @@ import { SUPPORTED_CHART_LIST } from '../../common/vizDataToSpec/constants';
22
import { DataItem, GPTChartAdvisorResult, ILLMOptions, LOCATION, SimpleFieldInfo, VizSchema } from '../../typings';
33
import { checkChartTypeAndCell, vizDataToSpec } from '../../common/vizDataToSpec';
44
import { parseGPTResponse, requestGPT } from '../utils';
5-
import { patchChartTypeAndCell, patchUserInput } from './utils';
5+
import { patchUserInput } from './utils';
66
import { ChartAdvisorPromptEnglish } from './prompts';
77
import { chartAdvisorHandler } from '../../common/chartAdvisor';
88
import { estimateVideoTime } from '../../common/vizDataToSpec/utils';
99
import { getSchemaFromFieldInfo } from '../../common/schema';
1010
import { queryDatasetWithGPT } from '../dataProcess/query/queryDataset';
1111
import { calculateTokenUsage } from '../..//common/utils';
1212
import { pick } from 'lodash';
13+
import { patchChartTypeAndCell } from './patch';
1314

1415
export const generateChartWithGPT = async (
1516
userPrompt: string, //user's intent of visualization, usually aspect in data that they want to visualize
@@ -53,10 +54,11 @@ export const generateChartWithGPT = async (
5354
const chartTypeRes = resJson['CHART_TYPE'].toUpperCase();
5455
const cellRes = resJson['FIELD_MAP'];
5556
advisorUsage = resJson['usage'];
56-
const patchResult = patchChartTypeAndCell(chartTypeRes, cellRes, dataset);
57+
const patchResult = patchChartTypeAndCell(chartTypeRes, cellRes, dataset, fieldInfo);
5758
if (checkChartTypeAndCell(patchResult.chartTypeNew, patchResult.cellNew, fieldInfo)) {
5859
chartType = patchResult.chartTypeNew;
5960
cell = patchResult.cellNew;
61+
dataset = patchResult.datasetNew;
6062
}
6163
} catch (err) {
6264
console.warn(err);

0 commit comments

Comments
 (0)