Skip to content

Commit 31ccae4

Browse files
authored
Merge pull request #30 from VisActor/dev/1.2.4
Dev/1.2.4
2 parents 25dc339 + 887d22a commit 31ccae4

File tree

13 files changed

+194
-121
lines changed

13 files changed

+194
-121
lines changed

packages/vmind/__tests__/browser/src/constants/mockData.ts

+3-4
Original file line numberDiff line numberDiff line change
@@ -3625,15 +3625,14 @@ Xiaomi,0.1,4087,131345
36253625
* 雷达图
36263626
*/
36273627
export const mockUserInput14 = {
3628-
csv: `key,value
3628+
csv: `dimension,performance
36293629
Strength,5
36303630
Speed,5
36313631
Shooting,3
36323632
Endurance,5
36333633
Precision,5
3634-
Growth,5
3635-
`,
3636-
input: '帮我展示个人在不同方面的绩效,他是否是六边形战士'
3634+
Growth,5`,
3635+
input: '帮我展示这个人在不同方面的绩效,他是否是六边形战士'
36373636
};
36383637

36393638
/**

packages/vmind/bundler.config.js

+1-2
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,10 @@
44
const json = require('@rollup/plugin-json');
55

66
module.exports = {
7-
formats: ['cjs', 'es', 'umd'],
7+
formats: ['cjs', 'es'],
88
outputDir: {
99
es: 'esm',
1010
cjs: 'cjs',
11-
umd: 'build'
1211
},
1312
name: 'VMind',
1413
umdOutputFilename: 'index',

packages/vmind/src/chart-to-video/index.ts

+26-20
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ export async function _chatToVideoWasm(
77
propsSpec: any,
88
propsTime: TimeType,
99
outName = 'out',
10-
outerPackages: OuterPackages
10+
outerPackages: OuterPackages,
11+
mode?: 'node' | 'desktop-browser'
1112
) {
1213
const { ManualTicker, defaultTimeline, VChart, fetchFile, FFmpeg, createCanvas } = outerPackages;
1314

@@ -31,7 +32,7 @@ export async function _chatToVideoWasm(
3132
const canvas = createCanvas(width, height);
3233
const vchart = new VChart(spec, {
3334
renderCanvas: canvas,
34-
mode: 'desktop-browser',
35+
mode: 'node',
3536
dpr: 1,
3637
disableDirtyBounds: true,
3738
ticker: defaultTicker,
@@ -74,28 +75,33 @@ export async function _chatToVideoWasm(
7475
vchart.getStage().render();
7576
const num = `0000${i}`.slice(-3);
7677

77-
const size = { width: canvas.width, height: canvas.height };
78-
const blob = await new Promise((resolve, reject) => {
79-
canvas.toBlob((blob: any) => {
80-
if (blob) {
81-
const info = {
82-
data: blob,
83-
format: 'PNG',
84-
size
85-
};
86-
console.log(`BBB--------${info}`);
87-
resolve(info);
88-
} else {
89-
console.log('no blob');
90-
reject('no blob');
91-
}
92-
}, `image/png`);
93-
});
78+
if (mode === 'node') {
79+
const buffer = (canvas as any).toBuffer();
80+
FFmpeg.FS('writeFile', `vchart${idx}.${num}.png`, buffer);
81+
} else {
82+
const size = { width: canvas.width, height: canvas.height };
83+
const blob = await new Promise((resolve, reject) => {
84+
canvas.toBlob((blob: any) => {
85+
if (blob) {
86+
const info = {
87+
data: blob,
88+
format: 'PNG',
89+
size
90+
};
91+
console.log(`BBB--------${info}`);
92+
resolve(info);
93+
} else {
94+
console.log('no blob');
95+
reject('no blob');
96+
}
97+
}, `image/png`);
98+
});
99+
FFmpeg.FS('writeFile', `vchart${idx}.${num}.png`, await fetchFile((blob as any).data));
100+
}
94101

95102
// defaultTicker.mode = 'raf'
96103
// const imageData = ctx.getImageData(0, 0, ctx.canvas.width, ctx.canvas.height);
97104
// console.log(new Uint8Array(imageData.data.buffer))
98-
FFmpeg.FS('writeFile', `vchart${idx}.${num}.png`, await fetchFile((blob as any).data));
99105
}
100106

101107
vchart.release();

packages/vmind/src/common/dataProcess/index.ts

-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ export const parseCSVWithVChart = (csvString: string) => {
1515
return dataView;
1616
};
1717

18-
export const getDataView = (dataset: DataItem[]) => {};
19-
2018
export const getDataset = (csvString: string): { dataset: DataItem[]; columns: string[] } => {
2119
//get dataset from csv string
2220
const dataView = parseCSVWithVChart(csvString);

packages/vmind/src/common/dataProcess/utils.ts

+16-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { sampleSize, isNumber, isInteger } from 'lodash';
22
import { DataItem, DataType, ROLE, SimpleFieldInfo } from '../../typings';
33
import dayjs from 'dayjs';
4+
import { uniqArray } from '@visactor/vutils';
45
export const readTopNLine = (csvFile: string, n: number) => {
56
// get top n lines of a csv file
67
let res = '';
@@ -29,13 +30,18 @@ function validateDate(date: any) {
2930
return dayjs(date, 'YYYY-MM-DD').isValid() || dayjs(date, 'MM-DD').isValid();
3031
}
3132

33+
export function removeEmptyLines(str: string) {
34+
return str.replace(/\n\s*\n/g, '\n');
35+
}
36+
3237
export const detectFieldType = (dataset: DataItem[], column: string): SimpleFieldInfo => {
3338
let fieldType: DataType | undefined = undefined;
3439
//detect field type based on rules
3540
//The data types have the following inclusion relationships:
3641
//date=>string
3742
//int=>float=>string
3843
//detect field type from strict to loose
44+
3945
dataset.every(data => {
4046
const value = data[column];
4147
const numberValue = Number(value);
@@ -88,10 +94,19 @@ export const detectFieldType = (dataset: DataItem[], column: string): SimpleFiel
8894
return true;
8995
}
9096
});
97+
const role = [DataType.STRING, DataType.DATE].includes(fieldType) ? ROLE.DIMENSION : ROLE.MEASURE;
98+
99+
//calculate domain of the column
100+
const domain: (string | number)[] = dataset.map(d => (role === ROLE.DIMENSION ? d[column] : Number(d[column])));
101+
91102
return {
92103
fieldName: column,
93104
type: fieldType,
94-
role: [DataType.STRING, DataType.DATE].includes(fieldType) ? ROLE.DIMENSION : ROLE.MEASURE
105+
role,
106+
domain:
107+
role === ROLE.DIMENSION
108+
? (uniqArray(domain) as string[]).slice(0, 20)
109+
: [Math.min(...(domain as number[])), Math.max(...(domain as number[]))]
95110
};
96111
};
97112
export const getFieldInfoFromDataset = (dataset: DataItem[], columns: string[]): SimpleFieldInfo[] => {

packages/vmind/src/core/VMind.ts

+4-4
Original file line numberDiff line numberDiff line change
@@ -96,18 +96,18 @@ class VMind {
9696
return { fieldInfo: [], dataset };
9797
}
9898

99-
async exportVideo(spec: any, time: TimeType, outerPackages: OuterPackages) {
99+
async exportVideo(spec: any, time: TimeType, outerPackages: OuterPackages, mode?: 'node' | 'desktop-browser') {
100100
const { VChart, FFmpeg, fetchFile, ManualTicker } = outerPackages;
101101
const outName = `out`;
102-
await _chatToVideoWasm(this._FPS, spec, time, outName, outerPackages);
102+
await _chatToVideoWasm(this._FPS, spec, time, outName, outerPackages, mode);
103103
const data = FFmpeg.FS('readFile', `${outName}.mp4`);
104104
return data.buffer;
105105
}
106106

107-
async exportGIF(spec: any, time: TimeType, outerPackages: OuterPackages) {
107+
async exportGIF(spec: any, time: TimeType, outerPackages: OuterPackages, mode?: 'node' | 'desktop-browser') {
108108
const { VChart, FFmpeg, fetchFile } = outerPackages;
109109
const outName = `out`;
110-
await _chatToVideoWasm(this._FPS, spec, time, outName, outerPackages);
110+
await _chatToVideoWasm(this._FPS, spec, time, outName, outerPackages, mode);
111111
// 调色板
112112
await FFmpeg.run('-i', `${outName}.mp4`, '-filter_complex', '[0:v] palettegen', 'palette.png');
113113
await FFmpeg.run(

packages/vmind/src/gpt/chart-generation/utils.ts

+29-5
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,16 @@ export const patchChartTypeAndCell = (chartTypeOutter: string, cell: any, datase
3939
const { x, y } = cell;
4040

4141
let chartType = chartTypeOutter;
42+
43+
// patch the "axis" field into x when x is missing, otherwise into y
44+
if (cell.axis && (!cell.x || !cell.y)) {
45+
if (!cell.x) {
46+
cell.x = cell.axis;
47+
} else if (!cell.y) {
48+
cell.y = cell.axis;
49+
}
50+
}
51+
4252
// y轴字段有多个时,处理方式:
4353
// 1. 图表类型为: 箱型图, 图表类型不做矫正
4454
// 2. 图表类型为: 柱状图 或 折线图, 图表类型矫正为双轴图
@@ -77,13 +87,20 @@ export const patchChartTypeAndCell = (chartTypeOutter: string, cell: any, datase
7787
const {
7888
lower_whisker,
7989
lowerWhisker,
80-
lowerBox,
8190
min,
8291
lower,
92+
lowerBox,
93+
lower_box,
8394
q1,
95+
lower_quartile,
96+
lowerQuartile,
97+
midline,
8498
median,
8599
q3,
86100
upperBox,
101+
upper_box,
102+
upper_quartile,
103+
upperQuartile,
87104
upper_whisker,
88105
upperWhisker,
89106
max,
@@ -98,15 +115,22 @@ export const patchChartTypeAndCell = (chartTypeOutter: string, cell: any, datase
98115
lowerWhisker,
99116
min,
100117
lower,
101-
q1,
102118
lowerBox,
119+
lower_box,
120+
q1,
121+
lower_quartile,
122+
lowerQuartile,
123+
midline,
103124
median,
104-
upperBox,
105125
q3,
126+
upperBox,
127+
upper_box,
128+
upper_quartile,
129+
upperQuartile,
106130
upper_whisker,
131+
upperWhisker,
107132
max,
108-
upper,
109-
upperWhisker
133+
upper
110134
].filter(Boolean)
111135
}
112136
};

packages/vmind/src/gpt/dataProcess/prompts.ts

+32-56
Original file line numberDiff line numberDiff line change
@@ -195,78 +195,51 @@ Response:
195195

196196
export const getQueryDatasetPrompt = (
197197
showThoughts: boolean
198-
) => `You are an expert in data analysis. Here is a raw dataset named dataSource. User will tell you his command and column information of DataSource. You need to generate a standard SQL query to select useful fields from dataSource according to the template following the Steps and Description. Return the JSON object only.
199-
# Note
200-
1. You are running on a simple SQL engine, so the advanced features, such as RANK() OVER, TOP, JOIN, UNION, etc., are not supported. Please follow the SQL template and Description strictly.
201-
2. Don't guess the specific data content in your SQL. Don't use conditional statement.
202-
3. If you think the fields in dataSource cannot meet user requirements, do not further generate new fields. Just ignore user's command and use these fields.
198+
) => `You are an expert in data analysis. Here is a raw dataset named dataSource. User will tell you his command and column information of dataSource. Your task is to generate SimQuery and fieldInfo according to SimQuery Instruction. Response one JSON object only.
203199
200+
# SimQuery Instruction
201+
- SimQuery is a simplified SQL-like language. Supported keywords in SimQuery: ["SELECT", "FROM", "WHERE", "GROUP BY", "HAVING", "ORDER BY", "LIMIT"].
202+
- A SimQuery query looks like this: "SELECT columnA, SUM(columnB) as sum_b FROM dataSource WHERE columnA = value1 GROUP BY columnA HAVING sum_b>0 ORDER BY sum_b LIMIT 10".
203+
- Columns in SELECT can only be original columns or aggregated columns. Supported aggregation methods in SimQuery: ["MAX()", "MIN()", "SUM()", "COUNT()", "AVG()"].
204+
- The "WHERE" and "HAVING" in SimQuery can only use original columns or aggregated columns in dataSource. Supported Operators in SimQuery:[ ">", ">=", "<", "<=", "=", "!=", "in", "not in", "is null", "is not null", "between", "not between", "like", "not like"]. Don't use non-existent columns.
205+
- Don't use unsupported keywords such as CASE WHEN...ELSE...END or PERCENTILE_CONT. Don't use unsupported aggregation methods on columns. Don't use unsupported operators. Unsupported keywords, methods and operators will cause system crash. If current keywords and methods can't meet your needs, just simple select the column without any process.
206+
- Make your SimQuery as simple as possible.
204207
205-
# SQL template:
206-
SELECT xxx FROM xxx (WHERE xxx) GROUP BY xxx (HAVING xxx) (ORDER BY xxx) (LIMIT xxx).
207-
208+
You need to follow the steps below.
208209
209210
# Steps
210-
1. Just use user's command to select useful fields directly. Ignore other parts of user's command.
211-
2. Select useful dimension fields from dataSource. Use the original dimension field without any process.
212-
3. Aggregate the measure fields. Supported aggregation function: MAX(), MIN(), SUM(), COUNT(), AVG(). Note: don't aggregate measures using functions that are not supported such as PERCENTILE_CONT(). Don't use conditional statement.
213-
4. Group the data using dimension fields and fill it in GROUP BY.
214-
5. You can also use WHERE, HAVING, ORDER BY, LIMIT in your SQL if necessary.
215-
216-
217-
# Description
218-
1. The part in brackets is optional. xxx in the SQL template can only be original columns or aggregated columns. Select Data only from one table. Don't use unsupported features such as RANK(), TOP, UNION, etc.
219-
2. Make your SQL as simple as possible. Strictly follow the SQL template to generate SQL. Don't use JOIN, UNION, subquery or other feature that is not in the SQL template. Don't process fields in ways other than supported aggregation functions.
220-
3. Please don't change or translate the field names in your SQL statement.
221-
4. Don't ignore GROUP BY in your SQL.
211+
1. Extract the part related to the data from the user's instruction. Ignore other parts that is not related to the data.
212+
2. Select useful dimension and measure columns from dataSource. You can only use columns in Column Information and do not assume non-existent columns. If the existing columns can't meet user's command, just select the most related columns in Column Information.
213+
3. Use the original dimension columns without any process. Aggregate the measure columns using aggregation methods supported in SimQuery. Don't use unsupported methods. If current keywords and methods can't meet your needs, just simple select the column without any process.
214+
4. Group the data using dimension columns.
215+
5. You can also use WHERE, HAVING, ORDER BY, LIMIT in your SimQuery if necessary. Use the supported operators to finish the WHERE and HAVING of SimQuery. You can only use binary expression such as columnA = value1, sum_b > 0. You can only use dimension values appearing in the domain of dimension columns in your expression.
222216
217+
Let's think step by step.
223218
224-
Response in JSON format without any additional words. Your JSON object must contain sql and fieldInfo.
219+
Response one JSON object without any additional words. Your JSON object must contain SimQuery and fieldInfo.
225220
226-
Make your SQL as simple as possible.
227-
228-
Response in the following JSON format:
221+
Response in the following format:
229222
\`\`\`
230223
{
231-
sql: string; //your sql statement. Note that it's a string in a JSON object so it must be in one line without any \\n.
232-
fieldInfo: {
233-
fieldName: string; //name of the field.
234-
description?: string; //description of the field. If it is an aggregated field, please describe how it is generated in detail.
235-
}[]; //array of the information about the fields in your sql. Describing its aggregation method and other information of the fields.
224+
${showThoughts ? 'THOUGHTS: string //your thoughts' : ''}
225+
SimQuery: string; //your SimQuery query. Note that it's a string in a JSON object so it must be in one line without any \\n.
226+
fieldInfo: {
227+
fieldName: string; //name of the field.
228+
description?: string; //description of the field. If it is an aggregated field, please describe how it is generated in detail.
229+
}[]; //array of the information about the fields in your SimQuery. Describing its aggregation method and other information of the fields.
236230
}
237231
\`\`\`
238232
239233
#Examples:
240234
241-
User's Command: 帮我展示个人在不同方面的绩效,他是否是六边形战士
242-
Column Information: [{"fieldName":"key","type":"string","role":"dimension"},{"fieldName":"value","type":"int","role":"measure"}]
243-
244-
Response:
245-
\`\`\`
246-
247-
{
248-
"sql": "SELECT key, SUM(value) AS performance FROM dataSource GROUP BY key",
249-
"fieldInfo": [
250-
{
251-
"fieldName": "key",
252-
"description": "The identifier of the person."
253-
},
254-
{
255-
"fieldName": "performance",
256-
"description": "An aggregated field representing the performance of the person in different aspects. It is generated by aggregating the 'value' field."
257-
}
258-
]
259-
}
260-
\`\`\`
261-
----------------------------------
262-
263235
User's Command: Show me the change of the GDP rankings of each country.
264236
Column Information: [{"fieldName":"country","type":"string","role":"dimension"},{"fieldName":"continent","type":"string","role":"dimension"},{"fieldName":"GDP","type":"float","role":"measure"},{"fieldName":"year","type":"int","role":"measure"}]
265237
266238
Response:
267239
\`\`\`
268240
{
269-
"sql": "SELECT country, year, SUM(GDP) AS total_GDP FROM dataSource GROUP BY country, year ORDER BY year, total_GDP DESC",
241+
${showThoughts ? '"THOUGHTS": string //your thoughts' : ''}
242+
"SimQuery": "SELECT country, year, SUM(GDP) AS total_GDP FROM dataSource GROUP BY country, year ORDER BY year, total_GDP DESC",
270243
"fieldInfo": [
271244
{
272245
"fieldName": "country",
@@ -291,7 +264,8 @@ Column Information: [{"fieldName":"城市","type":"string","role":"dimension"},{
291264
Response:
292265
\`\`\`
293266
{
294-
"sql": "SELECT 城市, SUM(\`2022年GDP(亿元)\`) as sum_2022_GDP FROM dataSource ORDER BY sum_2022_GDP DESC LIMIT 5",
267+
${showThoughts ? '"THOUGHTS": string //your thoughts' : ''}
268+
"SimQuery": "SELECT 城市, SUM(\`2022年GDP(亿元)\`) as sum_2022_GDP FROM dataSource ORDER BY sum_2022_GDP DESC LIMIT 5",
295269
"fieldInfo": [
296270
{
297271
"fieldName": "城市",
@@ -312,7 +286,8 @@ Column Information: [{"fieldName":"时间","type":"string","role":"dimension"},{
312286
Response:
313287
\`\`\`
314288
{
315-
"sql": "SELECT \`时间\`, SUM(\`男_DASH_早餐\`) AS breakfast_amount_man, SUM(\`女_DASH_早餐\`) AS breakfast_amount_woman FROM dataSource GROUP BY \`时间\`",
289+
${showThoughts ? '"THOUGHTS": string //your thoughts' : ''}
290+
"SimQuery": "SELECT \`时间\`, SUM(\`男_DASH_早餐\`) AS breakfast_amount_man, SUM(\`女_DASH_早餐\`) AS breakfast_amount_woman FROM dataSource GROUP BY \`时间\`",
316291
"fieldInfo": [
317292
{
318293
"fieldName": "gender",
@@ -332,6 +307,7 @@ You only need to return the JSON in your response directly to the user.
332307
Finish your tasks in one-step.
333308
334309
# Constraints:
335-
1. Write your SQL statement in one line without any \\n.
336-
2. Response the JSON object directly without any other contents. Make sure it can be directly parsed by JSON.parse() in JavaScript.
310+
1. Write your SimQuery statement in one line without any \\n.
311+
2. Please don't change or translate the field names in your SimQuery statement. Don't miss the GROUP BY in your query.
312+
3. Response the JSON object directly without any other contents. Make sure it can be directly parsed by JSON.parse() in JavaScript.
337313
`;

0 commit comments

Comments
 (0)