Skip to content

Commit 183a039

Browse files
authored
feat: add support for json register (#47)
* add: json method * add: helper * update: path * add: tests * revert: prettier change * add: events and logger * update: comments * add: json in metadata * remove: params * add: comments * resolve: comment * change: method * nit: changes * add: tests * fix: test * address: comments * remove: type * revert: settings * update: test * change: json
1 parent e14013c commit 183a039

20 files changed

+313
-76
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
export const TEST_QUERIES = [
2+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet',
3+
"SELECT * FROM taxi.parquet WHERE originating_base_num='B03404' LIMIT 100",
4+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet GROUP BY hvfhs_license_num',
5+
'SELECT * as total_count FROM taxi.parquet ORDER BY bcf LIMIT 100',
6+
`
7+
WITH group_by_query AS (
8+
SELECT
9+
hvfhs_license_num,
10+
COUNT(*)
11+
FROM
12+
taxi.parquet
13+
GROUP BY
14+
hvfhs_license_num
15+
),
16+
17+
full_query AS (
18+
SELECT
19+
*
20+
FROM
21+
taxi.parquet
22+
)
23+
24+
SELECT
25+
COUNT(*)
26+
FROM
27+
group_by_query
28+
LEFT JOIN
29+
full_query
30+
ON
31+
group_by_query.hvfhs_license_num = full_query.hvfhs_license_num
32+
LIMIT 1
33+
`,
34+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxijson',
35+
'SELECT * FROM taxijson WHERE price >= 1.0005812645 LIMIT 100',
36+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxijson GROUP BY order_count',
37+
'SELECT * as total_count FROM taxijson ORDER BY seconds_in_bucket LIMIT 100',
38+
];

benchmarking/benchmarking-app/src/app/dbm-context/memory-dbm-context.tsx

+4
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ export const MemoryDBMProvider = ({ children }: { children: JSX.Element }) => {
2424
fetchTableFileBuffers: async (table) => {
2525
return [];
2626
},
27+
logger: log,
28+
onEvent: (event) => {
29+
console.info(event);
30+
},
2731
});
2832
log.setLevel('DEBUG');
2933
const dbm = new DBM({

benchmarking/benchmarking-app/src/app/file-loader/file-loader.tsx

+7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import axios from 'axios';
22
import { useState } from 'react';
3+
import TAXI_JSON_DATA from '../../assets/data-sets/taxi.json';
34
import { useDBM } from '../hooks/dbm-context';
45
import { useClassicEffect } from '../hooks/use-classic-effect';
56

@@ -21,6 +22,12 @@ export const FileLoader = ({ children }: { children: JSX.Element }) => {
2122
buffer: fileBufferView,
2223
});
2324

25+
await fileManager.registerJSON({
26+
json: TAXI_JSON_DATA,
27+
tableName: 'taxijson',
28+
fileName: 'taxi.json',
29+
});
30+
2431
setIsFileLoader(true);
2532
})();
2633
}, []);

benchmarking/benchmarking-app/src/app/query-benchmarking/query-benchmarking.tsx

+3-36
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { useState } from 'react';
2+
import { TEST_QUERIES } from '../constants';
23
import { useDBM } from '../hooks/dbm-context';
34
import { useClassicEffect } from '../hooks/use-classic-effect';
45

@@ -14,49 +15,15 @@ export const QueryBenchmarking = () => {
1415

1516
useClassicEffect(() => {
1617
setTotalTime(0);
17-
const testQueries = [
18-
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet',
19-
"SELECT * FROM taxi.parquet WHERE originating_base_num='B03404' LIMIT 100",
20-
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet GROUP BY hvfhs_license_num',
21-
'SELECT * as total_count FROM taxi.parquet ORDER BY bcf LIMIT 100',
22-
`
23-
WITH group_by_query AS (
24-
SELECT
25-
hvfhs_license_num,
26-
COUNT(*)
27-
FROM
28-
taxi.parquet
29-
GROUP BY
30-
hvfhs_license_num
31-
),
32-
33-
full_query AS (
34-
SELECT
35-
*
36-
FROM
37-
taxi.parquet
38-
)
39-
40-
SELECT
41-
COUNT(*)
42-
FROM
43-
group_by_query
44-
LEFT JOIN
45-
full_query
46-
ON
47-
group_by_query.hvfhs_license_num = full_query.hvfhs_license_num
48-
LIMIT 1
49-
`,
50-
];
5118

5219
setOutput([]);
5320
const promiseArr = [];
5421
const start = performance.now();
55-
for (let i = 0; i < testQueries.length; i++) {
22+
for (let i = 0; i < TEST_QUERIES.length; i++) {
5623
const eachQueryStart = performance.now();
5724

5825
const promiseObj = dbm
59-
.queryWithTableNames(testQueries[i], ['taxi'])
26+
.queryWithTableNames(TEST_QUERIES[i], ['taxi'])
6027
.then((results) => {
6128
const end = performance.now();
6229
const time = end - eachQueryStart;

benchmarking/benchmarking-app/src/assets/data-sets/taxi.json

+1
Large diffs are not rendered by default.

benchmarking/benchmarking-app/tsconfig.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
"allowJs": false,
55
"esModuleInterop": false,
66
"allowSyntheticDefaultImports": true,
7-
"strict": true
7+
"strict": true,
8+
"resolveJsonModule": true,
89
},
910
"files": [],
1011
"include": [],

meerkat-dbm/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@devrev/meerkat-dbm",
3-
"version": "0.0.136",
3+
"version": "0.0.14",
44
"dependencies": {
55
"tslib": "^2.3.0",
66
"@duckdb/duckdb-wasm": "^1.28.0",

meerkat-dbm/src/dbm/dbm.spec.ts

+10
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { AsyncDuckDB } from '@duckdb/duckdb-wasm';
22
import log from 'loglevel';
33
import {
44
FileBufferStore,
5+
FileJsonStore,
56
FileManagerType,
67
} from '../file-manager/file-manager-type';
78
import { FileData, Table, TableWiseFiles } from '../types';
@@ -28,6 +29,15 @@ export class MockFileManager implements FileManagerType {
2829
this.tables[prop.tableName].files.push(prop);
2930
}
3031

32+
async registerJSON(prop: FileJsonStore): Promise<void> {
33+
const { json, ...fileData } = prop;
34+
35+
this.registerFileBuffer({
36+
...fileData,
37+
buffer: new Uint8Array(),
38+
});
39+
}
40+
3141
async getFileBuffer(name: string): Promise<Uint8Array> {
3242
const fileBuffer = this.fileBufferStore[name];
3343
if (!fileBuffer) {

meerkat-dbm/src/dbm/dbm.ts

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import { AsyncDuckDBConnection } from '@duckdb/duckdb-wasm';
22
import { FileManagerType } from '../file-manager/file-manager-type';
3-
import { DBMEvent } from '../logger/event-types';
4-
import { DBMLogger } from '../logger/logger-types';
3+
import { DBMEvent, DBMLogger } from '../logger';
54
import { InstanceManagerType } from './instance-manager';
65

76
import { DBMConstructorOptions, QueryOptions, QueryQueueItem } from './types';

meerkat-dbm/src/dbm/types.ts

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import { FileManagerType } from '../file-manager/file-manager-type';
2-
import { DBMEvent } from '../logger/event-types';
3-
import { DBMLogger } from '../logger/logger-types';
2+
import { DBMEvent, DBMLogger } from '../logger';
43
import { TableWiseFiles } from '../types';
54
import { InstanceManagerType } from './instance-manager';
65

meerkat-dbm/src/file-manager/file-manager-type.ts

+52-14
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,41 @@
11
import { InstanceManagerType } from '../dbm/instance-manager';
2+
import { DBMEvent, DBMLogger } from '../logger';
23
import { Table, TableWiseFiles } from '../types';
34

5+
export interface FileManagerConstructorOptions {
6+
/**
7+
* @description
8+
* It manages the lifecycle of the DuckDB database instance.
9+
* It provides methods for obtaining an initialized DuckDB instance and terminating the instance.
10+
*/
11+
instanceManager: InstanceManagerType;
12+
13+
/**
14+
* @description
15+
* Represents a logger instance, which will be used for logging messages throughout the File Manager's execution.
16+
*/
17+
logger?: DBMLogger;
18+
19+
/**
20+
* @description
21+
* A callback function that handles events emitted by the File Manager.
22+
*/
23+
onEvent?: (event: DBMEvent) => void;
24+
25+
/**
26+
* @description
27+
* Configuration options for the File Manager.
28+
*/
29+
options?: {
30+
/**
31+
* Maximum size of the file in DB in bytes
32+
*/
33+
maxFileSize?: number;
34+
};
35+
36+
fetchTableFileBuffers: (tableName: string) => Promise<FileBufferStore[]>;
37+
}
38+
439
export interface FileManagerType {
540
/**
641
* @description
@@ -16,6 +51,15 @@ export interface FileManagerType {
1651
*/
1752
registerFileBuffer: (props: FileBufferStore) => Promise<void>;
1853

54+
/**
55+
* @description
56+
* Registers a single JSON file in the file manager.
57+
* It converts a JSON object to a Uint8Array by writing it to a Parquet file in a DuckDB database and registers it.
58+
* Also emits an event with the time taken for the conversion.
59+
* @param props - The FileJsonStore object to register.
60+
*/
61+
registerJSON: (props: FileJsonStore) => Promise<void>;
62+
1963
/**
2064
* @description
2165
* Retrieves the file buffer associated with a given file name.
@@ -73,24 +117,18 @@ export interface FileManagerType {
73117
onDBShutdownHandler: () => Promise<void>;
74118
}
75119

76-
77-
export interface FileBufferStore {
120+
export type BaseFileStore = {
78121
tableName: string;
79122
fileName: string;
80-
buffer: Uint8Array;
81123
staleTime?: number;
82124
cacheTime?: number;
83125
metadata?: object;
84-
}
126+
};
85127

86-
export interface FileManagerConstructorOptions {
87-
fetchTableFileBuffers: (tableName: string) => Promise<FileBufferStore[]>;
88-
instanceManager: InstanceManagerType;
89-
options?: {
90-
/**
91-
* Maximum size of the file in DB in bytes
92-
*/
93-
maxFileSize?: number;
94-
};
95-
}
128+
export type FileBufferStore = BaseFileStore & {
129+
buffer: Uint8Array;
130+
};
96131

132+
export type FileJsonStore = BaseFileStore & {
133+
json: object;
134+
};

meerkat-dbm/src/file-manager/indexed-db/__tests__/indexed-db-file-manager.spec.ts

+33-13
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,19 @@ import { InstanceManagerType } from '../../../dbm/instance-manager';
44
import { FILE_TYPES } from '../../../types';
55
import { IndexedDBFileManager } from '../indexed-db-file-manager';
66
import { MeerkatDatabase } from '../meerkat-database';
7+
import log = require('loglevel');
78

89
const mockDB = {
9-
registerFileBuffer: async (fileName: string, buffer: Uint8Array) => {
10-
return new Promise((resolve) => {
11-
setTimeout(() => {
12-
resolve([fileName]);
13-
}, 200);
14-
});
15-
},
16-
unregisterFileBuffer: async (fileName: string) => {
17-
return new Promise((resolve) => {
18-
setTimeout(() => {
19-
resolve([fileName]);
20-
}, 200);
21-
});
10+
registerFileBuffer: jest.fn(),
11+
registerFileText: jest.fn(),
12+
copyFileToBuffer: jest.fn(),
13+
registerEmptyFileBuffer: jest.fn(),
14+
connect: async () => {
15+
return {
16+
query: jest.fn(),
17+
insertJSONFromPath: jest.fn(),
18+
close: jest.fn(),
19+
};
2220
},
2321
};
2422

@@ -71,6 +69,10 @@ describe('IndexedDBFileManager', () => {
7169
return [];
7270
},
7371
instanceManager,
72+
logger: log,
73+
onEvent: (event) => {
74+
console.log(event);
75+
},
7476
});
7577

7678
await fileManager.initializeDB();
@@ -212,6 +214,24 @@ describe('IndexedDBFileManager', () => {
212214

213215
expect(tableData[0].metadata).toEqual({ test: 'test' });
214216
});
217+
218+
it('should register JSON data', async () => {
219+
const fileJson = {
220+
tableName: 'taxi-json',
221+
fileName: 'taxi-json.parquet',
222+
json: {
223+
test: 'test',
224+
},
225+
};
226+
227+
await fileManager.registerJSON(fileJson);
228+
229+
const tableData = await indexedDB.tablesKey.toArray();
230+
const fileBufferData = await indexedDB.files.toArray();
231+
232+
tableData.some((table) => table.tableName === fileJson.tableName);
233+
fileBufferData.some((file) => file.fileName === fileJson.fileName);
234+
});
215235
});
216236

217237

0 commit comments

Comments
 (0)