Skip to content

Commit 767a567

Browse files
authored
feat: add bulk json register (#49)
* add: bulk register * add: test * bump: version * fix: test
1 parent 21299d1 commit 767a567

File tree

9 files changed

+94
-10
lines changed

9 files changed

+94
-10
lines changed

benchmarking/benchmarking-app/src/app/constants.ts

+4-4
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ export const TEST_QUERIES = [
3131
group_by_query.hvfhs_license_num = full_query.hvfhs_license_num
3232
LIMIT 1
3333
`,
34-
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxijson',
35-
'SELECT * FROM taxijson WHERE price >= 1.0005812645 LIMIT 100',
36-
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxijson GROUP BY order_count',
37-
'SELECT * as total_count FROM taxijson ORDER BY seconds_in_bucket LIMIT 100',
34+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxijson.parquet',
35+
'SELECT * FROM taxijson.parquet WHERE price >= 1.0005812645 LIMIT 100',
36+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxijson.parquet GROUP BY order_count',
37+
'SELECT * as total_count FROM taxijson.parquet ORDER BY seconds_in_bucket LIMIT 100',
3838
];

benchmarking/benchmarking-app/src/app/file-loader/file-loader.tsx

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ export const FileLoader = ({ children }: { children: JSX.Element }) => {
2525
await fileManager.registerJSON({
2626
json: TAXI_JSON_DATA,
2727
tableName: 'taxijson',
28-
fileName: 'taxi.json',
28+
fileName: 'taxijson.parquet',
2929
});
3030

3131
setIsFileLoader(true);

meerkat-dbm/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@devrev/meerkat-dbm",
3-
"version": "0.0.14",
3+
"version": "0.0.141",
44
"dependencies": {
55
"tslib": "^2.3.0",
66
"@duckdb/duckdb-wasm": "^1.28.0",

meerkat-dbm/src/dbm/dbm.spec.ts

+6
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ export class MockFileManager implements FileManagerType {
2929
this.tables[prop.tableName].files.push(prop);
3030
}
3131

32+
async bulkRegisterJSON(props: FileJsonStore[]): Promise<void> {
33+
for (const prop of props) {
34+
await this.registerJSON(prop);
35+
}
36+
}
37+
3238
async registerJSON(prop: FileJsonStore): Promise<void> {
3339
const { json, ...fileData } = prop;
3440

meerkat-dbm/src/file-manager/file-manager-type.ts

+9
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,15 @@ export interface FileManagerType {
5151
*/
5252
registerFileBuffer: (props: FileBufferStore) => Promise<void>;
5353

54+
/**
55+
* @description
56+
* Registers multiple JSON files in the file manager.
57+
* Each JSON object is converted to a Uint8Array by writing it to a Parquet file in a DuckDB database; the resulting buffers are then registered.
58+
* Also emits an event with the time taken for the conversion.
59+
* @param props - An array of FileJsonStore objects.
60+
*/
61+
bulkRegisterJSON: (props: FileJsonStore[]) => Promise<void>;
62+
5463
/**
5564
* @description
5665
* Registers a single JSON file in the file manager.

meerkat-dbm/src/file-manager/indexed-db/__tests__/indexed-db-file-manager.spec.ts

+41-4
Original file line numberDiff line numberDiff line change
@@ -216,21 +216,58 @@ describe('IndexedDBFileManager', () => {
216216
});
217217

218218
it('should register JSON data', async () => {
219-
const fileJson = {
219+
const jsonFile = {
220220
tableName: 'taxi-json',
221221
fileName: 'taxi-json.parquet',
222222
json: {
223223
test: 'test',
224224
},
225225
};
226226

227-
await fileManager.registerJSON(fileJson);
227+
await fileManager.registerJSON(jsonFile);
228228

229229
const tableData = await indexedDB.tablesKey.toArray();
230230
const fileBufferData = await indexedDB.files.toArray();
231231

232-
tableData.some((table) => table.tableName === fileJson.tableName);
233-
fileBufferData.some((file) => file.fileName === fileJson.fileName);
232+
expect(
233+
tableData.some((table) => table.tableName === jsonFile.tableName)
234+
).toBe(true);
235+
236+
expect(
237+
fileBufferData.some((file) => file.fileName === jsonFile.fileName)
238+
).toBe(true);
239+
});
240+
241+
it('should register multiple JSON data', async () => {
242+
const jsonFiles = [
243+
{
244+
tableName: 'taxi-json-bulk',
245+
fileName: 'taxi-json1.parquet',
246+
json: {
247+
test: 'test',
248+
},
249+
},
250+
{
251+
tableName: 'taxi-json-bulk',
252+
fileName: 'taxi-json2.parquet',
253+
json: {
254+
test: 'test',
255+
},
256+
},
257+
];
258+
259+
await fileManager.bulkRegisterJSON(jsonFiles);
260+
261+
const tableData = await indexedDB.tablesKey.toArray();
262+
const fileBufferData = await indexedDB.files.toArray();
263+
264+
expect(
265+
tableData.some((table) => table.tableName === jsonFiles[0].tableName)
266+
).toBe(true);
267+
268+
expect(fileBufferData.map((file) => file.fileName)).toEqual(
269+
expect.arrayContaining(jsonFiles.map((file) => file.fileName))
270+
);
234271
});
235272
});
236273

meerkat-dbm/src/file-manager/indexed-db/indexed-db-file-manager.ts

+21
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,27 @@ export class IndexedDBFileManager implements FileManagerType {
121121
});
122122
}
123123

124+
async bulkRegisterJSON(jsonData: FileJsonStore[]): Promise<void> {
125+
const fileBuffers = await Promise.all(
126+
jsonData.map(async (jsonFile) => {
127+
const { json, tableName, ...fileData } = jsonFile;
128+
129+
const bufferData = await getBufferFromJSON({
130+
instanceManager: this.instanceManager,
131+
json: json,
132+
tableName,
133+
logger: this.logger,
134+
onEvent: this.onEvent,
135+
metadata: jsonFile.metadata,
136+
});
137+
138+
return { buffer: bufferData, tableName, ...fileData };
139+
})
140+
);
141+
142+
await this.bulkRegisterFileBuffer(fileBuffers);
143+
}
144+
124145
async registerJSON(jsonData: FileJsonStore): Promise<void> {
125146
const { json, tableName, ...fileData } = jsonData;
126147

meerkat-dbm/src/file-manager/memory-file-manager.ts

+8
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@ export class MemoryDBFileManager implements FileManagerType {
4444
return db.registerFileBuffer(props.fileName, props.buffer);
4545
}
4646

47+
async bulkRegisterJSON(jsonData: FileJsonStore[]): Promise<void> {
48+
const promiseArr = jsonData.map((fileBuffer) =>
49+
this.registerJSON(fileBuffer)
50+
);
51+
52+
await Promise.all(promiseArr);
53+
}
54+
4755
async registerJSON(jsonData: FileJsonStore): Promise<void> {
4856
const { json, tableName, ...fileData } = jsonData;
4957

meerkat-dbm/src/utils/get-buffer-from-json.ts

+3
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ export const getBufferFromJSON = async ({
6868
metadata: { ...metadata, json },
6969
});
7070

71+
// Drop the table.
72+
await connection.query(`DROP TABLE ${tableName};`);
73+
7174
await db.registerEmptyFileBuffer(jsonFileName);
7275

7376
await connection.close();

0 commit comments

Comments
 (0)