Skip to content

Commit 4cf77f7

Browse files
authored
feat: indexeddb file manager (#30)
* add: indexed dbm layer * add: dexie * chore: remove idb * update: changes * add: file manager test * add: benchmarking test * update: comments * add: return * update: dexie * remove: comment * address: comments * add: tables map * update: comment * update: use tables * fix: test * bump version
1 parent 5406672 commit 4cf77f7

File tree

16 files changed

+662
-26
lines changed

16 files changed

+662
-26
lines changed

benchmarking/benchmarking-app/src/app/app.tsx

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { Route, BrowserRouter as Router, Routes } from 'react-router-dom';
2+
import { IndexedDBMProvider } from './dbm-context/indexed-dbm-context';
23
import { MemoryDBMProvider } from './dbm-context/memory-dbm-context';
34
import { RawDBMProvider } from './dbm-context/raw-dbm-context';
45
import { FileLoader } from './file-loader/file-loader';
@@ -35,6 +36,19 @@ export function App() {
3536
</div>
3637
}
3738
/>
39+
<Route
40+
path="/indexed-dbm"
41+
element={
42+
<div>
43+
<h1>IndexedDB DuckDB</h1>
44+
<IndexedDBMProvider>
45+
<FileLoader>
46+
<QueryBenchmarking />
47+
</FileLoader>
48+
</IndexedDBMProvider>
49+
</div>
50+
}
51+
/>
3852
</Routes>
3953
</Router>
4054
);

benchmarking/benchmarking-app/src/app/benchmarking-tests/dbm-benchmarking.spec.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ describe('Benchmarking DBMs', () => {
77
let browser;
88
let appProcess;
99

10+
let totalTimeForMemoryDB: number;
11+
1012
beforeAll(async () => {
1113
appProcess = spawn('npx', ['nx', 'serve', 'benchmarking-app'], {
1214
stdio: 'inherit',
@@ -58,7 +60,7 @@ describe('Benchmarking DBMs', () => {
5860
/**
5961
* Get the total time as number
6062
*/
61-
const totalTimeForMemoryDB = await page.$eval('#total_time', (el) =>
63+
totalTimeForMemoryDB = await page.$eval('#total_time', (el) =>
6264
Number(el.textContent)
6365
);
6466

@@ -70,6 +72,27 @@ describe('Benchmarking DBMs', () => {
7072
expect(totalTimeForRawDB).toBeLessThan(totalTimeForMemoryDB * 1.1);
7173
}, 220000);
7274

75+
it('Benchmark indexed dbm duckdb', async () => {
76+
await page.goto('http://localhost:4200/indexed-dbm');
77+
/**
78+
* Wait for the total time to be rendered
79+
*/
80+
await page.waitForSelector('#total_time', { timeout: 300000 });
81+
/**
82+
* Get the total time as number
83+
*/
84+
const totalTimeForIndexedDBM = await page.$eval('#total_time', (el) =>
85+
Number(el.textContent)
86+
);
87+
88+
console.info('totalTimeForIndexedDBM', totalTimeForIndexedDBM);
89+
90+
/**
91+
* The indexed dbm total time should be less than 1.3x the memory dbm total time (i.e. less than 30% slower)
92+
*/
93+
expect(totalTimeForIndexedDBM).toBeLessThan(totalTimeForMemoryDB * 1.3);
94+
}, 300000);
95+
7396
afterAll(async () => {
7497
await browser.close();
7598
appProcess.kill('SIGTERM');
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import { DBM, IndexedDBFileManager } from '@devrev/meerkat-dbm';
2+
import React, { useState } from 'react';
3+
import { DBMContext } from '../hooks/dbm-context';
4+
import { useClassicEffect } from '../hooks/use-classic-effect';
5+
import { useAsyncDuckDB } from './use-async-duckdb';
6+
7+
export const IndexedDBMProvider = ({ children }: { children: JSX.Element }) => {
8+
const fileManagerRef = React.useRef<IndexedDBFileManager | null>(null);
9+
const [dbm, setdbm] = useState<DBM | null>(null);
10+
11+
const dbState = useAsyncDuckDB();
12+
13+
useClassicEffect(() => {
14+
if (!dbState) {
15+
return;
16+
}
17+
fileManagerRef.current = new IndexedDBFileManager({
18+
db: dbState,
19+
fetchTableFileBuffers: async (table) => {
20+
return [];
21+
},
22+
});
23+
24+
fileManagerRef.current.initializeDB();
25+
26+
const dbm = new DBM({
27+
db: dbState,
28+
fileManager: fileManagerRef.current,
29+
});
30+
31+
setdbm(dbm);
32+
}, [dbState]);
33+
34+
if (!dbm || !fileManagerRef.current) {
35+
return <div>Loading...</div>;
36+
}
37+
38+
return (
39+
<DBMContext.Provider
40+
value={{
41+
dbm,
42+
fileManager: fileManagerRef.current,
43+
}}
44+
>
45+
{children}
46+
</DBMContext.Provider>
47+
);
48+
};

benchmarking/benchmarking-app/src/app/file-loader/file-loader.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export const FileLoader = ({ children }: { children: JSX.Element }) => {
2020
fileName: 'taxi.parquet',
2121
buffer: fileBufferView,
2222
});
23+
2324
setIsFileLoader(true);
2425
})();
2526
}, []);

benchmarking/benchmarking-app/src/app/query-benchmarking/query-benchmarking.tsx

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,30 +21,30 @@ export const QueryBenchmarking = () => {
2121
'SELECT * as total_count FROM taxi.parquet ORDER BY bcf LIMIT 100',
2222
`
2323
WITH group_by_query AS (
24-
SELECT
25-
hvfhs_license_num,
26-
COUNT(*)
27-
FROM
28-
taxi.parquet
29-
GROUP BY
24+
SELECT
25+
hvfhs_license_num,
26+
COUNT(*)
27+
FROM
28+
taxi.parquet
29+
GROUP BY
3030
hvfhs_license_num
31-
),
32-
31+
),
32+
3333
full_query AS (
34-
SELECT
35-
*
36-
FROM
34+
SELECT
35+
*
36+
FROM
3737
taxi.parquet
38-
)
39-
40-
SELECT
41-
COUNT(*)
42-
FROM
43-
group_by_query
44-
LEFT JOIN
45-
full_query
46-
ON
47-
group_by_query.hvfhs_license_num = full_query.hvfhs_license_num
38+
)
39+
40+
SELECT
41+
COUNT(*)
42+
FROM
43+
group_by_query
44+
LEFT JOIN
45+
full_query
46+
ON
47+
group_by_query.hvfhs_license_num = full_query.hvfhs_license_num
4848
LIMIT 1
4949
`,
5050
];
@@ -56,7 +56,7 @@ export const QueryBenchmarking = () => {
5656
const eachQueryStart = performance.now();
5757

5858
const promiseObj = dbm
59-
.queryWithTableNames(testQueries[i], ['taxi.parquet'])
59+
.queryWithTableNames(testQueries[i], ['taxi'])
6060
.then((results) => {
6161
const end = performance.now();
6262
const time = end - eachQueryStart;
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:cb066a3fe0770e44dde67cfc43e8b964b7b3ca7f2c90dc35d2d57cb7e41629b8
3+
size 473816636

meerkat-dbm/package.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
{
22
"name": "@devrev/meerkat-dbm",
3-
"version": "0.0.11",
3+
"version": "0.0.12",
44
"dependencies": {
55
"tslib": "^2.3.0",
6-
"@duckdb/duckdb-wasm": "^1.28.0"
6+
"@duckdb/duckdb-wasm": "^1.28.0",
7+
"dexie": "^3.2.4"
78
},
89
"repository": {
910
"type": "git",

meerkat-dbm/src/file-manager/file-manager-type.ts

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ export interface FileBufferStore {
1212
export interface FileManagerType {
1313
bulkRegisterFileBuffer: (props: FileBufferStore[]) => Promise<void>;
1414
registerFileBuffer: (props: FileBufferStore) => Promise<void>;
15-
getFileBuffer: (name: string) => Promise<Uint8Array>;
15+
getFileBuffer: (name: string) => Promise<Uint8Array | undefined>;
1616
mountFileBufferByTableNames: (tableName: string[]) => Promise<void>;
1717
unmountFileBufferByTableNames: (tableName: string[]) => Promise<void>;
1818
}
@@ -21,3 +21,30 @@ export interface FileManagerConstructorOptions {
2121
fetchTableFileBuffers: (tableName: string) => Promise<FileBufferStore[]>;
2222
db: AsyncDuckDB;
2323
}
24+
25+
export const FILE_TYPES = {
26+
PARQUET: 'parquet',
27+
} as const;
28+
29+
export type FileType = (typeof FILE_TYPES)[keyof typeof FILE_TYPES];
30+
31+
export interface Table {
32+
tableName: string;
33+
files: FileData[];
34+
totalSize?: number;
35+
metadata?: object;
36+
}
37+
38+
export interface FileData {
39+
fileName: string;
40+
fileType?: FileType;
41+
size?: number;
42+
staleTime?: number;
43+
cacheTime?: number;
44+
metadata?: object;
45+
}
46+
47+
export interface File {
48+
fileName: string;
49+
buffer: Uint8Array;
50+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import Dexie from 'dexie';
2+
import { File, Table } from '../file-manager-type';
3+
4+
/**
5+
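* The `buffer` field is deliberately left out of the schema below: only indexed properties need to be declared, and large binary data should be stored without being indexed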
6+
* https://dexie.org/docs/Version/Version.stores()#warning
7+
*/
8+
9+
export class DuckDBDatabase extends Dexie {
10+
tablesKey: Dexie.Table<Table, string>;
11+
files: Dexie.Table<File, string>;
12+
13+
constructor() {
14+
super('DuckDBDatabase');
15+
16+
this.version(1).stores({
17+
tablesKey: '&tableName',
18+
files: '&fileName',
19+
});
20+
21+
this.tablesKey = this.table('tablesKey');
22+
this.files = this.table('files');
23+
}
24+
}

0 commit comments

Comments
 (0)