Skip to content

Commit 4fdfe79

Browse files
authored
feat: native dbm and file manager (#121)
* add: dbm * cleanup * update: interface * add: abstract class * add: event handler * add file handler * fix: ipc * update: query * some: cleanup * update: ipc * add: tes * fix: views * remove: json * add: fm type * some: cleanup * update: test * fix: prequery test * bump version * fix: events * update: types
1 parent d0b5139 commit 4fdfe79

36 files changed

+988
-208
lines changed

benchmarking/src/app/app.tsx

+23-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import { Link, Route, BrowserRouter as Router, Routes } from 'react-router-dom';
22
import { IndexedDBMProvider } from './dbm-context/indexed-dbm-context';
33
import { MemoryDBMProvider } from './dbm-context/memory-dbm-context';
4+
import { NativeDBMProvider } from './dbm-context/native-dbm-context';
45
import { ParallelIndexedDBMProvider } from './dbm-context/parallel-indexed-dbm-context';
56
import { ParallelMemoryDBMProvider } from './dbm-context/parallel-memory-dbm-context';
67
import { RawDBMProvider } from './dbm-context/raw-dbm-context';
78
import { FileLoader } from './file-loader/file-loader';
9+
import { NativeAppFileLoader } from './file-loader/native-app-file-loader';
810
import { QueryBenchmarking } from './query-benchmarking/query-benchmarking';
911

1012
export function App() {
@@ -27,6 +29,9 @@ export function App() {
2729
<li>
2830
<Link to="/parallel-indexed-dbm">Parallel Indexed DuckDB</Link>
2931
</li>
32+
<li>
33+
<Link to="/native-dbm">Native Node DuckDB</Link>
34+
</li>
3035
</ul>
3136
</nav>
3237
<Routes>
@@ -36,7 +41,7 @@ export function App() {
3641
<div>
3742
<h1>Raw DuckDB</h1>
3843
<RawDBMProvider>
39-
<FileLoader bufferType="uint8Array">
44+
<FileLoader>
4045
<QueryBenchmarking />
4146
</FileLoader>
4247
</RawDBMProvider>
@@ -50,7 +55,7 @@ export function App() {
5055
<h1>In Memory Sequence DuckDB</h1>
5156

5257
<MemoryDBMProvider>
53-
<FileLoader bufferType="uint8Array">
58+
<FileLoader>
5459
<QueryBenchmarking />
5560
</FileLoader>
5661
</MemoryDBMProvider>
@@ -63,7 +68,7 @@ export function App() {
6368
<div>
6469
<h1>IndexedDB DuckDB</h1>
6570
<IndexedDBMProvider>
66-
<FileLoader bufferType="uint8Array">
71+
<FileLoader>
6772
<QueryBenchmarking />
6873
</FileLoader>
6974
</IndexedDBMProvider>
@@ -76,7 +81,7 @@ export function App() {
7681
<div>
7782
<h1>Parallel Memory DuckDB</h1>
7883
<ParallelMemoryDBMProvider>
79-
<FileLoader bufferType="sharedArrayBuffer">
84+
<FileLoader>
8085
<QueryBenchmarking />
8186
</FileLoader>
8287
</ParallelMemoryDBMProvider>
@@ -89,13 +94,26 @@ export function App() {
8994
<div>
9095
<h1>Parallel Indexed DuckDB</h1>
9196
<ParallelIndexedDBMProvider>
92-
<FileLoader bufferType="uint8Array">
97+
<FileLoader>
9398
<QueryBenchmarking />
9499
</FileLoader>
95100
</ParallelIndexedDBMProvider>
96101
</div>
97102
}
98103
/>
104+
<Route
105+
path="/native-dbm"
106+
element={
107+
<div>
108+
<h1>Native Node DuckDB</h1>
109+
<NativeDBMProvider>
110+
<NativeAppFileLoader>
111+
<QueryBenchmarking />
112+
</NativeAppFileLoader>
113+
</NativeDBMProvider>
114+
</div>
115+
}
116+
/>
99117
</Routes>
100118
</Router>
101119
);

benchmarking/src/app/constants.ts

+22-23
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ export const TEST_QUERIES = [
55
hvfhs_license_num,
66
COUNT(*)
77
FROM
8-
taxi.parquet
8+
taxi
99
GROUP BY
1010
hvfhs_license_num
1111
),
@@ -14,7 +14,7 @@ export const TEST_QUERIES = [
1414
SELECT
1515
*
1616
FROM
17-
taxi.parquet
17+
taxi
1818
)
1919
2020
SELECT
@@ -27,18 +27,17 @@ export const TEST_QUERIES = [
2727
group_by_query.hvfhs_license_num = full_query.hvfhs_license_num
2828
LIMIT 1
2929
`,
30-
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet',
31-
"SELECT * FROM taxi.parquet WHERE originating_base_num='B03404' LIMIT 100",
32-
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet GROUP BY hvfhs_license_num',
33-
'SELECT * FROM taxi.parquet ORDER BY bcf LIMIT 100',
34-
30+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi',
31+
"SELECT * FROM taxi WHERE originating_base_num='B03404' LIMIT 100",
32+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi GROUP BY hvfhs_license_num',
33+
'SELECT * FROM taxi ORDER BY bcf LIMIT 100',
3534
`
3635
WITH group_by_query AS (
3736
SELECT
3837
hvfhs_license_num,
3938
COUNT(*)
4039
FROM
41-
taxi.parquet
40+
taxi
4241
GROUP BY
4342
hvfhs_license_num
4443
),
@@ -47,7 +46,7 @@ export const TEST_QUERIES = [
4746
SELECT
4847
*
4948
FROM
50-
taxi.parquet
49+
taxi
5150
)
5251
5352
SELECT
@@ -60,17 +59,17 @@ export const TEST_QUERIES = [
6059
group_by_query.hvfhs_license_num = full_query.hvfhs_license_num
6160
LIMIT 1
6261
`,
63-
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet',
64-
"SELECT * FROM taxi.parquet WHERE originating_base_num='B03404' LIMIT 100",
65-
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet GROUP BY hvfhs_license_num',
66-
'SELECT * FROM taxi.parquet ORDER BY bcf LIMIT 100',
62+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi',
63+
"SELECT * FROM taxi WHERE originating_base_num='B03404' LIMIT 100",
64+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi GROUP BY hvfhs_license_num',
65+
'SELECT * FROM taxi ORDER BY bcf LIMIT 100',
6766
`
6867
WITH group_by_query AS (
6968
SELECT
7069
hvfhs_license_num,
7170
COUNT(*)
7271
FROM
73-
taxi.parquet
72+
taxi
7473
GROUP BY
7574
hvfhs_license_num
7675
),
@@ -79,7 +78,7 @@ export const TEST_QUERIES = [
7978
SELECT
8079
*
8180
FROM
82-
taxi.parquet
81+
taxi
8382
)
8483
8584
SELECT
@@ -92,12 +91,12 @@ export const TEST_QUERIES = [
9291
group_by_query.hvfhs_license_num = full_query.hvfhs_license_num
9392
LIMIT 1
9493
`,
95-
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet',
96-
"SELECT * FROM taxi.parquet WHERE originating_base_num='B03404' LIMIT 100",
97-
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi.parquet GROUP BY hvfhs_license_num',
98-
'SELECT * FROM taxi.parquet ORDER BY bcf LIMIT 100',
99-
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxijson.parquet',
100-
'SELECT * FROM taxijson.parquet WHERE price >= 1.0005812645 LIMIT 100',
101-
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxijson.parquet GROUP BY order_count',
102-
'SELECT * FROM taxijson.parquet ORDER BY seconds_in_bucket LIMIT 100',
94+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi',
95+
"SELECT * FROM taxi WHERE originating_base_num='B03404' LIMIT 100",
96+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi GROUP BY hvfhs_license_num',
97+
'SELECT * FROM taxi ORDER BY bcf LIMIT 100',
98+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi_json',
99+
'SELECT * FROM taxi_json WHERE price >= 1.0005812645 LIMIT 100',
100+
'SELECT CAST(COUNT(*) as VARCHAR) as total_count FROM taxi_json GROUP BY order_count',
101+
'SELECT * FROM taxi_json ORDER BY seconds_in_bucket LIMIT 100',
103102
];

benchmarking/src/app/dbm-context/indexed-dbm-context.tsx

+1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ export const IndexedDBMProvider = ({ children }: { children: JSX.Element }) => {
5252
value={{
5353
dbm,
5454
fileManager: fileManagerRef.current,
55+
fileManagerType: 'indexdb',
5556
}}
5657
>
5758
{children}

benchmarking/src/app/dbm-context/memory-dbm-context.tsx

+1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ export const MemoryDBMProvider = ({ children }: { children: JSX.Element }) => {
5050
value={{
5151
dbm,
5252
fileManager: fileManagerRef.current,
53+
fileManagerType: 'memory',
5354
}}
5455
>
5556
{children}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import {
2+
DBMNative,
3+
FileManagerType,
4+
FileStore,
5+
NativeFileManager,
6+
} from '@devrev/meerkat-dbm';
7+
import log from 'loglevel';
8+
import { NativeBridge } from 'meerkat-dbm/src/dbm/dbm-native/native-bridge';
9+
import { useMemo, useRef, useState } from 'react';
10+
import { DBMContext } from '../hooks/dbm-context';
11+
import { useClassicEffect } from '../hooks/use-classic-effect';
12+
import { InstanceManager } from './instance-manager';
13+
import { useAsyncDuckDB } from './use-async-duckdb';
14+
15+
export const NativeDBMProvider = ({ children }: { children: JSX.Element }) => {
16+
const fileManagerRef = useRef<FileManagerType | null>(null);
17+
const [dbm, setdbm] = useState<DBMNative | null>(null);
18+
const instanceManagerRef = useRef<InstanceManager>(new InstanceManager());
19+
20+
const dbState = useAsyncDuckDB();
21+
22+
const nativeBridge: NativeBridge = useMemo(() => {
23+
return {
24+
registerFiles: async (files: FileStore[]): Promise<void> => {
25+
await window.api?.registerFiles(files);
26+
},
27+
28+
query: async (query): Promise<Record<string, unknown>> => {
29+
const result = await window.api?.query(query);
30+
31+
return result ?? {};
32+
},
33+
dropFilesByTableName: async ({ tableName, fileNames }): Promise<void> => {
34+
await window.api?.dropFilesByTableName({
35+
tableName,
36+
fileNames,
37+
});
38+
},
39+
getFilePathsForTable: async (tableName): Promise<string[]> => {
40+
return (await window.api?.getFilePathsForTable(tableName)) ?? [];
41+
},
42+
};
43+
}, []);
44+
45+
useClassicEffect(() => {
46+
if (!dbState) {
47+
return;
48+
}
49+
50+
fileManagerRef.current = new NativeFileManager({
51+
fetchTableFileBuffers: async (table) => {
52+
return [];
53+
},
54+
nativeBridge: nativeBridge,
55+
logger: log,
56+
onEvent: (event) => {
57+
console.log('event', event);
58+
},
59+
instanceManager: instanceManagerRef.current,
60+
});
61+
62+
const dbm = new DBMNative({
63+
instanceManager: instanceManagerRef.current,
64+
fileManager: fileManagerRef.current,
65+
logger: log,
66+
onEvent: (event) => {
67+
console.log('event', event);
68+
},
69+
nativeBridge: nativeBridge,
70+
});
71+
setdbm(dbm);
72+
}, [dbState]);
73+
74+
if (!dbm || !fileManagerRef.current) {
75+
return <div>Loading...</div>;
76+
}
77+
78+
return (
79+
<DBMContext.Provider
80+
value={{
81+
dbm,
82+
fileManager: fileManagerRef.current,
83+
fileManagerType: 'native',
84+
}}
85+
>
86+
{children}
87+
</DBMContext.Provider>
88+
);
89+
};

benchmarking/src/app/dbm-context/parallel-indexed-dbm-context.tsx

+15-2
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@ import {
44
ParallelIndexedDBFileManager,
55
} from '@devrev/meerkat-dbm';
66
import log from 'loglevel';
7+
import { TableWiseFiles } from 'meerkat-dbm/src/types/common-types';
78
import { useRef, useState } from 'react';
89
import { DBMContext } from '../hooks/dbm-context';
910
import { useClassicEffect } from '../hooks/use-classic-effect';
11+
import { generateViewQuery } from '../utils';
1012
import { InstanceManager } from './instance-manager';
1113
import { useAsyncDuckDB } from './use-async-duckdb';
1214

@@ -43,8 +45,18 @@ export const ParallelIndexedDBMProvider = ({
4345
fetchTableFileBuffers: async (table) => {
4446
return [];
4547
},
46-
fetchPreQuery: () => {
47-
return [];
48+
fetchPreQuery: (runnerId: string, tableWiseFiles: TableWiseFiles[]) => {
49+
const preQueries: string[] = [];
50+
51+
for (const tableWiseFile of tableWiseFiles) {
52+
preQueries.push(
53+
generateViewQuery(
54+
tableWiseFile.tableName,
55+
tableWiseFile.files.map((file) => file.fileName)
56+
)
57+
);
58+
}
59+
return preQueries;
4860
},
4961
onEvent: (event) => {
5062
console.info(event);
@@ -77,6 +89,7 @@ export const ParallelIndexedDBMProvider = ({
7789
value={{
7890
dbm,
7991
fileManager: fileManagerRef.current as any,
92+
fileManagerType: 'parallel-indexdb',
8093
}}
8194
>
8295
{children}

benchmarking/src/app/dbm-context/parallel-memory-dbm-context.tsx

+16-2
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@ import {
44
ParallelMemoryFileManager,
55
} from '@devrev/meerkat-dbm';
66
import log from 'loglevel';
7+
import { TableWiseFiles } from 'meerkat-dbm/src/types/common-types';
78
import { useRef, useState } from 'react';
89
import { DBMContext } from '../hooks/dbm-context';
910
import { useClassicEffect } from '../hooks/use-classic-effect';
11+
import { generateViewQuery } from '../utils';
1012
import { InstanceManager } from './instance-manager';
1113
import { useAsyncDuckDB } from './use-async-duckdb';
1214

@@ -43,8 +45,19 @@ export const ParallelMemoryDBMProvider = ({
4345
fetchTableFileBuffers: async (table) => {
4446
return fileManagerRef.current.getTableBufferData(table);
4547
},
46-
fetchPreQuery: () => {
47-
return [];
48+
fetchPreQuery: (runnerId: string, tableWiseFiles: TableWiseFiles[]) => {
49+
const preQueries: string[] = [];
50+
51+
for (const tableWiseFile of tableWiseFiles) {
52+
preQueries.push(
53+
generateViewQuery(
54+
tableWiseFile.tableName,
55+
tableWiseFile.files.map((file) => file.fileName)
56+
)
57+
);
58+
}
59+
60+
return preQueries;
4861
},
4962
onEvent: (event) => {
5063
console.info(event);
@@ -77,6 +90,7 @@ export const ParallelMemoryDBMProvider = ({
7790
value={{
7891
dbm,
7992
fileManager: fileManagerRef.current as any,
93+
fileManagerType: 'parallel-memory',
8094
}}
8195
>
8296
{children}

benchmarking/src/app/dbm-context/raw-dbm-context.tsx

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ export const RawDBMProvider = ({ children }: { children: JSX.Element }) => {
5151
value={{
5252
dbm,
5353
fileManager: fileManagerRef.current,
54+
fileManagerType: 'raw',
5455
}}
5556
>
5657
{children}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
export const TAXI_FILE_URL =
2+
'http://localhost:4204/data-sets/fhvhv_tripdata_2023-01.parquet';

0 commit comments

Comments
 (0)