Skip to content

Commit b8c1d5b

Browse files
authored
Managing file in duckdb for better swapping (#42)
- WIP
- Adding max allowed memory in file
- Fix test
- Adding test
- Upgrade version
- Update comments
1 parent ec5d022 commit b8c1d5b

File tree

8 files changed

+231
-57
lines changed

8 files changed

+231
-57
lines changed

meerkat-dbm/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@devrev/meerkat-dbm",
3-
"version": "0.0.134",
3+
"version": "0.0.135",
44
"dependencies": {
55
"tslib": "^2.3.0",
66
"@duckdb/duckdb-wasm": "^1.28.0",

meerkat-dbm/src/dbm/dbm.spec.ts

+5
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ export class MockFileManager implements FileManagerType {
100100

101101
return data;
102102
}
103+
104+
onDBShutdownHandler = jest.fn(async () => {
105+
// do nothing
106+
});
103107
}
104108

105109
const mockDB = {
@@ -307,6 +311,7 @@ describe('DBM', () => {
307311
/**
308312
* Expect instanceManager.terminateDB to be called
309313
*/
314+
expect(fileManager.onDBShutdownHandler).toBeCalled();
310315
expect(instanceManager.terminateDB).toBeCalled();
311316
});
312317

meerkat-dbm/src/dbm/dbm.ts

+1-20
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ export class DBM {
8989
if (this.onDuckdbShutdown) {
9090
this.onDuckdbShutdown();
9191
}
92+
await this.fileManager.onDBShutdownHandler();
9293
await this.instanceManager.terminateDB();
9394
}
9495

@@ -186,26 +187,6 @@ export class DBM {
186187
metadata: options?.metadata,
187188
});
188189

189-
/**
190-
* Unload all the files from the database, so that the files can be removed from memory
191-
*/
192-
const startUnmountTime = Date.now();
193-
await this.fileManager.unmountFileBufferByTableNames(tableNames);
194-
const endUnmountTime = Date.now();
195-
196-
this.logger.debug(
197-
'Time spent in unmounting files:',
198-
endUnmountTime - startUnmountTime,
199-
'ms',
200-
query
201-
);
202-
203-
this._emitEvent({
204-
event_name: 'unmount_file_buffer_duration',
205-
duration: endUnmountTime - startUnmountTime,
206-
metadata: options?.metadata,
207-
});
208-
209190
return result;
210191
}
211192

meerkat-dbm/src/file-manager/file-manager-type.ts

+7-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ export interface FileManagerType {
1414
registerFileBuffer: (props: FileBufferStore) => Promise<void>;
1515
getFileBuffer: (name: string) => Promise<Uint8Array | undefined>;
1616
mountFileBufferByTableNames: (tableName: string[]) => Promise<void>;
17-
unmountFileBufferByTableNames: (tableName: string[]) => Promise<void>;
1817
getFilesByTableName(tableName: string): Promise<FileData[]>;
1918
dropFilesByTableName(tableName: string, fileNames: string[]): Promise<void>;
2019
getFilesNameForTables(tableNames: string[]): Promise<
@@ -23,11 +22,18 @@ export interface FileManagerType {
2322
files: string[];
2423
}[]
2524
>;
25+
onDBShutdownHandler: () => Promise<void>;
2626
}
2727

2828
/**
 * Constructor arguments for a file manager.
 */
export interface FileManagerConstructorOptions {
  /** Async callback that takes a table name and resolves to that table's file buffers. */
  fetchTableFileBuffers: (tableName: string) => Promise<FileBufferStore[]>;

  /**
   * Instance manager supplied to the file manager.
   * NOTE(review): presumably used to obtain the DuckDB instance — confirm against the implementation.
   */
  instanceManager: InstanceManagerType;

  /** Optional settings; every field may be omitted. */
  options?: {
    /**
     * Maximum size of the file in DB in bytes
     */
    maxFileSize?: number;
  };
}
3238

3339
export const FILE_TYPES = {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import { InstanceManagerType } from '../../dbm/instance-manager';
2+
import { FileRegisterer } from './file-registerer';
3+
4+
/**
 * Fake DuckDB handle used by the FileRegisterer tests.
 *
 * Both registration methods are jest mocks that resolve with `[fileName]`
 * once a 200 ms timer fires, imitating an asynchronous database call.
 */
const mockDB = (() => {
  // Shared helper: settle with the file name wrapped in an array after 200 ms.
  const resolveWithNameLater = (fileName: string) =>
    new Promise((resolve) => {
      setTimeout(() => {
        resolve([fileName]);
      }, 200);
    });

  return {
    registerFileBuffer: jest.fn(
      async (fileName: string, buffer: Uint8Array) =>
        resolveWithNameLater(fileName)
    ),
    registerEmptyFileBuffer: jest.fn(async (fileName: string) =>
      resolveWithNameLater(fileName)
    ),
  };
})();
20+
21+
/**
 * Unit tests for FileRegisterer.
 *
 * Every call to the async registration methods is awaited so that no promise
 * (and no 200 ms mock timer) outlives the test that created it.
 */
describe('FileRegisterer', () => {
  let db: typeof mockDB;
  let instanceManager: InstanceManagerType;
  let fileRegisterer: FileRegisterer;

  beforeEach(() => {
    jest.clearAllMocks();
    db = mockDB;
    instanceManager = {
      // eslint-disable-next-line @typescript-eslint/ban-ts-comment
      //@ts-ignore
      getDB: async () => {
        return db;
      },
      terminateDB: async () => {
        return;
      },
    };
    // A fresh registerer per test, so bookkeeping never leaks between tests.
    fileRegisterer = new FileRegisterer({ instanceManager });
  });

  it('should register file buffer', async () => {
    const fileName = 'test.txt';
    const buffer = new Uint8Array();
    await fileRegisterer.registerFileBuffer(fileName, buffer);
    expect(db.registerFileBuffer).toHaveBeenCalledWith(fileName, buffer);
    expect(fileRegisterer.isFileRegisteredInDB(fileName)).toBeTruthy();
  });

  it('should register empty file buffer', async () => {
    const fileName = 'test.txt';
    // Register real content first so the assertion below proves the entry
    // was actually dropped, instead of passing on an empty registry.
    await fileRegisterer.registerFileBuffer(fileName, new Uint8Array(3));
    expect(fileRegisterer.isFileRegisteredInDB(fileName)).toBeTruthy();

    await fileRegisterer.registerEmptyFileBuffer(fileName);
    expect(db.registerEmptyFileBuffer).toHaveBeenCalledWith(fileName);
    expect(fileRegisterer.isFileRegisteredInDB(fileName)).toBeFalsy();
  });

  it('should check if file is registered in DB', () => {
    const fileName = 'test.txt';
    expect(fileRegisterer.isFileRegisteredInDB(fileName)).toBeFalsy();
  });

  it('should flush file cache', async () => {
    // Populate the registry so that flushing has something to clear.
    await fileRegisterer.registerFileBuffer('test.txt', new Uint8Array(2));
    expect(fileRegisterer.getAllFilesInDB()).toEqual(['test.txt']);

    fileRegisterer.flushFileCache();
    expect(fileRegisterer.getAllFilesInDB()).toEqual([]);
  });

  it('should get total byte length', async () => {
    const fileName = 'test.txt';
    const buffer = new Uint8Array(5);
    await fileRegisterer.registerFileBuffer(fileName, buffer);
    expect(fileRegisterer.totalByteLength()).toEqual(5);
  });

  it('should get all files in DB', async () => {
    const fileName1 = 'test1.txt';
    const fileName2 = 'test2.txt';
    const buffer = new Uint8Array();
    // Both registrations run concurrently; they are recorded in call order.
    await Promise.all([
      fileRegisterer.registerFileBuffer(fileName1, buffer),
      fileRegisterer.registerFileBuffer(fileName2, buffer),
    ]);
    expect(fileRegisterer.getAllFilesInDB()).toEqual([fileName1, fileName2]);
  });
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import { AsyncDuckDB } from '@duckdb/duckdb-wasm';
2+
import { InstanceManagerType } from '../../dbm/instance-manager';
3+
4+
/**
 * Constructor arguments for FileRegisterer.
 */
interface FileRegistererConstructorOptions {
  /** Manager whose `getDB()` yields the database that files are registered with. */
  instanceManager: InstanceManagerType;
}
7+
8+
/**
 * Contract for an object that registers file buffers with DuckDB and keeps
 * track of which files it has registered.
 */
export interface FileRegistererType {
  /** Same signature as `AsyncDuckDB.registerFileBuffer`. */
  registerFileBuffer: AsyncDuckDB['registerFileBuffer'];

  /** Same signature as `AsyncDuckDB.registerEmptyFileBuffer`. */
  registerEmptyFileBuffer: AsyncDuckDB['registerEmptyFileBuffer'];

  /** Reports whether the given file name is currently tracked as registered. */
  isFileRegisteredInDB: (fileName: string) => boolean;

  /** Forgets all tracked files. */
  flushFileCache: () => void;

  /** Sum of the byte lengths of all tracked files. */
  totalByteLength: () => number;

  /** Names of all tracked files. */
  getAllFilesInDB: () => string[];
}
16+
17+
/**
 * Registers file buffers with the DuckDB instance obtained from the instance
 * manager and keeps an in-memory record (file name -> byte length) of what is
 * registered, so duplicate registrations can be skipped and the total number
 * of registered bytes can be reported.
 */
export class FileRegisterer implements FileRegistererType {
  instanceManager: InstanceManagerType;

  /** Bookkeeping of tracked files, keyed by file name. */
  private registeredFilesSet = new Map<
    string,
    {
      byteLength: number;
    }
  >();

  constructor({ instanceManager }: FileRegistererConstructorOptions) {
    this.instanceManager = instanceManager;
  }

  /**
   * Registers `buffer` under `fileName` unless that name is already tracked.
   *
   * The file is recorded synchronously, before the DB call, so that
   * concurrent callers are de-duplicated and `totalByteLength()` reflects the
   * pending registration immediately. If obtaining the DB or the registration
   * itself fails, the record is rolled back so the file is not left marked as
   * registered and a later call can retry.
   *
   * @returns whatever the underlying `db.registerFileBuffer` resolves with,
   *          or `undefined` when the file was already tracked.
   * @throws rethrows any error from `getDB()` or `db.registerFileBuffer`.
   */
  registerFileBuffer: AsyncDuckDB['registerFileBuffer'] = async (
    fileName,
    buffer
  ) => {
    if (this.registeredFilesSet.has(fileName)) {
      return;
    }

    const entry = { byteLength: buffer.byteLength };
    this.registeredFilesSet.set(fileName, entry);

    try {
      const db = await this.instanceManager.getDB();
      return await db.registerFileBuffer(fileName, buffer);
    } catch (error) {
      // Only undo our own record; a newer registration for the same name
      // that raced in meanwhile must not be discarded.
      if (this.registeredFilesSet.get(fileName) === entry) {
        this.registeredFilesSet.delete(fileName);
      }
      throw error;
    }
  };

  /**
   * Replaces the file's content in the DB with an empty buffer and, once that
   * succeeded, stops tracking the file.
   *
   * @throws rethrows any error from `getDB()` or `db.registerEmptyFileBuffer`;
   *         in that case the file stays tracked.
   */
  registerEmptyFileBuffer: AsyncDuckDB['registerEmptyFileBuffer'] = async (
    fileName
  ) => {
    const db = await this.instanceManager.getDB();
    await db.registerEmptyFileBuffer(fileName);

    this.registeredFilesSet.delete(fileName);
  };

  /** Returns true when `fileName` is currently tracked as registered. */
  isFileRegisteredInDB(fileName: string): boolean {
    return this.registeredFilesSet.has(fileName);
  }

  /**
   * This function on purpose does not clear the files from the DB
   * As this is called when the DB is shutdown and we just want to tell our file registerer to clear the cache
   */
  flushFileCache(): void {
    this.registeredFilesSet.clear();
  }

  /** Sum of the byte lengths of all tracked files (0 when none are tracked). */
  totalByteLength(): number {
    let total = 0;
    for (const { byteLength } of this.registeredFilesSet.values()) {
      total += byteLength;
    }
    return total;
  }

  /** Names of all tracked files, in the order they were recorded. */
  getAllFilesInDB(): string[] {
    return [...this.registeredFilesSet.keys()];
  }
}

0 commit comments

Comments
 (0)