From 96086e75d5dfd02cc6578adee4cd4c4564657954 Mon Sep 17 00:00:00 2001
From: jheer
Date: Mon, 16 Sep 2024 11:54:11 -0700
Subject: [PATCH] fix: Ensure type is present for empty columns.

---
 docs/api/column.md              |  3 ++-
 src/build/column-from-values.js |  4 ++--
 src/column.js                   | 10 ++++++----
 src/decode/table-from-ipc.js    |  4 ++--
 test/data/empty.arrows          | Bin 0 -> 112 bytes
 test/table-from-arrays-test.js  | 29 ++++++++++++++++++++++++-----
 test/table-from-ipc-test.js     |  9 ++++++---
 test/util/data.js               | 19 +++++++++++++------
 8 files changed, 55 insertions(+), 23 deletions(-)
 create mode 100644 test/data/empty.arrows

diff --git a/docs/api/column.md b/docs/api/column.md
index 8368f6b..a87a020 100644
--- a/docs/api/column.md
+++ b/docs/api/column.md
@@ -20,11 +20,12 @@ A data column. A column provides a view over one or more value batches, each cor
 * [Symbol.iterator](#iterator)
 
 #
-Column.constructor(data)
+Column.constructor(data[, type])
 
 Create a new column with the given data batches.
 
 * *data* (`Batch[]`): The column data batches.
+* *type* (`DataType`): The column [data type](data-types). If not specified, the type is extracted from the data batches. This argument is only needed to ensure correct types for "empty" columns without any data.
 
 #
 Column.type
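The documented change above is easiest to see at the constructor itself: with zero batches there is no `data[0]` to read a type from, so the new optional `type` argument is the only way for an empty column to keep its data type. Below is a minimal sketch of that behavior, not part of the patch; it assumes the `Column` class and the `int32` type constructor can be imported from the in-repo modules touched by this patch.

```js
// Illustrative sketch only; module paths follow the repository layout used in this patch.
import { Column } from './src/column.js';
import { int32 } from './src/index.js';

// With no batches, the default `type = data[0]?.type` resolves to undefined.
const untyped = new Column([]);
console.log(untyped.type); // undefined

// Passing the type explicitly preserves it for the "empty" column case.
const typed = new Column([], int32());
console.log(typed.type); // the Int32 data type object
```

This is also why the builder paths changed below (`columnFromValues`, `columnBuilder`) now thread the type through to the `Column` constructor.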
diff --git a/src/build/column-from-values.js b/src/build/column-from-values.js
index a72f750..2a75c81 100644
--- a/src/build/column-from-values.js
+++ b/src/build/column-from-values.js
@@ -25,7 +25,7 @@ export function columnFromValues(length, visit, type, options, dicts) {
 
   // if null type, generate batches and exit early
   if (type.typeId === Type.Null) {
-    return new Column(nullBatches(type, length, limit));
+    return new Column(nullBatches(type, length, limit), type);
   }
 
   const ctx = builderContext(opt, dicts);
@@ -46,7 +46,7 @@ export function columnFromValues(length, visit, type, options, dicts) {
 
   // resolve dictionaries
   ctx.finish();
-  return new Column(data);
+  return new Column(data, type);
 }
 
 /**
diff --git a/src/column.js b/src/column.js
index e1fd798..678e12f 100644
--- a/src/column.js
+++ b/src/column.js
@@ -4,12 +4,12 @@ import { isDirectBatch } from './batch.js';
 /**
  * Build up a column from batches.
  */
-export function columnBuilder() {
+export function columnBuilder(type) {
   let data = [];
   return {
     add(batch) { data.push(batch); return this; },
     clear: () => data = [],
-    done: () => new Column(data)
+    done: () => new Column(data, type)
   };
 }
 
@@ -25,14 +25,16 @@ export class Column {
   /**
    * Create a new column instance.
   * @param {import('./batch.js').Batch[]} data The value batches.
+   * @param {import('./types.js').DataType} [type] The column data type.
+   *  If not specified, the type is extracted from the batches.
   */
-  constructor(data) {
+  constructor(data, type = data[0]?.type) {
    /**
     * The column data type.
     * @type {import('./types.js').DataType}
     * @readonly
     */
-    this.type = data[0].type;
+    this.type = type;
    /**
     * The column length.
     * @type {number}
diff --git a/src/decode/table-from-ipc.js b/src/decode/table-from-ipc.js
index eda3ecb..eb32c54 100644
--- a/src/decode/table-from-ipc.js
+++ b/src/decode/table-from-ipc.js
@@ -60,7 +60,7 @@ export function createTable(data, options = {}) {
       if (isDelta) {
         throw new Error('Delta update can not be first dictionary batch.');
       }
-      dicts.set(id, columnBuilder().add(batch));
+      dicts.set(id, columnBuilder(type).add(batch));
     } else {
       const dict = dicts.get(id);
       if (!isDelta) dict.clear();
@@ -70,7 +70,7 @@
   dicts.forEach((value, key) => dictionaryMap.set(key, value.done()));
 
   // decode column fields
-  const cols = fields.map(() => columnBuilder());
+  const cols = fields.map(f => columnBuilder(f.type));
   for (const batch of records) {
     const ctx = context(batch);
     fields.forEach((f, i) => cols[i].add(visit(f.type, ctx)));
diff --git a/test/data/empty.arrows b/test/data/empty.arrows
new file mode 100644
index 0000000000000000000000000000000000000000..dfa24cdd113ccd669150c3167082e37e135d358f
GIT binary patch
literal 112
zcmXYoOA3H63`Ad{ik~h#fR}S2xNs+W{rF0o;Z0zYiHNv>O<RC2LJ#7

literal 0
HcmV?d00001

diff --git a/test/table-from-arrays-test.js b/test/table-from-arrays-test.js
index 20c0834..83b1192 100644
--- a/test/table-from-arrays-test.js
+++ b/test/table-from-arrays-test.js
@@ -1,5 +1,5 @@
 import assert from 'node:assert';
-import { float64, int8, int32, bool, dictionary, tableFromArrays, utf8, float32 } from '../src/index.js';
+import { float64, int8, int32, bool, dictionary, tableFromArrays, utf8, float32, nullType } from '../src/index.js';
 
 describe('tableFromArrays', () => {
   const values = {
@@ -57,10 +57,29 @@
   });
 
   it('creates empty table', () => {
-    const table = tableFromArrays({});
-    assert.strictEqual(table.numRows, 0);
-    assert.strictEqual(table.numCols, 0);
-    assert.deepStrictEqual(table.schema.fields, []);
+    const withoutCols = tableFromArrays({});
+    assert.strictEqual(withoutCols.numRows, 0);
+    assert.strictEqual(withoutCols.numCols, 0);
+    assert.deepStrictEqual(withoutCols.schema.fields, []);
+
+    const withCols = tableFromArrays({ foo: [], bar: [] });
+    assert.strictEqual(withCols.numRows, 0);
+    assert.strictEqual(withCols.numCols, 2);
+    assert.deepStrictEqual(
+      withCols.schema.fields.map(f => f.type),
+      [ nullType(), nullType() ]
+    );
+
+    const withTypes = tableFromArrays(
+      { foo: [], bar: [] },
+      { types: { foo: int32(), bar: float32() }}
+    );
+    assert.strictEqual(withTypes.numRows, 0);
+    assert.strictEqual(withTypes.numCols, 2);
+    assert.deepStrictEqual(
+      withTypes.schema.fields.map(f => f.type),
+      [ int32(), float32() ]
+    );
   });
 
   it('throws when array lengths differ', () => {
diff --git a/test/table-from-ipc-test.js b/test/table-from-ipc-test.js
index 9dc3c8a..257c944 100644
--- a/test/table-from-ipc-test.js
+++ b/test/table-from-ipc-test.js
@@ -149,11 +149,14 @@ describe('tableFromIPC', () => {
   });
 
   it('decodes empty data', async () => {
-    for (const { bytes } of empty()) {
+      const data = await empty();
+      for (const { bytes } of data) {
       const table = tableFromIPC(bytes);
+      table.schema.fields.map((f, i) => {
+        assert.deepStrictEqual(table.getChildAt(i).type, f.type);
+      });
       assert.strictEqual(table.numRows, 0);
-      assert.strictEqual(table.numCols, 0);
-      assert.deepStrictEqual(table.toColumns(), {});
+      assert.strictEqual(table.numCols, table.schema.fields.length);
       assert.deepStrictEqual(table.toArray(), []);
       assert.deepStrictEqual([...table], []);
     }
diff --git a/test/util/data.js b/test/util/data.js
index c3d729c..49f2c2b 100644
--- a/test/util/data.js
+++ b/test/util/data.js
@@ -299,10 +299,17 @@ export async function utf8View() {
 
 // For empty result sets, DuckDB node only returns a zero byte
 // Other variants may include a schema message
-export function empty() {
-  return [{
-    values: [],
-    bytes: Uint8Array.of(0, 0, 0, 0),
-    nullCount: 0
-  }];
+export async function empty() {
+  return [
+    {
+      values: [],
+      bytes: Uint8Array.of(0, 0, 0, 0),
+      nullCount: 0
+    },
+    {
+      values: [],
+      bytes: new Uint8Array(await readFile(`test/data/empty.arrows`)),
+      nullCount: 0
+    }
+  ];
 }
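Taken together, the updated tests describe the user-visible guarantee of this fix: empty inputs no longer lose their column types, whether the table is built from arrays or decoded from an IPC stream. Below is a short usage sketch along the lines of the new tests; it is not part of the patch, the `types` option and accessors are those exercised in the tests above, and the import path is the in-repo entry point the tests use.

```js
// Illustrative sketch mirroring the new tests; not part of the patch itself.
import { tableFromArrays, int32, float32 } from './src/index.js';

// Empty columns with no declared types now default to the null type
// rather than producing columns with a missing type.
const untyped = tableFromArrays({ foo: [], bar: [] });
console.log(untyped.numRows, untyped.numCols); // 0 2

// With the `types` option, empty columns keep their declared types, and
// each child column's type agrees with the corresponding schema field.
const typed = tableFromArrays(
  { foo: [], bar: [] },
  { types: { foo: int32(), bar: float32() } }
);
typed.schema.fields.forEach((f, i) => {
  // The child column reports a concrete type even though it holds no rows.
  console.log(f.name, typed.getChildAt(i).type.typeId === f.type.typeId); // true
});
```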