Skip to content

Commit

Permalink
fix: Ensure type is present for empty columns.
Browse files Browse the repository at this point in the history
  • Loading branch information
jheer committed Sep 16, 2024
1 parent de42ac1 commit 96086e7
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 23 deletions.
3 changes: 2 additions & 1 deletion docs/api/column.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ A data column. A column provides a view over one or more value batches, each cor
* [Symbol.iterator](#iterator)

<hr/><a id="constructor" href="#constructor">#</a>
Column.<b>constructor</b>(<i>data</i>)
Column.<b>constructor</b>(<i>data</i>[, <i>type</i>])

Create a new column with the given data batches.

* *data* (`Batch[]`): The column data batches.
* *type* (`DataType`): The column [data type](data-types). If not specified, the type is extracted from the data batches. This argument is only needed to ensure correct types for "empty" columns without any data.

<hr/><a id="type" href="#type">#</a>
Column.<b>type</b>
Expand Down
4 changes: 2 additions & 2 deletions src/build/column-from-values.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export function columnFromValues(length, visit, type, options, dicts) {

// if null type, generate batches and exit early
if (type.typeId === Type.Null) {
return new Column(nullBatches(type, length, limit));
return new Column(nullBatches(type, length, limit), type);
}

const ctx = builderContext(opt, dicts);
Expand All @@ -46,7 +46,7 @@ export function columnFromValues(length, visit, type, options, dicts) {
// resolve dictionaries
ctx.finish();

return new Column(data);
return new Column(data, type);
}

/**
Expand Down
10 changes: 6 additions & 4 deletions src/column.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ import { isDirectBatch } from './batch.js';
/**
* Build up a column from batches.
*/
export function columnBuilder() {
export function columnBuilder(type) {
let data = [];
return {
add(batch) { data.push(batch); return this; },
clear: () => data = [],
done: () => new Column(data)
done: () => new Column(data, type)
};
}

Expand All @@ -25,14 +25,16 @@ export class Column {
/**
* Create a new column instance.
* @param {import('./batch.js').Batch<T>[]} data The value batches.
* @param {import('./types.js').DataType} [type] The column data type.
* If not specified, the type is extracted from the batches.
*/
constructor(data) {
constructor(data, type = data[0]?.type) {
/**
* The column data type.
* @type {import('./types.js').DataType}
* @readonly
*/
this.type = data[0].type;
this.type = type;
/**
* The column length.
* @type {number}
Expand Down
4 changes: 2 additions & 2 deletions src/decode/table-from-ipc.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ export function createTable(data, options = {}) {
if (isDelta) {
throw new Error('Delta update can not be first dictionary batch.');
}
dicts.set(id, columnBuilder().add(batch));
dicts.set(id, columnBuilder(type).add(batch));
} else {
const dict = dicts.get(id);
if (!isDelta) dict.clear();
Expand All @@ -70,7 +70,7 @@ export function createTable(data, options = {}) {
dicts.forEach((value, key) => dictionaryMap.set(key, value.done()));

// decode column fields
const cols = fields.map(() => columnBuilder());
const cols = fields.map(f => columnBuilder(f.type));
for (const batch of records) {
const ctx = context(batch);
fields.forEach((f, i) => cols[i].add(visit(f.type, ctx)));
Expand Down
Binary file added test/data/empty.arrows
Binary file not shown.
29 changes: 24 additions & 5 deletions test/table-from-arrays-test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import assert from 'node:assert';
import { float64, int8, int32, bool, dictionary, tableFromArrays, utf8, float32 } from '../src/index.js';
import { float64, int8, int32, bool, dictionary, tableFromArrays, utf8, float32, nullType } from '../src/index.js';

describe('tableFromArrays', () => {
const values = {
Expand Down Expand Up @@ -57,10 +57,29 @@ describe('tableFromArrays', () => {
});

it('creates empty table', () => {
const table = tableFromArrays({});
assert.strictEqual(table.numRows, 0);
assert.strictEqual(table.numCols, 0);
assert.deepStrictEqual(table.schema.fields, []);
const withoutCols = tableFromArrays({});
assert.strictEqual(withoutCols.numRows, 0);
assert.strictEqual(withoutCols.numCols, 0);
assert.deepStrictEqual(withoutCols.schema.fields, []);

const withCols = tableFromArrays({ foo: [], bar: [] });
assert.strictEqual(withCols.numRows, 0);
assert.strictEqual(withCols.numCols, 2);
assert.deepStrictEqual(
withCols.schema.fields.map(f => f.type),
[ nullType(), nullType() ]
);

const withTypes = tableFromArrays(
{ foo: [], bar: [] },
{ types: { foo: int32(), bar: float32() }}
);
assert.strictEqual(withTypes.numRows, 0);
assert.strictEqual(withTypes.numCols, 2);
assert.deepStrictEqual(
withTypes.schema.fields.map(f => f.type),
[ int32(), float32() ]
);
});

it('throws when array lengths differ', () => {
Expand Down
9 changes: 6 additions & 3 deletions test/table-from-ipc-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,14 @@ describe('tableFromIPC', () => {
});

it('decodes empty data', async () => {
for (const { bytes } of empty()) {
const data = await empty();
for (const { bytes } of data) {
const table = tableFromIPC(bytes);
table.schema.fields.map((f, i) => {
assert.deepStrictEqual(table.getChildAt(i).type, f.type);
});
assert.strictEqual(table.numRows, 0);
assert.strictEqual(table.numCols, 0);
assert.deepStrictEqual(table.toColumns(), {});
assert.strictEqual(table.numCols, table.schema.fields.length);
assert.deepStrictEqual(table.toArray(), []);
assert.deepStrictEqual([...table], []);
}
Expand Down
19 changes: 13 additions & 6 deletions test/util/data.js
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,17 @@ export async function utf8View() {

// For empty result sets, DuckDB node only returns a zero byte
// Other variants may include a schema message
export function empty() {
return [{
values: [],
bytes: Uint8Array.of(0, 0, 0, 0),
nullCount: 0
}];
export async function empty() {
return [
{
values: [],
bytes: Uint8Array.of(0, 0, 0, 0),
nullCount: 0
},
{
values: [],
bytes: new Uint8Array(await readFile(`test/data/empty.arrows`)),
nullCount: 0
}
];
}

0 comments on commit 96086e7

Please sign in to comment.