Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Ensure type is present for empty columns. #21

Merged
merged 1 commit into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/api/column.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ A data column. A column provides a view over one or more value batches, each cor
* [Symbol.iterator](#iterator)

<hr/><a id="constructor" href="#constructor">#</a>
Column.<b>constructor</b>(<i>data</i>)
Column.<b>constructor</b>(<i>data</i>[, <i>type</i>])

Create a new column with the given data batches.

* *data* (`Batch[]`): The column data batches.
* *type* (`DataType`): The column [data type](data-types). If not specified, the type is extracted from the data batches. This argument is only needed to ensure correct types for "empty" columns without any data.

<hr/><a id="type" href="#type">#</a>
Column.<b>type</b>
Expand Down
4 changes: 2 additions & 2 deletions src/build/column-from-values.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export function columnFromValues(length, visit, type, options, dicts) {

// if null type, generate batches and exit early
if (type.typeId === Type.Null) {
return new Column(nullBatches(type, length, limit));
return new Column(nullBatches(type, length, limit), type);
}

const ctx = builderContext(opt, dicts);
Expand All @@ -46,7 +46,7 @@ export function columnFromValues(length, visit, type, options, dicts) {
// resolve dictionaries
ctx.finish();

return new Column(data);
return new Column(data, type);
}

/**
Expand Down
10 changes: 6 additions & 4 deletions src/column.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ import { isDirectBatch } from './batch.js';
/**
* Build up a column from batches.
*/
export function columnBuilder() {
export function columnBuilder(type) {
let data = [];
return {
add(batch) { data.push(batch); return this; },
clear: () => data = [],
done: () => new Column(data)
done: () => new Column(data, type)
};
}

Expand All @@ -25,14 +25,16 @@ export class Column {
/**
* Create a new column instance.
* @param {import('./batch.js').Batch<T>[]} data The value batches.
* @param {import('./types.js').DataType} [type] The column data type.
* If not specified, the type is extracted from the batches.
*/
constructor(data) {
constructor(data, type = data[0]?.type) {
/**
* The column data type.
* @type {import('./types.js').DataType}
* @readonly
*/
this.type = data[0].type;
this.type = type;
/**
* The column length.
* @type {number}
Expand Down
4 changes: 2 additions & 2 deletions src/decode/table-from-ipc.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ export function createTable(data, options = {}) {
if (isDelta) {
throw new Error('Delta update can not be first dictionary batch.');
}
dicts.set(id, columnBuilder().add(batch));
dicts.set(id, columnBuilder(type).add(batch));
} else {
const dict = dicts.get(id);
if (!isDelta) dict.clear();
Expand All @@ -70,7 +70,7 @@ export function createTable(data, options = {}) {
dicts.forEach((value, key) => dictionaryMap.set(key, value.done()));

// decode column fields
const cols = fields.map(() => columnBuilder());
const cols = fields.map(f => columnBuilder(f.type));
for (const batch of records) {
const ctx = context(batch);
fields.forEach((f, i) => cols[i].add(visit(f.type, ctx)));
Expand Down
Binary file added test/data/empty.arrows
Binary file not shown.
29 changes: 24 additions & 5 deletions test/table-from-arrays-test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import assert from 'node:assert';
import { float64, int8, int32, bool, dictionary, tableFromArrays, utf8, float32 } from '../src/index.js';
import { float64, int8, int32, bool, dictionary, tableFromArrays, utf8, float32, nullType } from '../src/index.js';

describe('tableFromArrays', () => {
const values = {
Expand Down Expand Up @@ -57,10 +57,29 @@ describe('tableFromArrays', () => {
});

it('creates empty table', () => {
const table = tableFromArrays({});
assert.strictEqual(table.numRows, 0);
assert.strictEqual(table.numCols, 0);
assert.deepStrictEqual(table.schema.fields, []);
const withoutCols = tableFromArrays({});
assert.strictEqual(withoutCols.numRows, 0);
assert.strictEqual(withoutCols.numCols, 0);
assert.deepStrictEqual(withoutCols.schema.fields, []);

const withCols = tableFromArrays({ foo: [], bar: [] });
assert.strictEqual(withCols.numRows, 0);
assert.strictEqual(withCols.numCols, 2);
assert.deepStrictEqual(
withCols.schema.fields.map(f => f.type),
[ nullType(), nullType() ]
);

const withTypes = tableFromArrays(
{ foo: [], bar: [] },
{ types: { foo: int32(), bar: float32() }}
);
assert.strictEqual(withTypes.numRows, 0);
assert.strictEqual(withTypes.numCols, 2);
assert.deepStrictEqual(
withTypes.schema.fields.map(f => f.type),
[ int32(), float32() ]
);
});

it('throws when array lengths differ', () => {
Expand Down
9 changes: 6 additions & 3 deletions test/table-from-ipc-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,14 @@ describe('tableFromIPC', () => {
});

it('decodes empty data', async () => {
for (const { bytes } of empty()) {
const data = await empty();
for (const { bytes } of data) {
const table = tableFromIPC(bytes);
table.schema.fields.map((f, i) => {
assert.deepStrictEqual(table.getChildAt(i).type, f.type);
});
assert.strictEqual(table.numRows, 0);
assert.strictEqual(table.numCols, 0);
assert.deepStrictEqual(table.toColumns(), {});
assert.strictEqual(table.numCols, table.schema.fields.length);
assert.deepStrictEqual(table.toArray(), []);
assert.deepStrictEqual([...table], []);
}
Expand Down
19 changes: 13 additions & 6 deletions test/util/data.js
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,17 @@ export async function utf8View() {

// For empty result sets, DuckDB node only returns a zero byte
// Other variants may include a schema message
export function empty() {
return [{
values: [],
bytes: Uint8Array.of(0, 0, 0, 0),
nullCount: 0
}];
export async function empty() {
return [
{
values: [],
bytes: Uint8Array.of(0, 0, 0, 0),
nullCount: 0
},
{
values: [],
bytes: new Uint8Array(await readFile(`test/data/empty.arrows`)),
nullCount: 0
}
];
}