diff --git a/docs/api/data-types.md b/docs/api/data-types.md
index 00c99fc..762f988 100644
--- a/docs/api/data-types.md
+++ b/docs/api/data-types.md
@@ -94,6 +94,19 @@ Create a Dictionary data type instance. A dictionary type consists of a dictiona
* *ordered* (`boolean`): Indicates if dictionary values are ordered (default `false`).
* *id* (`number`): Optional dictionary id. The default value (-1) indicates that the dictionary applies to a single column only. Provide an explicit id in order to reuse a dictionary across columns when building, in which case different dictionaries *must* have different unique ids. All dictionary ids are later resolved (possibly to new values) upon IPC encoding.
+```js
+import { dictionary, int16, utf8 } from '@uwdata/flechette';
+// dictionary type with string values and int16 indices
+// {
+// typeId: -1,
+// id: -1,
+// dictionary: { typeId: 5, ... },
+// indices: { typeId: 2, bitWidth: 16, signed: true, ... },
+// ordered: false
+// }
+dictionary(utf8(), int16())
+```
+
### Null
#
@@ -101,6 +114,12 @@ Create a Dictionary data type instance. A dictionary type consists of a dictiona
Create a Null data type instance. Null data requires no storage and all extracted values are `null`.
+```js
+import { nullType } from '@uwdata/flechette';
+// { typeId: 1 }
+nullType()
+```
+
### Int
#
@@ -111,6 +130,12 @@ Create an Int data type instance. Integer values are stored within typed arrays
* *bitWidth* (`number`): The integer bit width, must be `8`, `16`, `32` (default), or `64`.
* *signed* (`boolean`): Flag for signed or unsigned integers (default `true`).
+```js
+import { int } from '@uwdata/flechette';
+// { typeId: 2, bitWidth: 32, signed: true, ... }
+int()
+```
+
#
int8()
@@ -160,6 +185,12 @@ Create a Float data type instance for floating point numbers. Floating point val
* *precision* (`number`): The floating point precision, one of `Precision.HALF` (16-bit), `Precision.SINGLE` (32-bit) or `Precision.DOUBLE` (64-bit, default).
+```js
+import { float } from '@uwdata/flechette';
+// { typeId: 3, precision: 2, ... }
+float()
+```
+
#
float16()
@@ -182,6 +213,12 @@ Create a Float data type instance for 64-bit (double precision) floating point n
Create a Binary data type instance for variably-sized opaque binary data with 32-bit offsets. Binary values are stored in a `Uint8Array` using a 32-bit offset array and extracted to JavaScript `Uint8Array` subarray values.
+```js
+import { binary } from '@uwdata/flechette';
+// { typeId: 4 }
+binary()
+```
+
### Utf8
#
@@ -189,6 +226,12 @@ Create a Binary data type instance for variably-sized opaque binary data with 32
Create a Utf8 data type instance for Unicode string data of variable length with 32-bit offsets. [UTF-8](https://en.wikipedia.org/wiki/UTF-8) code points are stored as binary data and extracted to JavaScript `string` values using [`TextDecoder`](https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder). Due to decoding overhead, repeated access to string data can be costly. If making multiple passes over Utf8 data, we recommend converting the string upfront (e.g., via [`Column.toArray`](column#toArray)) and accessing the result.
+```js
+import { utf8 } from '@uwdata/flechette';
+// { typeId: 5 }
+utf8()
+```
+
### Bool
#
@@ -196,6 +239,12 @@ Create a Utf8 data type instance for Unicode string data of variable length with
Create a Bool data type instance for boolean data. Bool values are stored compactly in `Uint8Array` bitmaps with eight values per byte, and extracted to JavaScript `boolean` values.
+```js
+import { bool } from '@uwdata/flechette';
+// { typeId: 6 }
+bool()
+```
+
### Decimal
#
@@ -209,6 +258,13 @@ By default, Flechette converts decimals to 64-bit floating point numbers upon ex
* *scale* (`number`): The number of fractional digits, beyond the decimal point.
* *bitWidth* (`number`): The decimal bit width, one of `128` (default) or `256`.
+```js
+import { decimal } from '@uwdata/flechette';
+// decimal with 18 total digits, including 3 fractional digits
+// { typeId: 7, precision: 18, scale: 3, bitWidth: 128, ... }
+decimal(18, 3)
+```
+
### Date
#
@@ -220,16 +276,34 @@ By default, extracted date values are converted to JavaScript `number` values re
* *unit* (`number`): The date unit, one of `DateUnit.DAY` or `DateUnit.MILLISECOND`.
+```js
+import { DateUnit, date } from '@uwdata/flechette';
+// { typeId: 8, unit: 0, ... }
+date(DateUnit.DAY)
+```
+
#
dateDay()
Create a Date data type instance with units of `DateUnit.DAY`.
+```js
+import { dateDay } from '@uwdata/flechette';
+// { typeId: 8, unit: 0, ... }
+dateDay()
+```
+
#
dateMillisecond()
Create a Date data type instance with units of `DateUnit.MILLISECOND`.
+```js
+import { dateMillisecond } from '@uwdata/flechette';
+// { typeId: 8, unit: 1, ... }
+dateMillisecond()
+```
+
### Time
#
@@ -244,26 +318,58 @@ Time values are stored as integers in either an `Int32Array` (*bitWidth* = 32) o
* *unit* (`number`): The time unit, one of `TimeUnit.SECOND`, `TimeUnit.MILLISECOND` (default), `TimeUnit.MICROSECOND`, or `TimeUnit.NANOSECOND`.
* *bitWidth* (`number`): The time bit width, one of `32` (for seconds and milliseconds) or `64` (for microseconds and nanoseconds).
+```js
+import { TimeUnit, time } from '@uwdata/flechette';
+// { typeId: 9, unit: 1, bitWidth: 32, ... }
+time()
+// { typeId: 9, unit: 2, bitWidth: 64, ... }
+time(TimeUnit.MICROSECOND, 64)
+```
+
#
timeSecond()
Create a Time data type instance with units of `TimeUnit.SECOND`.
+```js
+import { timeSecond } from '@uwdata/flechette';
+// { typeId: 9, unit: 0, bitWidth: 32, ... }
+timeSecond()
+```
+
#
timeMillisecond()
Create a Time data type instance with units of `TimeUnit.MILLISECOND`.
+```js
+import { timeMillisecond } from '@uwdata/flechette';
+// { typeId: 9, unit: 1, bitWidth: 32, ... }
+timeMillisecond()
+```
+
#
timeMicrosecond()
Create a Time data type instance with units of `TimeUnit.MICROSECOND`.
+```js
+import { timeMicrosecond } from '@uwdata/flechette';
+// { typeId: 9, unit: 2, bitWidth: 64, ... }
+timeMicrosecond()
+```
+
#
timeNanosecond()
Create a Time data type instance with units of `TimeUnit.NANOSECOND`.
+```js
+import { timeNanosecond } from '@uwdata/flechette';
+// { typeId: 9, unit: 3, bitWidth: 64, ... }
+timeNanosecond()
+```
+
### Timestamp
#
@@ -274,7 +380,15 @@ Create a Timestamp data type instance. Timestamp values are 64-bit signed intege
Timestamp values are stored in a `BigInt64Array` and converted to millisecond-based JavaScript `number` values (potentially with fractional digits) upon extraction. An error is raised if a value exceeds either `Number.MIN_SAFE_INTEGER` or `Number.MAX_SAFE_INTEGER`. Pass the `useDate` extraction option (e.g., to [`tableFromIPC`](/flechette/api/#tableFromIPC) or [`tableFromArrays`](/flechette/api/#tableFromArrays)) to instead extract timestamp values as JavaScript `Date` objects.
* *unit* (`number`): The time unit, one of `TimeUnit.SECOND`, `TimeUnit.MILLISECOND` (default), `TimeUnit.MICROSECOND`, or `TimeUnit.NANOSECOND`.
-* *timezone* (`string`): An optional string for the name of a timezone. If provided, the value should either be a string as used in the Olson timezone database (the "tz database" or "tzdata"), such as "America/New_York", or an absolute timezone offset of the form "+XX:XX" or "-XX:XX", such as "+07:30". Whether a timezone string is present indicates different semantics about the data.
+* *timezone* (`string`): An optional string for the name of a timezone. If provided, the value should either be a string as used in the Olson timezone database (the "tz database" or "tzdata"), such as "America/New_York", or an absolute timezone offset of the form "+XX:XX" or "-XX:XX", such as "+07:30". Whether a timezone string is present indicates different semantics about the data. That said, Flechette does not process the timezone information.
+
+```js
+import { TimeUnit, timestamp } from '@uwdata/flechette';
+// { typeId: 10, unit: 1, timezone: null, ... }
+timestamp()
+// { typeId: 10, unit: 2, timezone: 'Europe/Berlin', ... }
+timestamp(TimeUnit.MICROSECOND, 'Europe/Berlin')
+```
### Interval
@@ -291,6 +405,14 @@ Create an Interval data type instance. Values represent calendar intervals store
* *unit* (`number`): The interval unit. One of `IntervalUnit.YEAR_MONTH`, `IntervalUnit.DAY_TIME`, or `IntervalUnit.MONTH_DAY_NANO` (default).
+```js
+import { IntervalUnit, interval } from '@uwdata/flechette';
+// { typeId: 11, unit: 0, ... }
+interval(IntervalUnit.YEAR_MONTH)
+// { typeId: 11, unit: 2, ... }
+interval(IntervalUnit.MONTH_DAY_NANO)
+```
+
### List
#
@@ -300,6 +422,20 @@ Create a List type instance, representing variably-sized lists (arrays) with 32-
* *child* (`DataType | Field`): The child (list item) field or data type.
+```js
+import { int32, list } from '@uwdata/flechette';
+// {
+// typeId: 12,
+// children: [{
+// name: '',
+// type: { typeId: 2, bitWidth: 32, signed: true, ... },
+// ...
+// }],
+// ...
+// }
+list(int32())
+```
+
### Struct
#
@@ -311,16 +447,20 @@ By default, structs are fully extracted to standard JavaScript objects. Pass the
* *children* (`Field[] | object`): An array of property fields, or an object mapping property names to data types. If an object, the instantiated fields are assumed to be nullable and have no metadata.
-*Examples*
```js
-import { bool, float32, int16, struct } from '@uwdata/flechette';
+import { bool, field, float32, int16, struct } from '@uwdata/flechette';
// using an object with property names and types
+// {
+// typeId: 13,
+// children: [
+// { name: 'foo', type: { typeId: 2, bitWidth: 16, ... }, ... },
+// { name: 'bar', type: { typeId: 6 }, ... },
+// { name: 'baz', type: { typeId: 3, precision: 1, ... }, ... }
+// ]
+// }
struct({ foo: int16(), bar: bool(), baz: float32() })
-```
-```js
-import { bool, field, float32, int16, struct } from '@uwdata/flechette';
// using an array of Field instances
struct([
field('foo', int16()),
@@ -345,6 +485,27 @@ Extracted JavaScript values depend on the child types.
* *typeIds* (`number[]`): Children type ids, in the same order as the children types. Type ids provide a level of indirection over children types. If not provided, the children indices are used as the type ids.
* *typeIdForValue* (`(value: any, index: number) => number`): A function that takes an arbitrary value and a row index and returns a corresponding union type id. This function is required to build union-typed data with [`tableFromArrays`](/flechette/api/#tableFromArrays) or [`columnFromArray`](/flechette/api/#columnFromArray).
+```js
+import { UnionMode, float64, union, utf8 } from '@uwdata/flechette';
+// {
+// typeId: 14,
+// mode: 1,
+// typeIds: [ 0, 1 ],
+// typeMap: { '0': 0, '1': 1 },
+// children: [
+// { name: '_0', type: { typeId: 3, precision: 2, ... }, ... },
+// { name: '_1', type: { typeId: 5 }, ... }
+// ],
+// typeIdForValue: <>
+// }
+union(
+ UnionMode.Dense,
+ [float64(), utf8()],
+ [0, 1],
+ v => typeof v === 'string' ? 1 : 0
+)
+```
+
### FixedSizeBinary
#
@@ -354,6 +515,12 @@ Create a FixedSizeBinary data type instance for opaque binary data where each en
* *stride* (`number`): The fixed size in bytes.
+```js
+import { fixedSizeBinary } from '@uwdata/flechette';
+// { typeId: 15, stride: 128 }
+fixedSizeBinary(128)
+```
+
### FixedSizeList
#
@@ -364,6 +531,16 @@ Create a FixedSizeList type instance for list (array) data where every list has
* *child* (`DataType | Field`): The child (list item) field or data type.
* *stride* (`number`): The fixed list size.
+```js
+import { fixedSizeList, float32 } from '@uwdata/flechette';
+// {
+// typeId: 16,
+// stride: 8,
+// children: [ { name: '', type: { typeId: 3, precision: 1, ... }, ... } ]
+// }
+fixedSizeList(float32(), 8)
+```
+
### Map
#
@@ -377,6 +554,26 @@ By default, map data is extracted to arrays of `[key, value]` pairs, in the styl
* *valueField* (`DataType | Field`): The map value field or data type.
* *keysSorted* (`boolean`): Flag indicating if the map keys are sorted (default `false`).
+```js
+import { int64, map, utf8 } from '@uwdata/flechette';
+// {
+// typeId: 17,
+// keysSorted: false,
+// children: [{
+// name: 'entries',
+// type: {
+// typeId: 13,
+// children: [
+// { name: 'key', type: { typeId: 5 }, ... },
+// { name: 'value', type: { typeId: 2, bitWidth: 64, ... }, ... }
+// ]
+// }, ...
+// }
+// ], ...
+// }
+map(utf8(), int64())
+```
+
### Duration
#
@@ -388,6 +585,12 @@ Duration values are stored as integers in a `BigInt64Array`. By default, duratio
* *unit* (`number`): The duration time unit, one of `TimeUnit.SECOND`, `TimeUnit.MILLISECOND` (default), `TimeUnit.MICROSECOND`, or `TimeUnit.NANOSECOND`.
+```js
+import { duration } from '@uwdata/flechette';
+// { typeId: 18, unit: 1, ... }
+duration()
+```
+
### LargeBinary
#
@@ -395,6 +598,12 @@ Duration values are stored as integers in a `BigInt64Array`. By default, duratio
Create a LargeBinary data type instance for variably-sized opaque binary data with 64-bit offsets, allowing representation of extremely large data values. Large binary values are stored in a `Uint8Array`, indexed using a 64-bit offset array and extracted to JavaScript `Uint8Array` subarray values.
+```js
+import { largeBinary } from '@uwdata/flechette';
+// { typeId: 19, ... }
+largeBinary()
+```
+
### LargeUtf8
#
@@ -402,6 +611,12 @@ Create a LargeBinary data type instance for variably-sized opaque binary data wi
Create a LargeUtf8 data type instance for Unicode string data of variable length with 64-bit offsets, allowing representation of extremely large data values. [UTF-8](https://en.wikipedia.org/wiki/UTF-8) code points are stored as binary data and extracted to JavaScript `string` values using [`TextDecoder`](https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder). Due to decoding overhead, repeated access to string data can be costly. If making multiple passes over Utf8 data, we recommend converting the string upfront (e.g., via [`Column.toArray`](column#toArray)) and accessing the result.
+```js
+import { largeUtf8 } from '@uwdata/flechette';
+// { typeId: 20, ... }
+largeUtf8()
+```
+
### LargeList
#
@@ -411,6 +626,12 @@ Create a LargeList type instance, representing variably-sized lists (arrays) wit
* *child* (`DataType | Field`): The child (list item) field or data type.
+```js
+import { largeList, utf8 } from '@uwdata/flechette';
+// { typeId: 21, children: [ { name: '', type: { typeId: 5 }, ... } ], ... }
+largeList(utf8())
+```
+
### RunEndEncoded
#
@@ -423,12 +644,17 @@ To extract values by index, binary search is performed over the run_ends to loca
* *runsField* (`DataType | Field`): The run-ends field or data type.
* *valuesField* (`DataType | Field`): The values field or data type.
-*Examples*
-
```js
import { int32, runEndEncoded, utf8 } from '@uwdata/flechette';
// 32-bit integer run ends and utf8 string values
-const type = runEndEncoded(int32(), utf8());
+// {
+// typeId: 22,
+// children: [
+// { name: 'run_ends', type: { typeId: 2, bitWidth: 32, ... }, ... },
+// { name: 'values', type: { typeId: 5 }, ... }
+// ]
+// }
+runEndEncoded(int32(), utf8())
```
### BinaryView
@@ -440,6 +666,12 @@ Create a BinaryView type instance. BinaryView data is logically the same as the
Flechette can encode and decode BinaryView data, extracting `Uint8Array` values. However, Flechette does not currently support building BinaryView columns from JavaScript values.
+```js
+import { binaryView } from '@uwdata/flechette';
+// { typeId: 23 }
+binaryView()
+```
+
### Utf8View
#
@@ -449,6 +681,12 @@ Create a Utf8View type instance. Utf8View data is logically the same as the [Utf
Flechette can encode and decode Utf8View data, extracting `string` values. However, Flechette does not currently support building Utf8View columns from JavaScript values.
+```js
+import { utf8View } from '@uwdata/flechette';
+// { typeId: 24 }
+utf8View()
+```
+
### ListView
#
@@ -460,6 +698,15 @@ ListView data are extracted to either `Array` or `TypedArray` instances, dependi
* *child* (`DataType | Field`): The child (list item) field or data type.
+```js
+import { float16, listView } from '@uwdata/flechette';
+// {
+// typeId: 25,
+// children: [ { name: 'value', type: { typeId: 3, ... }, ... } ]
+// }
+listView(float16())
+```
+
### LargeListView
#
@@ -470,3 +717,12 @@ Create a LargeListView type instance, representing variably-sized lists (arrays)
LargeListView data are extracted to either `Array` or `TypedArray` instances, depending on the child type. Flechette can encode and decode LargeListView data; however, Flechette does not currently support building LargeListView columns from JavaScript values.
* *child* (`DataType | Field`): The child (list item) field or data type.
+
+```js
+import { float16, largeListView } from '@uwdata/flechette';
+// {
+// typeId: 26,
+// children: [ { name: 'value', type: { typeId: 3, ... }, ... } ]
+// }
+largeListView(float16())
+```
diff --git a/docs/api/index.md b/docs/api/index.md
index 1ca5c2e..ba6ff9a 100644
--- a/docs/api/index.md
+++ b/docs/api/index.md
@@ -26,8 +26,6 @@ Decode [Apache Arrow IPC data](https://arrow.apache.org/docs/format/Columnar.htm
* *useMap* (`boolean`): If true, extract Arrow 'Map' values as JavaScript `Map` instances. Otherwise, return an array of [key, value] pairs compatible with both `Map` and `Object.fromEntries` (default).
* *useProxy* (`boolean`): If true, extract Arrow 'Struct' values and table row objects using zero-copy proxy objects that extract data from underlying Arrow batches. The proxy objects can improve performance and reduce memory usage, but do not support property enumeration (`Object.keys`, `Object.values`, `Object.entries`) or spreading (`{ ...object }`).
-*Examples*
-
```js
import { tableFromIPC } from '@uwdata/flechette';
const url = 'https://vega.github.io/vega-datasets/data/flights-200k.arrow';
@@ -44,8 +42,6 @@ Encode an Arrow table into Arrow IPC binary format and return the result as a `U
* *options* (`object`): Encoding options object.
* *format* (`string`): Arrow `'stream'` (the default) or `'file'` format.
-*Examples*
-
```js
import { tableToIPC } from '@uwdata/flechette';
const bytes = tableToIPC(table, { format: 'stream' });
@@ -62,8 +58,6 @@ Create a new table from a set of named arrays. Data types for the resulting Arro
* *maxBatchRows* (`number`): The maximum number of rows to include in a single record batch. If the array lengths exceed this number, the resulting table will consist of multiple record batches.
* In addition, all [tableFromIPC](#tableFromIPC) extraction options are supported.
-*Examples*
-
```js
import { tableFromArrays } from '@uwdata/flechette';
@@ -108,27 +102,17 @@ Create a new column from a provided data array. The data types for the column ca
* *maxBatchRows* (`number`): The maximum number of rows to include in a single record batch. If the array lengths exceed this number, the resulting table will consist of multiple record batches.
* In addition, all [tableFromIPC](#tableFromIPC) extraction options are supported.
-*Examples*
-
```js
-import { columnFromArray } from '@uwdata/flechette';
+import { columnFromArray, float32, int64 } from '@uwdata/flechette';
// create column with inferred type (here, float64)
-const col = columnFromArray([1.1, 2.2, 3.3, 4.4, 5.5]);
-```
-
-```js
-import { columnFromArray, float32 } from '@uwdata/flechette';
+columnFromArray([1.1, 2.2, 3.3, 4.4, 5.5]);
// create column with specified type
-const col = columnFromArray([1.1, 2.2, 3.3, 4.4, 5.5], float32());
-```
-
-```js
-import { columnFromArray, int64 } from '@uwdata/flechette';
+columnFromArray([1.1, 2.2, 3.3, 4.4, 5.5], float32());
// create column with specified type and options
-const col = columnFromArray(
+columnFromArray(
[1n, 32n, 2n << 34n], int64(),
{ maxBatchRows: 1000, useBigInt: true }
);
@@ -142,8 +126,6 @@ Create a new table from a collection of columns. This method is useful for creat
* *data* (`object | array`): The input columns as an object with name keys, or an array of [name, column] pairs.
* *useProxy* (`boolean`): Flag indicating if row proxy objects should be used to represent table rows (default `false`). Typically this should match the value of the `useProxy` extraction option used for column generation.
-*Examples*
-
```js
import { columnFromArray, tableFromColumns } from '@uwdata/flechette';
diff --git a/src/data-types.js b/src/data-types.js
index 1653fb7..6950d94 100644
--- a/src/data-types.js
+++ b/src/data-types.js
@@ -23,8 +23,8 @@ export const invalidDataType = (typeId) =>
*/
export const field = (name, type, nullable = true, metadata = null) => ({
name,
- nullable,
type,
+ nullable,
metadata
});
@@ -96,10 +96,10 @@ const basicType = (typeId) => ({ typeId });
*/
export const dictionary = (type, indexType, ordered = false, id = -1) => ({
typeId: Type.Dictionary,
+ id,
dictionary: type,
indices: indexType || int32(),
- ordered,
- id
+ ordered
});
/**