@@ -85,14 +85,14 @@ Create a new field instance for use in a schema or type definition. A field repr
 ### Dictionary
 <hr/><a id="dictionary" href="#dictionary">#</a>
-<b>dictionary</b>(<i>type</i>[, <i>indexType</i>, <i>id</i>, <i>ordered</i>])
+<b>dictionary</b>(<i>type</i>[, <i>indexType</i>, <i>ordered</i>, <i>id</i>])
-Create a Dictionary data type instance. A dictionary type consists of a dictionary of values (which may be of any type) and corresponding integer indices that reference those values. If values are repeated, a dictionary encoding can provide substantial space savings. In the IPC format, dictionary indices reside alongside other columns in a record batch, while dictionary values are written to special dictionary batches, linked by a unique dictionary *id*. Internally Flechette extracts dictionary values upfront; while this incurs some initial overhead, it enables fast subsequent lookups.
+Create a Dictionary data type instance. A dictionary type consists of a dictionary of values (which may be of any type) and corresponding integer indices that reference those values. If values are repeated, a dictionary encoding can provide substantial space savings. In the IPC format, dictionary indices reside alongside other columns in a record batch, while dictionary values are written to special dictionary batches, linked by a unique dictionary *id* assigned at encoding time. Internally Flechette extracts dictionary values immediately upon decoding; while this incurs some initial overhead, it enables fast subsequent lookups.
 * *type* (`DataType`): The data type of dictionary values.
 * *indexType* (`DataType`): The data type of dictionary indices. Must be an integer type (default [`int32`](#int32)).
-* *id* (`number`): The dictionary id, should be unique in a table. Defaults to `-1`, but is set to a proper id if the type is passed through [`tableFromArrays`](/flechette/api/#tableFromArrays).
 * *ordered* (`boolean`): Indicates if dictionary values are ordered (default `false`).
+* *id* (`number`): Optional dictionary id. The default value (`-1`) indicates that the dictionary applies to a single column only. Provide an explicit id in order to reuse a dictionary across columns when building, in which case distinct dictionaries *must* use distinct ids. All dictionary ids are later resolved (possibly to new values) upon IPC encoding.
 ### Null
@@ -137,7 +137,7 @@ const col = columnFromArray(
 <hr/><a id="tableFromColumns" href="#tableFromColumns">#</a>
 <b>tableFromColumns</b>(<i>columns</i>[, <i>useProxy</i>])
-Create a new table from a collection of columns. This method is useful for creating new tables using one or more pre-existing column instances. Otherwise, [`tableFromArrays`](#tableFromArrays) should be preferred. Input columns are assumed to have the same record batch sizes and non-conflicting dictionary ids.
+Create a new table from a collection of columns. This method is useful for creating new tables using one or more pre-existing column instances. Otherwise, [`tableFromArrays`](#tableFromArrays) should be preferred. Input columns are assumed to have the same record batch sizes.
 * *data* (`object | array`): The input columns as an object with name keys, or an array of [name, column] pairs.
 * *useProxy* (`boolean`): Flag indicating if row proxy objects should be used to represent table rows (default `false`). Typically this should match the value of the `useProxy` extraction option used for column generation.
@@ -6,7 +6,7 @@ import { toBigInt, toDateDay, toFloat16, toTimestamp } from '../util/numbers.js'
 import { BinaryBuilder } from './builders/binary.js';
 import { BoolBuilder } from './builders/bool.js';
 import { DecimalBuilder } from './builders/decimal.js';
-import { DictionaryBuilder, dictionaryValues } from './builders/dictionary.js';
+import { DictionaryBuilder, dictionaryContext } from './builders/dictionary.js';
 import { FixedSizeBinaryBuilder } from './builders/fixed-size-binary.js';
 import { FixedSizeListBuilder } from './builders/fixed-size-list.js';
 import { IntervalDayTimeBuilder, IntervalMonthDayNanoBuilder } from './builders/interval.js';
@@ -19,36 +19,20 @@ import { Utf8Builder } from './builders/utf8.js';
 import { DirectBuilder, Int64Builder, TransformBuilder } from './builders/values.js';
- * Create a new context object for shared builder state.
+ * Create a context object for shared builder state.
  * @param {import('../types.js').ExtractionOptions} [options]
  *  Batch extraction options.
- * @param {Map<number, ReturnType<dictionaryValues>>} [dictMap]
- *  A map of dictionary ids to value builder helpers.
+ * @param {ReturnType<dictionaryContext>} [dictionaries]
+ *  Context object for tracking dictionaries.
-export function builderContext(options, dictMap = new Map) {
-  let dictId = 0;
+export function builderContext(
+  options = {},
+  dictionaries = dictionaryContext()
+) {
   return {
-    batchType(type) {
-      return batchType(type, options);
-    },
-    dictionary(type, id) {
-      let dict;
-      if (id != null) {
-        dict = dictMap.get(id);
-      } else {
-        while (dictMap.has(dictId + 1)) ++dictId;
-        id = dictId;
-      }
-      if (!dict) {
-        dictMap.set(id, dict = dictionaryValues(id, type, this));
-      }
-      return dict;
-    },
-    finish() {
-      for (const dict of dictMap.values()) {
-        dict.finish(options);
-      }
-    }
+    batchType: type => batchType(type, options),
+    dictionary(type) { return dictionaries.get(type, this); },
+    finish: () => dictionaries.finish(options)
@@ -6,21 +6,58 @@ import { ValidityBuilder } from './validity.js';
 import { ValidityBuilder } from './validity.js';
- * Builder helped for creating dictionary values.
- * @param {number} id The dictionary id.
+ * Create a context object for managing dictionary builders.
+ */
+export function dictionaryContext() {
+  const idMap = new Map;
+  const dicts = new Set;
+  return {
+    /**
+     * Get a dictionary values builder for the given dictionary type.
+     * @param {import('../../types.js').DictionaryType} type
+     *  The dictionary type.
+     * @param {*} ctx The builder context.
+     * @returns {ReturnType<dictionaryValues>}
+     */
+    get(type, ctx) {
+      // if a dictionary has a non-negative id, assume it was set
+      // intentionally and track it for potential reuse across columns;
+      // otherwise the dictionary is used for a single column only
+      const id = type.id;
+      if (id >= 0 && idMap.has(id)) {
+        return idMap.get(id);
+      } else {
+        const dict = dictionaryValues(type, ctx);
+        if (id >= 0) idMap.set(id, dict);
+        dicts.add(dict);
+        return dict;
+      }
+    },
+    /**
+     * Finish building dictionary values columns and assign them to
+     * their corresponding dictionary batches.
+     * @param {import('../../types.js').ExtractionOptions} options
+     */
+    finish(options) {
+      dicts.forEach(dict => dict.finish(options));
+    }
+  };
+ * Builder helper for creating dictionary values.
  * @param {import('../../types.js').DictionaryType} type
  *  The dictionary data type.
- * @param {*} ctx
- * @returns
+ * @param {ReturnType<import('../builder.js').builderContext>} ctx
+ *  The builder context.
-export function dictionaryValues(id, type, ctx) {
+export function dictionaryValues(type, ctx) {
   const keys = Object.create(null);
   const values = builder(type.dictionary, ctx);
   const batches = [];
   let index = -1;
-  type.id = id;
   return {
@@ -1,10 +1,8 @@
 import { float32Array, float64Array, int16Array, int32Array, int64Array, int8Array, isInt64ArrayType, isTypedArray, uint16Array, uint32Array, uint64Array, uint8Array } from '../util/arrays.js';
-import { DirectBatch, Int64Batch, NullBatch } from '../batch.js';
+import { DirectBatch, Int64Batch } from '../batch.js';
 import { Column } from '../column.js';
 import { float32, float64, int16, int32, int64, int8, uint16, uint32, uint64, uint8 } from '../data-types.js';
-import { inferType } from './infer-type.js';
-import { builder, builderContext } from './builder.js';
-import { Type } from '../constants.js';
+import { columnFromValues } from './column-from-values.js';
  * Create a new column from a provided data array.
@@ -14,25 +12,20 @@ import { Type } from '../constants.js';
  *  If not specified, type inference is attempted.
  * @param {import('../types.js').ColumnBuilderOptions} [options]
  *  Builder options for the generated column.
- * @param {ReturnType<import('./builder.js').builderContext>} [ctx]
+ * @param {ReturnType<import('./builders/dictionary.js').dictionaryContext>} [dicts]
  *  Builder context object, for internal use only.
  * @returns {Column<T>} The generated column.
-export function columnFromArray(data, type, options = {}, ctx) {
-  if (!type) {
-    if (isTypedArray(data)) {
-      return columnFromTypedArray(data, options);
-    } else {
-      type = inferType(data);
-    }
-  }
-  return columnFromValues(data, type, options, ctx);
+export function columnFromArray(data, type, options = {}, dicts) {
+  return !type && isTypedArray(data)
+    ? columnFromTypedArray(data, options)
+    : columnFromValues(data.length, v => data.forEach(v), type, options, dicts);
  * Create a new column from a typed array input.
  * @template T
- * @param {import('../types.js').TypedArray} values
+ * @param {import('../types.js').TypedArray} values The input data.
  * @param {import('../types.js').ColumnBuilderOptions} options
  *  Builder options for the generated column.
  * @returns {Column<T>} The generated column.
@@ -62,52 +55,6 @@ function columnFromTypedArray(values, { maxBatchRows, useBigInt }) {
   return new Column(batches);
- * Build a column by iterating over the provided values array.
- * @template T
- * @param {Array | import('../types.js').TypedArray} values The input data.
- * @param {import('../types.js').DataType} type The column data type.
- * @param {import('../types.js').ColumnBuilderOptions} [options]
- *  Builder options for the generated column.
- * @param {ReturnType<import('./builder.js').builderContext>} [ctx]
- *  Builder context object, for internal use only.
- * @returns {Column<T>} The generated column.
- */
-function columnFromValues(values, type, options, ctx) {
-  const { maxBatchRows, ...opt } = options;
-  const length = values.length;
-  const limit = Math.min(maxBatchRows || Infinity, length);
-  // if null type, generate batches and exit early
-  if (type.typeId === Type.Null) {
-    return new Column(nullBatches(type, length, limit));
-  }
-  const data = [];
-  ctx ??= builderContext(opt);
-  const b = builder(type, ctx).init();
-  const next = b => data.push(b.batch());
-  const numBatches = Math.floor(length / limit);
-  let idx = 0;
-  let row = 0;
-  for (let i = 0; i < numBatches; ++i) {
-    for (row = 0; row < limit; ++row) {
-      b.set(values[idx++], row);
-    }
-    next(b);
-  }
-  for (row = 0; idx < length; ++idx) {
-    b.set(values[idx], row++);
-  }
-  if (row) next(b);
-  // resolve dictionaries
-  ctx.finish();
-  return new Column(data);
  * Return an Arrow data type for a given typed array type.
  * @param {import('../types.js').TypedArrayConstructor} arrayType
@@ -128,22 +75,3 @@ function typeForTypedArray(arrayType) {
     case uint64Array: return uint64();
- * Create null batches with the given batch size limit.
- * @param {import('../types.js').NullType} type The null data type.
- * @param {number} length The total column length.
- * @param {number} limit The maximum batch size.
- * @returns {import('../batch.js').NullBatch[]} The null batches.
- */
-function nullBatches(type, length, limit) {
-  const data = [];
-  const batch = length => new NullBatch({ length, nullCount: length, type });
-  const numBatches = Math.floor(length / limit);
-  for (let i = 0; i < numBatches; ++i) {
-    data.push(batch(limit));
-  }
-  const rem = length % limit;
-  if (rem) data.push(batch(rem));
-  return data;
@@ -0,0 +1,69 @@
+import { NullBatch } from '../batch.js';
+import { Column } from '../column.js';
+import { inferType } from './infer-type.js';
+import { builder, builderContext } from './builder.js';
+import { Type } from '../constants.js';
+ * Create a new column by iterating over provided values.
+ * @template T
+ * @param {number} length The input data length.
+ * @param {(visitor: (value: any) => void) => void} visit
+ *  A function that applies a callback to successive data values.
+ * @param {import('../types.js').DataType} [type] The data type; inferred from the values if not provided.
+ * @param {import('../types.js').ColumnBuilderOptions} [options]
+ *  Builder options for the generated column.
+ * @param {ReturnType<
+ *    import('./builders/dictionary.js').dictionaryContext
+ *  >} [dicts] Dictionary context object, for internal use only.
+ * @returns {Column<T>} The generated column.
+ */
+export function columnFromValues(length, visit, type, options, dicts) {
+  type ??= inferType(visit);
+  const { maxBatchRows, ...opt } = options;
+  const limit = Math.min(maxBatchRows || Infinity, length);
+  // if null type, generate batches and exit early
+  if (type.typeId === Type.Null) {
+    return new Column(nullBatches(type, length, limit));
+  }
+  const ctx = builderContext(opt, dicts);
+  const b = builder(type, ctx).init();
+  const data = [];
+  const next = b => data.push(b.batch());
+  let row = 0;
+  visit(value => {
+    b.set(value, row++);
+    if (row >= limit) {
+      next(b);
+      row = 0;
+    }
+  });
+  if (row) next(b);
+  // resolve dictionaries
+  ctx.finish();
+  return new Column(data);
+ * Create null batches with the given batch size limit.
+ * @param {import('../types.js').NullType} type The null data type.
+ * @param {number} length The total column length.
+ * @param {number} limit The maximum batch size.
+ * @returns {import('../batch.js').NullBatch[]} The null batches.
+ */
+function nullBatches(type, length, limit) {
+  const data = [];
+  const batch = length => new NullBatch({ length, nullCount: length, type });
+  const numBatches = Math.floor(length / limit);
+  for (let i = 0; i < numBatches; ++i) {
+    data.push(batch(limit));
+  }
+  const rem = length % limit;
+  if (rem) data.push(batch(rem));
+  return data;
@@ -3,14 +3,13 @@ import { isArray } from '../util/arrays.js';
  * Infer the data type for a given input array.
- * @param {import('../types.js').ValueArray} data The data array.
+ * @param {(visitor: (value: any) => void) => void} visit
+ *  A function that applies a callback to successive data values.
  * @returns {import('../types.js').DataType} The data type.
-export function inferType(data) {
+export function inferType(visit) {
   const profile = profiler();
-  for (let i = 0; i < data.length; ++i) {
-    profile.add(data[i]);
-  }
+  visit(value => profile.add(value));
   return profile.type();
-import { builderContext } from './builder.js';
+import { dictionaryContext } from './builders/dictionary.js';
 import { columnFromArray } from './column-from-array.js';
 import { tableFromColumns } from './table-from-columns.js';
@@ -13,11 +13,11 @@ import { tableFromColumns } from './table-from-columns.js';
 export function tableFromArrays(data, options = {}) {
   const { types = {}, ...opt } = options;
-  const ctx = builderContext();
+  const dicts = dictionaryContext();
   const entries = Array.isArray(data) ? data : Object.entries(data);
   const columns = entries.map(([name, array]) =>
     /** @type {[string, import('../column.js').Column]} */ (
-    [ name, columnFromArray(array, types[name], opt, ctx)]
+    [ name, columnFromArray(array, types[name], opt, dicts)]
   return tableFromColumns(columns, options.useProxy);
-import { Endianness, Type, Version } from '../constants.js';
+import { Endianness, Version } from '../constants.js';
 import { field } from '../data-types.js';
 import { Table } from '../table.js';
  * Create a new table from a collection of columns. Columns are assumed
- * to have the same record batch sizes and consistent dictionary ids.
+ * to have the same record batch sizes.
  * @param {[string, import('../column.js').Column][]
  *  | Record<string, import('../column.js').Column>} data The columns,
  *  as an object with name keys, or an array of [name, column] pairs.
@@ -14,21 +14,13 @@ import { Table } from '../table.js';
 export function tableFromColumns(data, useProxy) {
   const fields = [];
-  const dictionaryTypes = new Map;
   const entries = Array.isArray(data) ? data : Object.entries(data);
   const length = entries[0]?.[1].length;
   const columns = entries.map(([name, col]) => {
     if (col.length !== length) {
       throw new Error('All columns must have the same length.');
-    const type = col.type;
-    if (type.typeId === Type.Dictionary) {
-      const dict = dictionaryTypes.get(type.id);
-      if (dict && dict !== type.dictionary) {
-        throw new Error('Same id used across different dictionaries.');
-      }
-      dictionaryTypes.set(type.id, type.dictionary);
-    }
     fields.push(field(name, col.type));
     return col;
@@ -37,8 +29,7 @@ export function tableFromColumns(data, useProxy) {
     version: Version.V5,
     endianness: Endianness.Little,
-    metadata: null,
-    dictionaryTypes
+    metadata: null
   return new Table(schema, columns, useProxy);
@@ -85,17 +85,21 @@ const basicType = (typeId) => ({ typeId });
  *  values.
  * @param {import('./types.js').IntType} [indexType] The data type of
  *  dictionary indices. Must be an integer type (default `int32`).
- * @param {number} [id=-1] The dictionary id, should be unique in a table.
  * @param {boolean} [ordered=false] Indicates if dictionary values are
  *  ordered (default `false`).
+ * @param {number} [id=-1] The dictionary id. The default value (-1) indicates
+ *  the dictionary applies to a single column only. Provide an explicit id in
+ *  order to reuse a dictionary across columns when building, in which case
+ *  different dictionaries *must* have different unique ids. All dictionary
+ *  ids are later resolved (possibly to new values) upon IPC encoding.
  * @returns {import('./types.js').DictionaryType}
-export const dictionary = (type, indexType, id = -1, ordered = false) => ({
+export const dictionary = (type, indexType, ordered = false, id = -1) => ({
   typeId: Type.Dictionary,
   dictionary: type,
   indices: indexType || int32(),
-  id,
-  ordered
+  ordered,
+  id
@@ -12,7 +12,6 @@ import { decodeMetadata } from './metadata.js';
  * @returns {import('../types.js').Schema} The schema
 export function decodeSchema(buf, index, version) {
-  const dictionaryTypes = new Map;
   //  4: endianness (int16)
   //  6: fields (vector)
   //  8: metadata (vector)
@@ -21,25 +20,22 @@ export function decodeSchema(buf, index, version) {
   return {
     endianness: /** @type {import('../types.js').Endianness_} */ (get(4, readInt16, 0)),
-    fields: get(6, (buf, off) => decodeSchemaFields(buf, off, dictionaryTypes), []),
-    metadata: get(8, decodeMetadata),
-    dictionaryTypes
+    fields: get(6, decodeSchemaFields, []),
+    metadata: get(8, decodeMetadata)
  * @returns {import('../types.js').Field[] | null}
-function decodeSchemaFields(buf, fieldsOffset, dictionaryTypes) {
-  return readVector(buf, fieldsOffset, 4,
-    (buf, pos) => decodeField(buf, pos, dictionaryTypes)
-  );
+function decodeSchemaFields(buf, fieldsOffset) {
+  return readVector(buf, fieldsOffset, 4, decodeField);
  * @returns {import('../types.js').Field}
-function decodeField(buf, index, dictionaryTypes) {
+function decodeField(buf, index) {
   //  4: name (string)
   //  6: nullable (bool)
   //  8: type id (uint8)
@@ -51,22 +47,12 @@ function decodeField(buf, index, dictionaryTypes) {
   const typeId = get(8, readUint8, Type.NONE);
   const typeOffset = get(10, readOffset, 0);
   const dict = get(12, decodeDictionary);
-  const children = get(14, (buf, off) => decodeFieldChildren(buf, off, dictionaryTypes));
+  const children = get(14, (buf, off) => decodeFieldChildren(buf, off));
-  let type;
+  let type = decodeDataType(buf, typeOffset, typeId, children);
   if (dict) {
-    const { id } = dict;
-    let dictType = dictionaryTypes.get(id);
-    if (!dictType) {
-      // if dictionary encoded and the first time we've seen this id, decode
-      // the type and children fields and add to the dictionary map.
-      dictType = decodeDataType(buf, typeOffset, typeId, children);
-      dictionaryTypes.set(id, dictType);
-    }
-    dict.dictionary = dictType;
+    dict.dictionary = type;
     type = dict;
-  } else {
-    type = decodeDataType(buf, typeOffset, typeId, children);
   return {
@@ -80,10 +66,8 @@ function decodeField(buf, index, dictionaryTypes) {
  * @returns {import('../types.js').Field[] | null}
-function decodeFieldChildren(buf, fieldOffset, dictionaryTypes) {
-  const children = readVector(buf, fieldOffset, 4,
-    (buf, pos) => decodeField(buf, pos, dictionaryTypes)
-  );
+function decodeFieldChildren(buf, fieldOffset) {
+  const children = readVector(buf, fieldOffset, 4, decodeField);
   return children.length ? children : null;
@@ -102,8 +86,8 @@ function decodeDictionary(buf, index) {
   return dictionary(
     null, // data type will be populated by caller
     get(6, decodeInt, int32()), // index type
+    get(8, readBoolean, false), // ordered
     get(4, readInt64, 0), // id
-    get(8, readBoolean, false) // ordered
@@ -37,11 +37,20 @@ export function tableFromIPC(data, options) {
 export function createTable(data, options = {}) {
   const { schema = { fields: [] }, dictionaries, records } = data;
-  const { version, fields, dictionaryTypes } = schema;
+  const { version, fields } = schema;
   const dictionaryMap = new Map;
   const context = contextGenerator(options, version, dictionaryMap);
-  // decode dictionaries
+  // build dictionary type map
+  const dictionaryTypes = new Map;
+  visitSchemaFields(schema, field => {
+    const type = field.type;
+    if (type.typeId === Type.Dictionary) {
+      dictionaryTypes.set(type.id, type.dictionary);
+    }
+  });
+  // decode dictionaries, build dictionary column map
   const dicts = new Map;
   for (const dict of dictionaries) {
     const { id, data, isDelta, body } = dict;
@@ -70,6 +79,21 @@ export function createTable(data, options = {}) {
   return new Table(schema, cols.map(c => c.done()), options.useProxy);
+ * Visit all fields within a schema.
+ * @param {import('../types.js').Schema} schema
+ * @param {(field: import('../types.js').Field) => void} visitor
+ */
+function visitSchemaFields(schema, visitor) {
+  schema.fields.forEach(function visitField(field) {
+    visitor(field);
+    // @ts-ignore
+    field.type.dictionary?.children?.forEach(visitField);
+    // @ts-ignore
+    field.type.children?.forEach(visitField);
+  });
  * Context object generator for field visitation and buffer definition.
@@ -16,14 +16,17 @@ export function tableToIPC(table, options) {
   if (typeof options === 'string') {
     options = { format: options };
-  const schema = table.schema;
   const columns = table.children;
-  const dictionaries = assembleDictionaryBatches(columns);
+  const { dictionaries, idMap } = assembleDictionaryBatches(columns);
   const records = assembleRecordBatches(columns);
+  const schema = assembleSchema(table.schema, idMap);
   const data = { schema, dictionaries, records };
   return encodeIPC(data, options).finish();
+ * Create a new assembly context.
+ */
 function assembleContext() {
   let byteLength = 0;
   const nodes = [];
@@ -74,37 +77,107 @@ function assembleContext() {
- * @param {import('../column.js').Column[]} columns
- * @returns {import('../types.js').DictionaryBatch[]}
+ * Assemble dictionary batches and their unique ids.
+ * @param {import('../column.js').Column[]} columns The table columns.
+ * @returns {{
+ *    dictionaries: import('../types.js').DictionaryBatch[],
+ *    idMap: Map<import('../types.js').DataType, number>
+ *  }}
+ *  The assembled dictionary batches and a map from dictionary value
+ *  data types to dictionary ids.
 function assembleDictionaryBatches(columns) {
   const dictionaries = [];
-  const seen = new Set;
-  for (const col of columns) {
-    const { type } = col;
-    if (type.typeId !== -1) continue;
-    if (seen.has(type.id)) continue;
-    seen.add(type.id);
-    // pass dictionary and deltas as-is
-    // @ts-ignore
-    const dict = col.data[0].dictionary;
-    for (let i = 0; i < dict.data.length; ++i) {
-      dictionaries.push({
-        id: type.id,
-        isDelta: i > 0,
-        data: assembleRecordBatch([dict], i)
-      });
+  const dictMap = new Map;
+  const idMap = new Map;
+  let id = -1;
+  // track dictionaries, key by dictionary column, assign ids
+  const visitor = dictionaryColumn => {
+    if (!dictMap.has(dictionaryColumn)) {
+      dictMap.set(dictionaryColumn, ++id);
+      for (let i = 0; i < dictionaryColumn.data.length; ++i) {
+        dictionaries.push({
+          id,
+          isDelta: i > 0,
+          data: assembleRecordBatch([dictionaryColumn], i)
+        });
+      }
+      idMap.set(dictionaryColumn.type, id);
+    } else {
+      idMap.set(dictionaryColumn.type, dictMap.get(dictionaryColumn));
+  };
+  // recurse through column batches to find dictionaries
+  // it is sufficient to visit the first batch only,
+  // as all batches have the same dictionary column
+  columns.forEach(col => visitDictionaries(col.data[0], visitor));
+  return { dictionaries, idMap };
+ * Traverse column batches to visit dictionary columns.
+ * @param {import('../batch.js').Batch} batch
+ * @param {(column: import('../column.js').Column) => void} visitor
+ */
+function visitDictionaries(batch, visitor) {
+  if (batch?.type.typeId === Type.Dictionary) {
+    // @ts-ignore - batch has type DictionaryBatch
+    const dictionary = batch.dictionary;
+    visitor(dictionary);
+    visitDictionaries(dictionary.data[0], visitor);
+  batch?.children?.forEach(child => visitDictionaries(child, visitor));
+ * Assemble a schema with resolved dictionary ids.
+ * @param {import('../types.js').Schema} schema The schema.
+ * @param {Map<import('../types.js').DataType, number>} idMap A map
+ *  from dictionary value types to dictionary ids.
+ * @returns {import('../types.js').Schema} A new schema with resolved
+ *  dictionary ids. If there are no dictionaries, the input schema is
+ *  returned unchanged.
+ */
+function assembleSchema(schema, idMap) {
+  // early exit if no dictionaries
+  if (!idMap.size) return schema;
-  return dictionaries;
+  const visit = type => {
+    if (type.typeId === Type.Dictionary) {
+      type.id = idMap.get(type.dictionary); // lookup and set id
+      visitDictType(type);
+    }
+    if (type.children) {
+      (type.children = type.children.slice()).forEach(visitFields);
+    }
+  };
+  // visit a field in a field array
+  const visitFields = (field, index, array) => {
+    const type = { ...field.type };
+    array[index] = { ...field, type };
+    visit(type);
+  };
+  // visit a dictionary values type
+  const visitDictType = (parentType) => {
+    const type = { ...parentType.dictionary };
+    parentType.dictionary = type;
+    visit(type);
+  };
+  schema = { ...schema, fields: schema.fields.slice() };
+  schema.fields.forEach(visitFields);
+  return schema;
- * @param {import('../column.js').Column[]} columns
- * @returns {import('../types.js').RecordBatch[]}
+ * Assemble record batches with marshalled buffers.
+ * @param {import('../column.js').Column[]} columns The table columns.
+ * @returns {import('../types.js').RecordBatch[]} The assembled record batches.
 function assembleRecordBatches(columns) {
   return (columns[0]?.data || [])
@@ -112,8 +185,10 @@ function assembleRecordBatches(columns) {
- * @param {import('../column.js').Column[]} columns
- * @returns {import('../types.js').RecordBatch}
+ * Assemble a record batch with marshalled buffers.
+ * @param {import('../column.js').Column[]} columns The table columns.
+ * @param {number} batchIndex The batch index.
+ * @returns {import('../types.js').RecordBatch} The assembled record batch.
 function assembleRecordBatch(columns, batchIndex = 0) {
   const ctx = assembleContext();
@@ -124,10 +199,10 @@ function assembleRecordBatch(columns, batchIndex = 0) {
- * Visit a column batch, assembling buffer information.
- * @param {import('../types.js').DataType} type
- * @param {import('../batch.js').Batch} batch
- * @param {ReturnType<assembleContext>} ctx
+ * Visit a column batch, assembling buffer data.
+ * @param {import('../types.js').DataType} type The data type.
+ * @param {import('../batch.js').Batch} batch The column batch.
+ * @param {ReturnType<assembleContext>} ctx The assembly context.
 function visit(type, batch, ctx) {
   const { typeId } = type;
@@ -40,12 +40,14 @@ export {
 } from './data-types.js';
+export { Batch } from './batch.js';
 export { Column } from './column.js';
 export { Table } from './table.js';
-export { Batch } from './batch.js';
 export { batchType } from './batch-type.js';
 export { tableFromIPC } from './decode/table-from-ipc.js';
 export { tableToIPC } from './encode/table-to-ipc.js';
 export { tableFromArrays } from './build/table-from-arrays.js';
 export { tableFromColumns } from './build/table-from-columns.js';
 export { columnFromArray } from './build/column-from-array.js';
+export { columnFromValues } from './build/column-from-values.js';
+export { dictionaryContext } from './build/builders/dictionary.js';
@@ -105,8 +105,7 @@ export interface Schema {
   version?: Version_;
   endianness?: Endianness_;
   fields: Field[];
-  metadata?: Metadata | null;
-  dictionaryTypes?: Map<number, DataType>;
+  metadata?: Metadata | null
@@ -6,46 +6,50 @@ function matches(actual, expect) {
   assert.deepStrictEqual(actual, expect);
+function infer(values) {
+  return inferType(visitor => values.forEach(visitor));
 describe('inferType', () => {
   it('infers integer types', () => {
-    matches(inferType([1, 2, 3]), int8());
-    matches(inferType([1e3, 2e3, 3e3]), int16());
-    matches(inferType([1e6, 2e6, 3e6]), int32());
-    matches(inferType([1n, 2n, 3n]), int64());
+    matches(infer([1, 2, 3]), int8());
+    matches(infer([1e3, 2e3, 3e3]), int16());
+    matches(infer([1e6, 2e6, 3e6]), int32());
+    matches(infer([1n, 2n, 3n]), int64());
-    matches(inferType([-1, 2, 3]), int8());
-    matches(inferType([-1e3, 2e3, 3e3]), int16());
-    matches(inferType([-1e6, 2e6, 3e6]), int32());
-    matches(inferType([-1n, 2n, 3n]), int64());
+    matches(infer([-1, 2, 3]), int8());
+    matches(infer([-1e3, 2e3, 3e3]), int16());
+    matches(infer([-1e6, 2e6, 3e6]), int32());
+    matches(infer([-1n, 2n, 3n]), int64());
-    matches(inferType([1, 2, null, undefined, 3]), int8());
-    matches(inferType([1e3, 2e3, null, undefined, 3e3]), int16());
-    matches(inferType([1e6, 2e6, null, undefined, 3e6]), int32());
-    matches(inferType([1n, 2n, null, undefined, 3n]), int64());
+    matches(infer([1, 2, null, undefined, 3]), int8());
+    matches(infer([1e3, 2e3, null, undefined, 3e3]), int16());
+    matches(infer([1e6, 2e6, null, undefined, 3e6]), int32());
+    matches(infer([1n, 2n, null, undefined, 3n]), int64());
   it('infers float types', () => {
-    matches(inferType([1.1, 2.2, 3.3]), float64());
-    matches(inferType([-1.1, 2.2, 3.3]), float64());
-    matches(inferType([1, 2, 3.3]), float64());
-    matches(inferType([1, 2, NaN]), float64());
-    matches(inferType([NaN, null, undefined, NaN]), float64());
-    matches(inferType([Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER]), float64());
+    matches(infer([1.1, 2.2, 3.3]), float64());
+    matches(infer([-1.1, 2.2, 3.3]), float64());
+    matches(infer([1, 2, 3.3]), float64());
+    matches(infer([1, 2, NaN]), float64());
+    matches(infer([NaN, null, undefined, NaN]), float64());
+    matches(infer([Number.MIN_SAFE_INTEGER, Number.MAX_SAFE_INTEGER]), float64());
   it('infers utf8 dictionary types', () => {
     const type = dictionary(utf8(), int32());
-    matches(inferType(['foo', 'bar', 'baz']), type);
-    matches(inferType(['foo', 'bar', null, undefined, 'baz']), type);
+    matches(infer(['foo', 'bar', 'baz']), type);
+    matches(infer(['foo', 'bar', null, undefined, 'baz']), type);
   it('infers bool types', () => {
-    matches(inferType([true, false, true]), bool());
-    matches(inferType([true, false, null, undefined, true]), bool());
+    matches(infer([true, false, true]), bool());
+    matches(infer([true, false, null, undefined, true]), bool());
   it('infers date day types', () => {
-    matches(inferType([
+    matches(infer([
       new Date(Date.UTC(2000, 1, 2)),
       new Date(Date.UTC(2006, 3, 20)),
@@ -55,7 +59,7 @@ describe('inferType', () => {
   it('infers timestamp types', () => {
-      inferType([
+      infer([
         new Date(Date.UTC(2000, 1, 2)),
         new Date(Date.UTC(2006, 3, 20)),
@@ -67,14 +71,14 @@ describe('inferType', () => {
   it('infers list types', () => {
-    matches(inferType([[1, 2], [3, 4]]), list(int8()));
-    matches(inferType([[true, null, false], null, undefined, [false, undefined, true]]), list(bool()));
-    matches(inferType([['foo', 'bar', null], null, ['bar', 'baz']]), list(dictionary(utf8(), int32())));
+    matches(infer([[1, 2], [3, 4]]), list(int8()));
+    matches(infer([[true, null, false], null, undefined, [false, undefined, true]]), list(bool()));
+    matches(infer([['foo', 'bar', null], null, ['bar', 'baz']]), list(dictionary(utf8(), int32())));
   it('infers struct types', () => {
-      inferType([
+      infer([
         { foo: 1, bar: [1.1, 2.2] },
         { foo: null, bar: [2.2, null, 3.3] },
@@ -86,10 +90,10 @@ describe('inferType', () => {
   it('throws on bigints that exceed 64 bits', () => {
-    assert.throws(() => inferType([(1n << 200n)]));
+    assert.throws(() => infer([(1n << 200n)]));
   it('throws on mixed types', () => {
-    assert.throws(() => inferType([1, true, 'foo']));
+    assert.throws(() => infer([1, true, 'foo']));
@@ -32,7 +32,6 @@ function testEncode(bytes) {
   // ensure complete schema, override version
   const schema = {
-    dictionaryTypes: new Map,
     endianness: 0,
     metadata: null,
@@ -11,8 +11,7 @@ export function decimalDataToEncode() {
         nullable: true,
         metadata: null
-      metadata: null,
-      dictionaryTypes: new Map
+      metadata: null
     records: [{
       length: 3,