Skip to content

Commit 6bcb8c5

Browse files
committed
Unaligned bit arrays on the JavaScript target
1 parent 8a68cac commit 6bcb8c5

File tree

5 files changed

+208
-96
lines changed

5 files changed

+208
-96
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
## Unreleased
44

55
- Fixed a bug that would result in `list.unique` having quadratic runtime.
6+
- Unaligned bit arrays on the JavaScript target are now supported by the
7+
`bit_array` module. Note: unaligned bit arrays on JavaScript are supported
8+
starting with Gleam v1.8.
69

710
## v0.53.0 - 2025-01-23
811

src/gleam/bit_array.gleam

+5-7
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,20 @@ pub fn from_string(x: String) -> BitArray
1313
/// Returns an integer which is the number of bits in the bit array.
1414
///
1515
@external(erlang, "erlang", "bit_size")
16-
pub fn bit_size(x: BitArray) -> Int {
17-
byte_size(x) * 8
18-
}
16+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_bit_size")
17+
pub fn bit_size(x: BitArray) -> Int
1918

2019
/// Returns an integer which is the number of bytes in the bit array.
2120
///
2221
@external(erlang, "erlang", "byte_size")
23-
@external(javascript, "../gleam_stdlib.mjs", "length")
22+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_byte_size")
2423
pub fn byte_size(x: BitArray) -> Int
2524

2625
/// Pads a bit array with zeros so that it is a whole number of bytes.
2726
///
2827
@external(erlang, "gleam_stdlib", "bit_array_pad_to_bytes")
29-
pub fn pad_to_bytes(x: BitArray) -> BitArray {
30-
x
31-
}
28+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_pad_to_bytes")
29+
pub fn pad_to_bytes(x: BitArray) -> BitArray
3230

3331
/// Creates a new bit array by joining two bit arrays.
3432
///

src/gleam_stdlib.mjs

+190-31
Original file line numberDiff line numberDiff line change
@@ -316,8 +316,64 @@ export function bit_array_from_string(string) {
316316
return toBitArray([stringBits(string)]);
317317
}
318318

319+
// Feature probe: true when a freshly constructed `BitArray` exposes a
// `bitSize` property. The functions in this module use it to choose between
// the `bitSize`/`byteSize`/`rawBuffer` API and the `length`/`buffer` API.
const BIT_ARRAY_UNALIGNED_SUPPORTED =
  typeof new BitArray(new Uint8Array()).bitSize !== "undefined";
321+
322+
// Returns the number of bits in `bit_array`.
//
// Reads `bitSize` when the `BitArray` class exposes it; otherwise derives the
// size from `length`, counting 8 bits per element.
export function bit_array_bit_size(bit_array) {
  return BIT_ARRAY_UNALIGNED_SUPPORTED
    ? bit_array.bitSize
    : bit_array.length * 8;
}
329+
330+
// Returns the number of bytes in `bit_array`.
//
// Reads `byteSize` when the `BitArray` class exposes `bitSize`; otherwise
// falls back to `length`.
export function bit_array_byte_size(bit_array) {
  return BIT_ARRAY_UNALIGNED_SUPPORTED
    ? bit_array.byteSize
    : bit_array.length;
}
337+
338+
// Pads `bit_array` so that its size is a whole number of bytes, with the
// padding bits in the final byte set to zero.
//
// A bit array whose bit size is already a multiple of 8 is returned as-is.
export function bit_array_pad_to_bytes(bit_array) {
  const usedBitsInLastByte = bit_array_bit_size(bit_array) % 8;
  if (usedBitsInLastByte === 0) {
    return bit_array;
  }

  const byteCount = bit_array.byteSize;
  const lastIndex = byteCount - 1;
  const lastByte = bit_array.byteAt(lastIndex);

  // Shifting right then left clears the low bits that are not part of the
  // bit array's content.
  const paddingBits = 8 - usedBitsInLastByte;
  const zeroPaddedLastByte = (lastByte >> paddingBits) << paddingBits;

  if (lastByte !== zeroPaddedLastByte) {
    // The padding bits hold stray data, so build a fresh buffer: copy every
    // byte but the last, then store the cleaned-up final byte.
    const padded = new Uint8Array(byteCount);
    for (let index = 0; index < lastIndex; index++) {
      padded[index] = bit_array.byteAt(index);
    }
    padded[lastIndex] = zeroPaddedLastByte;

    return new BitArray(padded);
  }

  // The padding bits are already zero, so the existing buffer is shared and
  // only the declared size grows to a whole number of bytes.
  // NOTE(review): confirm the BitArray constructor accepts `rawBuffer` with
  // the enlarged size when `bitOffset` is non-zero.
  return new BitArray(
    bit_array.rawBuffer,
    byteCount * 8,
    bit_array.bitOffset,
  );
}
370+
319371
// Joins a list of bit arrays into a single bit array.
//
// With unaligned support the bit arrays themselves are handed to
// `toBitArray`; otherwise each one's `buffer` is passed instead.
export function bit_array_concat(bit_arrays) {
  const parts = bit_arrays.toArray();
  const segments = BIT_ARRAY_UNALIGNED_SUPPORTED
    ? parts
    : parts.map((part) => part.buffer);

  return toBitArray(segments);
}
322378

323379
export function console_log(term) {
@@ -333,9 +389,25 @@ export function crash(message) {
333389
}
334390

335391
export function bit_array_to_string(bit_array) {
392+
if (bit_array_bit_size(bit_array) % 8 !== 0) {
393+
return new Error(Nil);
394+
}
395+
336396
try {
337397
const decoder = new TextDecoder("utf-8", { fatal: true });
338-
return new Ok(decoder.decode(bit_array.buffer));
398+
if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
399+
if (bit_array.bitOffset === 0) {
400+
return new Ok(decoder.decode(bit_array.rawBuffer));
401+
} else {
402+
const buffer = new Uint8Array(bit_array.byteSize);
403+
for (let i = 0; i < buffer.length; i++) {
404+
buffer[i] = bit_array.byteAt(i);
405+
}
406+
return new Ok(decoder.decode(buffer));
407+
}
408+
} else {
409+
return new Ok(decoder.decode(bit_array.buffer));
410+
}
339411
} catch {
340412
return new Error(Nil);
341413
}
@@ -415,13 +487,21 @@ export function random_uniform() {
415487
// Extracts a byte-addressed sub-section of `bits`.
//
// `position` and `length` are measured in bytes. `length` may be negative, in
// which case the slice covers the bytes before `position`. Returns
// `Error(Nil)` when the requested range falls outside the bit array, and
// `Ok(slice)` otherwise.
export function bit_array_slice(bits, position, length) {
  const farEdge = position + length;
  const start = Math.min(position, farEdge);
  const end = Math.max(position, farEdge);

  const outOfRange = start < 0 || end * 8 > bit_array_bit_size(bits);
  if (outOfRange) {
    return new Error(Nil);
  }

  if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
    // `slice` takes bit positions, so convert from bytes
    return new Ok(bits.slice(start * 8, end * 8));
  }

  // Create a view over the same underlying ArrayBuffer rather than copying
  const view = new Uint8Array(
    bits.buffer.buffer,
    bits.buffer.byteOffset + start,
    end - start,
  );

  return new Ok(new BitArray(view));
}
427507

@@ -522,16 +602,20 @@ let b64TextDecoder;
522602
export function encode64(bit_array, padding) {
523603
b64TextDecoder ??= new TextDecoder();
524604

525-
const bytes = bit_array.buffer;
605+
bit_array = bit_array_pad_to_bytes(bit_array);
526606

527-
const m = bytes.length;
607+
const m = bit_array_byte_size(bit_array);
528608
const k = m % 3;
529609
const n = Math.floor(m / 3) * 4 + (k && k + 1);
530610
const N = Math.ceil(m / 3) * 4;
531611
const encoded = new Uint8Array(N);
532612

533613
for (let i = 0, j = 0; j < m; i += 4, j += 3) {
534-
const y = (bytes[j] << 16) + (bytes[j + 1] << 8) + (bytes[j + 2] | 0);
614+
const y =
615+
(bit_array.byteAt(j) << 16) +
616+
(bit_array.byteAt(j + 1) << 8) +
617+
(bit_array.byteAt(j + 2) | 0);
618+
535619
encoded[i] = b64EncodeLookup[y >> 18];
536620
encoded[i + 1] = b64EncodeLookup[(y >> 12) & 0x3f];
537621
encoded[i + 2] = b64EncodeLookup[(y >> 6) & 0x3f];
@@ -804,7 +888,7 @@ export function inspect(v) {
804888
if (Array.isArray(v)) return `#(${v.map(inspect).join(", ")})`;
805889
if (v instanceof List) return inspectList(v);
806890
if (v instanceof UtfCodepoint) return inspectUtfCodepoint(v);
807-
if (v instanceof BitArray) return inspectBitArray(v);
891+
if (v instanceof BitArray) return `<<${bit_array_inspect(v, "")}>>`;
808892
if (v instanceof CustomType) return inspectCustomType(v);
809893
if (v instanceof Dict) return inspectDict(v);
810894
if (v instanceof Set) return `//js(Set(${[...v].map(inspect).join(", ")}))`;
@@ -895,17 +979,22 @@ export function inspectList(list) {
895979
return `[${list.toArray().map(inspect).join(", ")}]`;
896980
}
897981

898-
export function inspectBitArray(bits) {
899-
return `<<${Array.from(bits.buffer).join(", ")}>>`;
900-
}
901-
902982
// Renders a UTF codepoint for `inspect` output: the character for
// `codepoint.value` wrapped in `//utfcodepoint(...)`.
export function inspectUtfCodepoint(codepoint) {
  const character = String.fromCodePoint(codepoint.value);
  return "//utfcodepoint(" + character + ")";
}
905985

906986
export function base16_encode(bit_array) {
987+
const trailingBitsCount = bit_array_bit_size(bit_array) % 8;
988+
907989
let result = "";
908-
for (const byte of bit_array.buffer) {
990+
for (let i = 0; i < bit_array.byteSize; i++) {
991+
let byte = bit_array.byteAt(i);
992+
993+
if (i === bit_array.byteSize - 1 && trailingBitsCount) {
994+
const unusedBitsCount = 8 - trailingBitsCount;
995+
byte = (byte >> unusedBitsCount) << unusedBitsCount;
996+
}
997+
909998
result += byte.toString(16).padStart(2, "0").toUpperCase();
910999
}
9111000
return result;
@@ -923,38 +1012,108 @@ export function base16_decode(string) {
9231012
}
9241013

9251014
// Appends the comma-separated contents of `bits` to `acc` and returns the
// result. Whole bytes are printed as decimal numbers. A final partial byte is
// printed as the value of its used bits followed by `:size(n)`, where `n` is
// the number of used bits.
export function bit_array_inspect(bits, acc) {
  const totalBits = bit_array_bit_size(bits);
  if (totalBits === 0) {
    return acc;
  }

  const totalBytes = bit_array_byte_size(bits);
  const lastIndex = totalBytes - 1;

  let output = acc;

  // Every byte before the last is followed by a separator
  for (let index = 0; index < lastIndex; index++) {
    output += `${bits.byteAt(index).toString()}, `;
  }

  if (totalBytes * 8 !== totalBits) {
    // Partial final byte: shift the used bits down to get their value
    const usedBits = totalBits % 8;
    output += bits.byteAt(lastIndex) >> (8 - usedBits);
    output += `:size(${usedBits})`;
  } else {
    output += bits.byteAt(lastIndex).toString();
  }

  return output;
}
9281038

9291039
// Compares two bit arrays and returns `Lt`, `Eq` or `Gt`.
//
// The comparison is lexicographic: whole bytes are compared first, then any
// remaining bits are compared over the prefix both sides share. When one bit
// array is a prefix of the other, the shorter one is the lesser.
export function bit_array_compare(first, second) {
  let i = 0;

  let firstBitsLeft = bit_array_bit_size(first);
  let secondBitsLeft = bit_array_bit_size(second);

  // Compare byte by byte while both sides still have a full byte available
  while (firstBitsLeft >= 8 && secondBitsLeft >= 8) {
    const f = first.byteAt(i);
    const s = second.byteAt(i);

    if (f > s) {
      return new Gt();
    } else if (f < s) {
      return new Lt();
    }

    i++;
    firstBitsLeft -= 8;
    secondBitsLeft -= 8;
  }

  if (firstBitsLeft === 0 && secondBitsLeft === 0) {
    return new Eq();
  }

  // First has more items, example: "AB" > "A":
  if (secondBitsLeft === 0) {
    return new Gt();
  }

  // Second has more items, example: "A" < "AB":
  if (firstBitsLeft === 0) {
    return new Lt();
  }

  // At least one side ends with a partial byte. Only the bits present on both
  // sides can be compared, so both bytes are shifted by the same amount. The
  // loop above exited with both sides non-empty and at least one below 8, so
  // `sharedBits` is between 1 and 7 and the shift count is never negative.
  const sharedBits = Math.min(firstBitsLeft, secondBitsLeft);
  const ignoredBits = 8 - sharedBits;

  const f = first.byteAt(i) >> ignoredBits;
  const s = second.byteAt(i) >> ignoredBits;

  if (f > s) {
    return new Gt();
  }
  if (f < s) {
    return new Lt();
  }

  // The shared prefix is identical, so the longer bit array is the greater
  if (firstBitsLeft > secondBitsLeft) {
    return new Gt();
  }
  if (firstBitsLeft < secondBitsLeft) {
    return new Lt();
  }

  return new Eq();
}
9501094

9511095
export function bit_array_starts_with(bits, prefix) {
952-
if (prefix.length > bits.length) {
1096+
const prefixSize = bit_array_bit_size(prefix);
1097+
1098+
if (prefixSize > bit_array_bit_size(bits)) {
9531099
return false;
9541100
}
9551101

956-
for (let i = 0; i < prefix.length; i++) {
957-
if (bits.buffer[i] !== prefix.buffer[i]) {
1102+
// Check any whole bytes
1103+
const byteCount = Math.trunc(prefixSize / 8);
1104+
for (let i = 0; i < byteCount; i++) {
1105+
if (bits.byteAt(i) !== prefix.byteAt(i)) {
1106+
return false;
1107+
}
1108+
}
1109+
1110+
// Check any trailing bits at the end of the prefix
1111+
if (prefixSize % 8 !== 0) {
1112+
const unusedBitsCount = 8 - (prefixSize % 8);
1113+
if (
1114+
bits.byteAt(byteCount) >> unusedBitsCount !==
1115+
prefix.byteAt(byteCount) >> unusedBitsCount
1116+
) {
9581117
return false;
9591118
}
9601119
}

0 commit comments

Comments
 (0)