Skip to content

Commit 04fbf8e

Browse files
committed
Unaligned bit arrays on the JavaScript target
1 parent 89861ef commit 04fbf8e

6 files changed

+148
-80
lines changed

CHANGELOG.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,11 @@
44

55
- Comparing two `Dict`s of equal size has been optimised on the JavaScript
66
target.
7-
87
- Improved the performance of `drop_start`.
8+
- Unaligned bit arrays on the JavaScript target are now supported by the
9+
following functions in the `bit_array` module: `append`, `bit_size`,
10+
`compare`, `concat`, `inspect`, `starts_with`. Note: unaligned bit arrays on
11+
JavaScript are supported starting with Gleam v1.7.
912

1013
## v0.45.0 - 2024-11-28
1114

src/gleam/bit_array.gleam

+4-6
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,8 @@ pub fn from_string(x: String) -> BitArray
1313
/// Returns an integer which is the number of bits in the bit array.
1414
///
1515
@external(erlang, "erlang", "bit_size")
16-
pub fn bit_size(x: BitArray) -> Int {
17-
byte_size(x) * 8
18-
}
16+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_bit_size")
17+
pub fn bit_size(x: BitArray) -> Int
1918

2019
/// Returns an integer which is the number of bytes in the bit array.
2120
///
@@ -26,9 +25,8 @@ pub fn byte_size(x: BitArray) -> Int
2625
/// Pads a bit array with zeros so that it is a whole number of bytes.
2726
///
2827
@external(erlang, "gleam_stdlib", "bit_array_pad_to_bytes")
29-
pub fn pad_to_bytes(x: BitArray) -> BitArray {
30-
x
31-
}
28+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_pad_to_bytes")
29+
pub fn pad_to_bytes(x: BitArray) -> BitArray
3230

3331
/// Creates a new bit array by joining two bit arrays.
3432
///

src/gleam_stdlib.mjs

+132-15
Original file line numberDiff line numberDiff line change
@@ -327,8 +327,49 @@ export function bit_array_from_string(string) {
327327
return toBitArray([stringBits(string)]);
328328
}
329329

330+
/**
 * Returns the size of a bit array in bits.
 *
 * When the value has no `bitSize` property (it is strictly `undefined`) the
 * size is derived from its `length`, treating every element as a full byte.
 * Presumably this covers `BitArray` implementations that predate unaligned
 * bit array support (TODO confirm against the prelude).
 *
 * @param bit_array The bit array to measure; read for `bitSize` and `length`.
 * @returns The number of bits in the bit array.
 */
export function bit_array_bit_size(bit_array) {
  if (bit_array.bitSize === undefined) {
    return bit_array.length * 8;
  }

  return bit_array.bitSize;
}
337+
338+
/**
 * Pads a bit array with zero bits so that it is a whole number of bytes.
 *
 * A byte-aligned input is returned as-is. Otherwise the result is a new
 * `BitArray` built from the same bytes, with the unused low bits of the final
 * byte cleared.
 *
 * @param bit_array The bit array to pad.
 * @returns A bit array whose size in bits is a multiple of 8.
 */
export function bit_array_pad_to_bytes(bit_array) {
  // If the bit array is byte aligned it can be returned unchanged
  const trailingBitsCount = bit_array_bit_size(bit_array) % 8;
  if (trailingBitsCount === 0) {
    return bit_array;
  }

  const finalByte = bit_array.buffer[bit_array.length - 1];

  // Shifting the unused low bits out and back in leaves them set to zero
  let correctFinalByte = finalByte;
  correctFinalByte >>= 8 - trailingBitsCount;
  correctFinalByte <<= 8 - trailingBitsCount;

  // If the unused bits in the final byte are already set to zero then the
  // existing buffer can be re-used, avoiding a copy
  if (finalByte === correctFinalByte) {
    return new BitArray(bit_array.buffer);
  }

  // Copy the bit array into a new buffer and set the correct final byte
  const newBuffer = bit_array.buffer.slice();
  newBuffer[newBuffer.length - 1] = correctFinalByte;

  return new BitArray(newBuffer);
}
363+
364+
// True when a freshly constructed `BitArray` exposes a defined `bitSize`,
// i.e. when the `BitArray` implementation in scope supports unaligned bit
// arrays.
const BIT_ARRAY_UNALIGNED_SUPPORTED =
  new BitArray(new Uint8Array()).bitSize !== undefined;

/**
 * Creates a new bit array by joining a collection of bit arrays.
 *
 * When unaligned bit arrays are supported the bit arrays themselves are
 * handed to `toBitArray`; otherwise only their underlying `buffer`s are
 * passed on.
 *
 * @param bit_arrays A collection of bit arrays exposing `toArray()`.
 * @returns The concatenated bit array produced by `toBitArray`.
 */
export function bit_array_concat(bit_arrays) {
  if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
    return toBitArray(bit_arrays.toArray());
  } else {
    return toBitArray(bit_arrays.toArray().map((b) => b.buffer));
  }
}
333374

334375
export function console_log(term) {
@@ -344,6 +385,10 @@ export function crash(message) {
344385
}
345386

346387
export function bit_array_to_string(bit_array) {
388+
if (bit_array_bit_size(bit_array) % 8 !== 0) {
389+
return new Error(Nil);
390+
}
391+
347392
try {
348393
const decoder = new TextDecoder("utf-8", { fatal: true });
349394
return new Ok(decoder.decode(bit_array.buffer));
@@ -426,13 +471,19 @@ export function random_uniform() {
426471
/**
 * Extracts a run of whole bytes from a bit array without copying.
 *
 * `length` may be negative, in which case the slice covers the bytes that
 * precede `position`. The requested range is validated against the size of
 * the bit array in bits, so a range reaching into a trailing partial byte is
 * rejected.
 *
 * @param bits The bit array to slice.
 * @param position Byte offset at which the slice is anchored.
 * @param length Number of bytes to take; negative values count backwards.
 * @returns `Ok` wrapping a `BitArray` view over the selected bytes, or
 *   `Error(Nil)` when the range is out of bounds.
 */
export function bit_array_slice(bits, position, length) {
  const start = Math.min(position, position + length);
  const end = Math.max(position, position + length);

  if (start < 0 || end * 8 > bit_array_bit_size(bits)) {
    return new Error(Nil);
  }

  // The new view shares the underlying ArrayBuffer with the source
  const byteOffset = bits.buffer.byteOffset + start;

  const buffer = new Uint8Array(
    bits.buffer.buffer,
    byteOffset,
    Math.abs(length),
  );

  return new Ok(new BitArray(buffer));
}
438489

@@ -979,41 +1030,107 @@ export function base16_decode(string) {
9791030
}
9801031

9811032
/**
 * Appends a textual rendering of a bit array's contents to `acc`.
 *
 * Whole bytes are written as decimal numbers separated by ", ". When the bit
 * array is not byte aligned, the trailing bits are written as their numeric
 * value followed by `:size(n)`, where `n` is the number of trailing bits.
 *
 * @param bits The bit array to render.
 * @param acc The string built so far; the rendering is appended to it.
 * @returns `acc` followed by the rendered contents.
 */
export function bit_array_inspect(bits, acc) {
  const bitSize = bit_array_bit_size(bits);

  if (bitSize % 8 === 0) {
    return `${acc}${[...bits.buffer].join(", ")}`;
  }

  // Every byte except the last one is complete
  for (let i = 0; i < bits.length - 1; i++) {
    acc += bits.buffer[i].toString();
    acc += ", ";
  }

  // The trailing bits are stored in the high bits of the final byte, so they
  // are shifted down before being printed
  const trailingBitsCount = bitSize % 8;
  acc += bits.buffer[bits.length - 1] >> (8 - trailingBitsCount);
  acc += `:size(${trailingBitsCount})`;

  return acc;
}
9841050

9851051
/**
 * Compares two bit arrays bit by bit, from the start.
 *
 * Whole bytes are compared first. If every compared bit is equal, the bit
 * array with more bits is the greater one.
 *
 * @param first The left-hand bit array.
 * @param second The right-hand bit array.
 * @returns `Gt`, `Lt` or `Eq` describing how `first` relates to `second`.
 */
export function bit_array_compare(first, second) {
  let i = 0;

  let firstSize = bit_array_bit_size(first);
  let secondSize = bit_array_bit_size(second);

  // Compare whole bytes for as long as both sides have a full byte left
  while (firstSize >= 8 && secondSize >= 8) {
    const f = first.buffer[i];
    const s = second.buffer[i];

    if (f > s) {
      return new Gt();
    } else if (f < s) {
      return new Lt();
    }

    i++;
    firstSize -= 8;
    secondSize -= 8;
  }

  if (firstSize === 0 && secondSize === 0) {
    return new Eq();
  }

  // First has more items, example: "AB" > "A":
  if (secondSize === 0) {
    return new Gt();
  }

  // Second has more items, example: "A" < "AB":
  if (firstSize === 0) {
    return new Lt();
  }

  // At least one side has only a partial byte left. Only the bits present on
  // BOTH sides can be compared, so both bytes are shifted by the same amount.
  // Shifting each side by its own remaining size would compare prefixes of
  // different widths, and would produce a negative shift count when one side
  // still has 8 or more bits left.
  const commonSize = Math.min(firstSize, secondSize);
  const unusedBitsCount = 8 - commonSize;

  const f = first.buffer[i] >> unusedBitsCount;
  const s = second.buffer[i] >> unusedBitsCount;

  if (f > s) {
    return new Gt();
  }
  if (f < s) {
    return new Lt();
  }

  // The common prefix is identical, so the longer bit array is the greater
  if (firstSize > secondSize) {
    return new Gt();
  }
  if (firstSize < secondSize) {
    return new Lt();
  }

  return new Eq();
}
10061106

10071107
/**
 * Checks whether a bit array begins with the given prefix.
 *
 * The prefix may end part-way through a byte; in that case only the bits
 * belonging to the prefix are compared in its final byte.
 *
 * @param bits The bit array to inspect.
 * @param prefix The bit array expected at the start of `bits`.
 * @returns `true` when the leading bits of `bits` equal `prefix`.
 */
export function bit_array_starts_with(bits, prefix) {
  const prefixSize = bit_array_bit_size(prefix);

  // A prefix longer than the bit array itself can never match
  if (prefixSize > bit_array_bit_size(bits)) {
    return false;
  }

  const isPrefixAligned = prefixSize % 8 === 0;

  // Check any whole bytes
  const byteCount = isPrefixAligned ? prefix.length : prefix.length - 1;
  for (let i = 0; i < byteCount; i++) {
    if (bits.buffer[i] !== prefix.buffer[i]) {
      return false;
    }
  }

  // Check any trailing bits at the end of the prefix
  if (!isPrefixAligned) {
    const unusedBitsCount = 8 - (prefixSize % 8);
    if (
      bits.buffer[prefix.length - 1] >> unusedBitsCount !==
      prefix.buffer[prefix.length - 1] >> unusedBitsCount
    ) {
      return false;
    }
  }

  return true;
}

0 commit comments

Comments
 (0)