Skip to content

Commit d2bfa0c

Browse files
committed
Unaligned bit arrays on the JavaScript target
1 parent f15d41b commit d2bfa0c

File tree

5 files changed

+199
-97
lines changed

5 files changed

+199
-97
lines changed

CHANGELOG.md

+7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changelog
22

3+
## Unreleased
4+
5+
- Unaligned bit arrays on the JavaScript target are now supported by the
  following functions in the `bit_array` module: `append`, `bit_size`,
  `compare`, `concat`, `inspect`, `starts_with`. Note: unaligned bit arrays on
  JavaScript are supported starting with Gleam v1.7.
9+
310
## v0.47.0 - 2024-12-10
411

512
- The `compare` and `to_string` functions from the `gleam/bool` module have been

src/gleam/bit_array.gleam

+5-7
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,20 @@ pub fn from_string(x: String) -> BitArray
1313
/// Returns the size of the bit array, measured in bits.
///
/// Both targets use an external implementation: `erlang:bit_size` on Erlang
/// and `bit_array_bit_size` from `gleam_stdlib.mjs` on JavaScript.
///
@external(erlang, "erlang", "bit_size")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_bit_size")
pub fn bit_size(x: BitArray) -> Int
1918

2019
/// Returns the size of the bit array, measured in bytes.
///
/// Both targets use an external implementation: `erlang:byte_size` on Erlang
/// and `bit_array_byte_size` from `gleam_stdlib.mjs` on JavaScript.
///
@external(erlang, "erlang", "byte_size")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_byte_size")
pub fn byte_size(x: BitArray) -> Int
2524

2625
/// Pads a bit array with zero bits so that its size becomes a whole number of
/// bytes.
///
/// Both targets use an external implementation named `bit_array_pad_to_bytes`
/// (in `gleam_stdlib` on Erlang and in `gleam_stdlib.mjs` on JavaScript).
///
@external(erlang, "gleam_stdlib", "bit_array_pad_to_bytes")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_pad_to_bytes")
pub fn pad_to_bytes(x: BitArray) -> BitArray
3230

3331
/// Creates a new bit array by joining two bit arrays.
3432
///

src/gleam_stdlib.mjs

+182-32
Original file line numberDiff line numberDiff line change
@@ -320,8 +320,56 @@ export function bit_array_from_string(string) {
320320
return toBitArray([stringBits(string)]);
321321
}
322322

323+
// Feature probe, evaluated once at module load: true when the `BitArray` class
// in scope exposes a `bitSize` property, false otherwise. The functions below
// use it to choose between the `bitSize`/`byteSize`/`rawBuffer` API and the
// `length`/`buffer` API.
const BIT_ARRAY_UNALIGNED_SUPPORTED = (() => {
  const probe = new BitArray(new Uint8Array());
  return probe.bitSize !== undefined;
})();
325+
326+
// Returns the size of `bit_array` in bits.
//
// When unaligned bit arrays are supported the bit array's own `bitSize` is
// returned; otherwise the size is derived from its `length` (8 bits each).
export function bit_array_bit_size(bit_array) {
  return BIT_ARRAY_UNALIGNED_SUPPORTED
    ? bit_array.bitSize
    : bit_array.length * 8;
}
333+
334+
// Returns the size of `bit_array` in bytes.
//
// When unaligned bit arrays are supported the bit array's own `byteSize` is
// returned; otherwise its `length` is used.
export function bit_array_byte_size(bit_array) {
  return BIT_ARRAY_UNALIGNED_SUPPORTED
    ? bit_array.byteSize
    : bit_array.length;
}
341+
342+
// Pads `bit_array` with zero bits up to a whole number of bytes.
//
// A bit array whose bit size is already a multiple of 8 is returned as is.
// Otherwise a new `BitArray` is built over whole bytes, with the unused low
// bits of the final byte guaranteed to be zero.
export function bit_array_pad_to_bytes(bit_array) {
  const usedBitsInLastByte = bit_array_bit_size(bit_array) % 8;

  // Nothing to pad when the input is byte aligned.
  if (usedBitsInLastByte === 0) {
    return bit_array;
  }

  const lastByte = bit_array.byteAt(bit_array.byteSize - 1);

  // Shifting right then left clears the low bits that are not part of the
  // bit array while keeping the used high bits in place.
  const paddingBitsCount = 8 - usedBitsInLastByte;
  const zeroedLastByte = (lastByte >> paddingBitsCount) << paddingBitsCount;

  if (lastByte !== zeroedLastByte) {
    // The padding bits hold stale data: copy the bytes and fix the last one.
    const paddedBytes = bit_array.rawBuffer.slice();
    paddedBytes[paddedBytes.length - 1] = zeroedLastByte;
    return new BitArray(paddedBytes);
  }

  // The padding bits are already zero, so the existing bytes can be shared
  // without copying.
  return new BitArray(bit_array.rawBuffer);
}
366+
323367
// Joins a list of bit arrays into a single bit array.
//
// With unaligned support the bit arrays themselves are handed to
// `toBitArray`; without it their underlying `buffer`s are passed instead.
export function bit_array_concat(bit_arrays) {
  const segments = bit_arrays.toArray();
  return toBitArray(
    BIT_ARRAY_UNALIGNED_SUPPORTED
      ? segments
      : segments.map((segment) => segment.buffer),
  );
}
326374

327375
export function console_log(term) {
@@ -337,9 +385,17 @@ export function crash(message) {
337385
}
338386

339387
export function bit_array_to_string(bit_array) {
388+
if (bit_array_bit_size(bit_array) % 8 !== 0) {
389+
return new Error(Nil);
390+
}
391+
340392
try {
341393
const decoder = new TextDecoder("utf-8", { fatal: true });
342-
return new Ok(decoder.decode(bit_array.buffer));
394+
if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
395+
return new Ok(decoder.decode(bit_array.rawBuffer));
396+
} else {
397+
return new Ok(decoder.decode(bit_array.buffer));
398+
}
343399
} catch {
344400
return new Error(Nil);
345401
}
@@ -419,13 +475,22 @@ export function random_uniform() {
419475
// Extracts `length` bytes from `bits`, starting at byte offset `position`.
//
// A negative `length` selects the bytes that precede `position`. Returns
// `Error(Nil)` when the requested range starts before the beginning of the
// bit array or extends past its last whole bit; otherwise returns `Ok` with
// a `BitArray` that is a view onto the same underlying memory (no copy).
export function bit_array_slice(bits, position, length) {
  const otherEdge = position + length;
  const start = Math.min(position, otherEdge);
  const end = Math.max(position, otherEdge);

  const isOutOfRange = start < 0 || end * 8 > bit_array_bit_size(bits);
  if (isOutOfRange) {
    return new Error(Nil);
  }

  const source = BIT_ARRAY_UNALIGNED_SUPPORTED ? bits.rawBuffer : bits.buffer;

  // `source` may itself be a view, so its own byte offset has to be included.
  const view = new Uint8Array(
    source.buffer,
    source.byteOffset + start,
    Math.abs(length),
  );

  return new Ok(new BitArray(view));
}
431496

@@ -571,7 +636,14 @@ let b64TextDecoder;
571636
export function encode64(bit_array, padding) {
572637
b64TextDecoder ??= new TextDecoder();
573638

574-
const bytes = bit_array.buffer;
639+
bit_array = bit_array_pad_to_bytes(bit_array);
640+
641+
let bytes;
642+
if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
643+
bytes = bit_array.rawBuffer;
644+
} else {
645+
bytes = bit_array.buffer;
646+
}
575647

576648
const m = bytes.length;
577649
const k = m % 3;
@@ -853,7 +925,7 @@ export function inspect(v) {
853925
if (Array.isArray(v)) return `#(${v.map(inspect).join(", ")})`;
854926
if (v instanceof List) return inspectList(v);
855927
if (v instanceof UtfCodepoint) return inspectUtfCodepoint(v);
856-
if (v instanceof BitArray) return inspectBitArray(v);
928+
if (v instanceof BitArray) return `<<${bit_array_inspect(v, "")}>>`;
857929
if (v instanceof CustomType) return inspectCustomType(v);
858930
if (v instanceof Dict) return inspectDict(v);
859931
if (v instanceof Set) return `//js(Set(${[...v].map(inspect).join(", ")}))`;
@@ -944,17 +1016,13 @@ export function inspectList(list) {
9441016
return `[${list.toArray().map(inspect).join(", ")}]`;
9451017
}
9461018

947-
export function inspectBitArray(bits) {
948-
return `<<${Array.from(bits.buffer).join(", ")}>>`;
949-
}
950-
9511019
// Renders a UTF codepoint value as `//utfcodepoint(<character>)`, where the
// character is produced from the numeric `codepoint.value`.
export function inspectUtfCodepoint(codepoint) {
  const character = String.fromCodePoint(codepoint.value);
  return "//utfcodepoint(" + character + ")";
}
9541022

9551023
export function base16_encode(bit_array) {
9561024
let result = "";
957-
for (const byte of bit_array.buffer) {
1025+
for (const byte of bit_array_pad_to_bytes(bit_array).iterateBytes()) {
9581026
result += byte.toString(16).padStart(2, "0").toUpperCase();
9591027
}
9601028
return result;
@@ -972,38 +1040,120 @@ export function base16_decode(string) {
9721040
}
9731041

9741042
// Appends the textual contents of `bits` to `acc` and returns the result.
//
// Whole bytes are written as decimal numbers separated by ", ". When the bit
// array is not byte aligned, the final partial byte is written as its value
// shifted down to the used bits, followed by `:size(n)` where `n` is the
// number of trailing bits.
export function bit_array_inspect(bits, acc) {
  const totalBits = bit_array_bit_size(bits);
  const bytes = BIT_ARRAY_UNALIGNED_SUPPORTED ? bits.rawBuffer : bits.buffer;
  const trailingBits = totalBits % 8;

  // Byte-aligned: every byte is printed in full.
  if (trailingBits === 0) {
    return `${acc}${[...bytes].join(", ")}`;
  }

  // Unaligned: every byte except the last is complete.
  const lastIndex = bytes.length - 1;
  let output = acc;
  for (let index = 0; index < lastIndex; index++) {
    output += `${bytes[index].toString()}, `;
  }

  // The used bits sit in the high end of the last byte; shift them down.
  const partialValue = bytes[lastIndex] >> (8 - trailingBits);
  output += partialValue;
  output += `:size(${trailingBits})`;

  return output;
}
9771067

9781068
// Reads `bitCount` bits of `buffer`, starting at the first bit of byte
// `index`, as a big-endian unsigned integer. The result is exact while it fits
// in one byte; once it exceeds 0xff the scan stops early and some value
// greater than 0xff is returned, which is all the caller needs to order it
// against a value of fewer than 8 bits.
function bitArrayTailValue(buffer, index, bitCount) {
  let value = 0;
  for (let left = bitCount; left > 0 && value <= 0xff; left -= 8, index++) {
    const take = Math.min(left, 8);
    // The bits of a partial byte live in its high end, so shift them down.
    value = value * (1 << take) + (buffer[index] >> (8 - take));
  }
  return value;
}

// Orders two bit arrays, returning `Gt`, `Lt` or `Eq`.
//
// Whole bytes are compared pairwise from the start while both sides still
// have at least 8 bits left. If both run out together they are equal; if
// only one runs out, the one with bits left is greater. Otherwise at least
// one side ends in a partial byte: the remaining bits of each side are
// compared as unsigned integers, and if those are equal the side with more
// remaining bits is greater.
export function bit_array_compare(first, second) {
  let firstBuffer, secondBuffer;
  if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
    firstBuffer = first.rawBuffer;
    secondBuffer = second.rawBuffer;
  } else {
    firstBuffer = first.buffer;
    secondBuffer = second.buffer;
  }

  let firstSize = bit_array_bit_size(first);
  let secondSize = bit_array_bit_size(second);

  let i = 0;
  while (firstSize >= 8 && secondSize >= 8) {
    const f = firstBuffer[i];
    const s = secondBuffer[i];

    if (f > s) {
      return new Gt();
    } else if (f < s) {
      return new Lt();
    }

    i++;
    firstSize -= 8;
    secondSize -= 8;
  }

  if (firstSize === 0 && secondSize === 0) {
    return new Eq();
  }

  // First has more items, example: "AB" > "A":
  if (secondSize === 0) {
    return new Gt();
  }

  // Second has more items, example: "A" < "AB":
  if (firstSize === 0) {
    return new Lt();
  }

  // This happens when there are unaligned bit arrays. One side has fewer than
  // 8 bits left, but the other may still have more than 8, so its remaining
  // value can span several bytes. A single `byte >> (8 - size)` would use a
  // negative shift count there, which JavaScript reduces modulo 32.
  const f = bitArrayTailValue(firstBuffer, i, firstSize);
  const s = bitArrayTailValue(secondBuffer, i, secondSize);

  if (f > s) {
    return new Gt();
  }
  if (f < s) {
    return new Lt();
  }
  if (firstSize > secondSize) {
    return new Gt();
  }
  if (firstSize < secondSize) {
    return new Lt();
  }

  return new Eq();
}
9991132

10001133
export function bit_array_starts_with(bits, prefix) {
1001-
if (prefix.length > bits.length) {
1134+
const prefixSize = bit_array_bit_size(prefix);
1135+
1136+
if (prefixSize > bit_array_bit_size(bits)) {
10021137
return false;
10031138
}
10041139

1005-
for (let i = 0; i < prefix.length; i++) {
1006-
if (bits.buffer[i] !== prefix.buffer[i]) {
1140+
const isPrefixAligned = prefixSize % 8 === 0;
1141+
1142+
// Check any whole bytes
1143+
const byteCount = Math.trunc(prefixSize / 8);
1144+
for (let i = 0; i < byteCount; i++) {
1145+
if (bits.byteAt(i) !== prefix.byteAt(i)) {
1146+
return false;
1147+
}
1148+
}
1149+
1150+
// Check any trailing bits at the end of the prefix
1151+
if (!isPrefixAligned) {
1152+
const unusedBitsCount = 8 - (prefixSize % 8);
1153+
if (
1154+
bits.byteAt(byteCount) >> unusedBitsCount !==
1155+
prefix.byteAt(byteCount) >> unusedBitsCount
1156+
) {
10071157
return false;
10081158
}
10091159
}

0 commit comments

Comments
 (0)