Skip to content

Commit 12b7ec3

Browse files
committed
Unaligned bit arrays on the JavaScript target
1 parent 87b9ba4 commit 12b7ec3

File tree

5 files changed

+202
-98
lines changed

5 files changed

+202
-98
lines changed

CHANGELOG.md

+7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changelog
22

3+
## Unreleased
4+
5+
- Unaligned bit arrays on the JavaScript target are now supported by the
6+
following functions in the `bit_array` module: `append`, `bit_size`,
7+
`compare`, `concat`, `inspect`, `starts_with`. Note: unaligned bit arrays on
8+
JavaScript are supported starting with Gleam v1.7.
9+
310
## v0.51.0 - 2024-12-22
411

512
- `dynamic/decode` now has its own error type.

src/gleam/bit_array.gleam

+5-7
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,20 @@ pub fn from_string(x: String) -> BitArray
1313
/// Returns an integer which is the number of bits in the bit array.
1414
///
1515
@external(erlang, "erlang", "bit_size")
16-
pub fn bit_size(x: BitArray) -> Int {
17-
byte_size(x) * 8
18-
}
16+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_bit_size")
17+
pub fn bit_size(x: BitArray) -> Int
1918

2019
/// Returns an integer which is the number of bytes in the bit array.
2120
///
2221
@external(erlang, "erlang", "byte_size")
23-
@external(javascript, "../gleam_stdlib.mjs", "length")
22+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_byte_size")
2423
pub fn byte_size(x: BitArray) -> Int
2524

2625
/// Pads a bit array with zeros so that it is a whole number of bytes.
2726
///
2827
@external(erlang, "gleam_stdlib", "bit_array_pad_to_bytes")
29-
pub fn pad_to_bytes(x: BitArray) -> BitArray {
30-
x
31-
}
28+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_pad_to_bytes")
29+
pub fn pad_to_bytes(x: BitArray) -> BitArray
3230

3331
/// Creates a new bit array by joining two bit arrays.
3432
///

src/gleam_stdlib.mjs

+185-33
Original file line numberDiff line numberDiff line change
@@ -316,8 +316,56 @@ export function bit_array_from_string(string) {
316316
return toBitArray([stringBits(string)]);
317317
}
318318

319+
// Feature detection, evaluated once at module load: true when a `BitArray`
// built from an empty `Uint8Array` exposes a `bitSize` property. The functions
// in this module branch on this flag to choose between the
// `bitSize`/`byteSize`/`rawBuffer` API and the `length`/`buffer` API.
const BIT_ARRAY_UNALIGNED_SUPPORTED =
  typeof new BitArray(new Uint8Array()).bitSize !== "undefined";
321+
322+
// Returns the number of bits in `bit_array`.
//
// When unaligned bit arrays are supported the value comes straight from
// `bitSize`; otherwise it is derived as `length * 8`.
export function bit_array_bit_size(bit_array) {
  return BIT_ARRAY_UNALIGNED_SUPPORTED
    ? bit_array.bitSize
    : bit_array.length * 8;
}
329+
330+
// Returns the number of bytes in `bit_array`.
//
// Reads `byteSize` when unaligned bit arrays are supported and falls back to
// `length` otherwise.
export function bit_array_byte_size(bit_array) {
  return BIT_ARRAY_UNALIGNED_SUPPORTED
    ? bit_array.byteSize
    : bit_array.length;
}
337+
338+
// Pads a bit array with zero bits so that it is a whole number of bytes.
//
// A bit array whose bit size is already a multiple of 8 is returned as-is.
// Otherwise a new `BitArray` is built over the raw bytes with the unused low
// bits of the final byte cleared.
// NOTE(review): presumably `new BitArray(buffer)` with a single argument
// yields a byte-aligned bit array covering the whole buffer - confirm against
// the prelude.
export function bit_array_pad_to_bytes(bit_array) {
  const usedBitsInLastByte = bit_array_bit_size(bit_array) % 8;
  if (usedBitsInLastByte === 0) {
    // Already byte aligned, nothing to pad
    return bit_array;
  }

  const lastByte = bit_array.rawBuffer[bit_array.byteSize - 1];

  // Shift the padding bits out and back in again so they come back as zeros
  const paddingBits = 8 - usedBitsInLastByte;
  const zeroPaddedLastByte = (lastByte >> paddingBits) << paddingBits;

  if (lastByte !== zeroPaddedLastByte) {
    // The padding bits hold stray data: copy the bytes and fix up the last one
    const paddedBytes = bit_array.rawBuffer.slice();
    paddedBytes[paddedBytes.length - 1] = zeroPaddedLastByte;
    return new BitArray(paddedBytes);
  }

  // The padding bits are already zero, so the existing bytes are wrapped
  // without copying
  return new BitArray(bit_array.rawBuffer);
}
362+
319363
// Joins a list of bit arrays into a single bit array via `toBitArray`.
//
// When unaligned bit arrays are supported the `BitArray` values are passed
// through as segments directly; otherwise each one is first unwrapped to its
// `buffer`.
export function bit_array_concat(bit_arrays) {
  const segments = bit_arrays.toArray();

  return toBitArray(
    BIT_ARRAY_UNALIGNED_SUPPORTED
      ? segments
      : segments.map((bitArray) => bitArray.buffer),
  );
}
322370

323371
export function console_log(term) {
@@ -333,9 +381,17 @@ export function crash(message) {
333381
}
334382

335383
export function bit_array_to_string(bit_array) {
384+
if (bit_array_bit_size(bit_array) % 8 !== 0) {
385+
return new Error(Nil);
386+
}
387+
336388
try {
337389
const decoder = new TextDecoder("utf-8", { fatal: true });
338-
return new Ok(decoder.decode(bit_array.buffer));
390+
if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
391+
return new Ok(decoder.decode(bit_array.rawBuffer));
392+
} else {
393+
return new Ok(decoder.decode(bit_array.buffer));
394+
}
339395
} catch {
340396
return new Error(Nil);
341397
}
@@ -415,13 +471,22 @@ export function random_uniform() {
415471
// Extracts `length` bytes starting at byte `position` from `bits`.
//
// A negative `length` selects the bytes that end at `position`. Returns
// `Error(Nil)` when the requested range starts before 0 or extends past the
// last whole byte, and otherwise `Ok` holding a `BitArray` over a `Uint8Array`
// view of the same underlying `ArrayBuffer` (no bytes are copied).
export function bit_array_slice(bits, position, length) {
  const boundary = position + length;
  const start = Math.min(position, boundary);
  const end = Math.max(position, boundary);

  // `end` is in bytes, so it is converted to bits for the bounds check
  const isOutOfRange = start < 0 || end * 8 > bit_array_bit_size(bits);
  if (isOutOfRange) {
    return new Error(Nil);
  }

  const source = BIT_ARRAY_UNALIGNED_SUPPORTED ? bits.rawBuffer : bits.buffer;

  // The source may itself be a view, so its own offset is taken into account
  const view = new Uint8Array(
    source.buffer,
    source.byteOffset + start,
    Math.abs(length),
  );

  return new Ok(new BitArray(view));
}
427492

@@ -522,7 +587,14 @@ let b64TextDecoder;
522587
export function encode64(bit_array, padding) {
523588
b64TextDecoder ??= new TextDecoder();
524589

525-
const bytes = bit_array.buffer;
590+
bit_array = bit_array_pad_to_bytes(bit_array);
591+
592+
let bytes;
593+
if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
594+
bytes = bit_array.rawBuffer;
595+
} else {
596+
bytes = bit_array.buffer;
597+
}
526598

527599
const m = bytes.length;
528600
const k = m % 3;
@@ -804,7 +876,7 @@ export function inspect(v) {
804876
if (Array.isArray(v)) return `#(${v.map(inspect).join(", ")})`;
805877
if (v instanceof List) return inspectList(v);
806878
if (v instanceof UtfCodepoint) return inspectUtfCodepoint(v);
807-
if (v instanceof BitArray) return inspectBitArray(v);
879+
if (v instanceof BitArray) return `<<${bit_array_inspect(v, "")}>>`;
808880
if (v instanceof CustomType) return inspectCustomType(v);
809881
if (v instanceof Dict) return inspectDict(v);
810882
if (v instanceof Set) return `//js(Set(${[...v].map(inspect).join(", ")}))`;
@@ -895,18 +967,16 @@ export function inspectList(list) {
895967
return `[${list.toArray().map(inspect).join(", ")}]`;
896968
}
897969

898-
export function inspectBitArray(bits) {
899-
return `<<${Array.from(bits.buffer).join(", ")}>>`;
900-
}
901-
902970
// Renders a UTF codepoint for `inspect` output as `//utfcodepoint(<char>)`,
// where `<char>` is the character for the numeric `codepoint.value`.
export function inspectUtfCodepoint(codepoint) {
  const character = String.fromCodePoint(codepoint.value);
  return "//utfcodepoint(" + character + ")";
}
905973

906974
// Encodes a bit array as an uppercase base 16 (hexadecimal) string, two
// characters per byte.
//
// The input is first padded with zero bits to a whole number of bytes, so an
// unaligned bit array is encoded including its zero-padded final byte.
export function base16_encode(bit_array) {
  const padded = bit_array_pad_to_bytes(bit_array);

  // The byte count is read through `bit_array_byte_size` rather than the
  // `byteSize` property so that the loop still runs when unaligned bit arrays
  // are not supported. There the property is not relied upon elsewhere in this
  // module, and an undefined loop bound would silently produce "".
  const byteCount = bit_array_byte_size(padded);

  let result = "";
  for (let i = 0; i < byteCount; i++) {
    result += padded.byteAt(i).toString(16).padStart(2, "0").toUpperCase();
  }
  return result;
}
@@ -923,38 +993,120 @@ export function base16_decode(string) {
923993
}
924994

925995
// Appends the comma separated contents of `bits` to the string `acc`.
//
// Whole bytes are printed as decimal numbers. When the bit size is not a
// multiple of 8, the last byte is printed as the value of its used high bits
// followed by `:size(n)`, where `n` is the number of trailing bits.
export function bit_array_inspect(bits, acc) {
  const totalBits = bit_array_bit_size(bits);
  const bytes = BIT_ARRAY_UNALIGNED_SUPPORTED ? bits.rawBuffer : bits.buffer;
  const trailingBits = totalBits % 8;

  if (trailingBits === 0) {
    return `${acc}${Array.from(bytes).join(", ")}`;
  }

  // Every byte except the last one is complete
  const lastIndex = bytes.length - 1;
  const segments = [];
  for (let index = 0; index < lastIndex; index++) {
    segments.push(String(bytes[index]));
  }

  // Drop the unused low bits of the partial byte before printing its value
  const partialValue = bytes[lastIndex] >> (8 - trailingBits);
  segments.push(`${partialValue}:size(${trailingBits})`);

  return acc + segments.join(", ");
}
9281020

9291021
// Compares two bit arrays and returns `Lt`, `Eq` or `Gt`.
//
// Bytes are compared pairwise for as long as both sides have at least 8 bits
// left. If one side then runs out of bits, the side with bits remaining is the
// greater one. Otherwise the leftover bits of each side are compared by value
// first and by bit count second.
export function bit_array_compare(first, second) {
  let firstBitsLeft = bit_array_bit_size(first);
  let secondBitsLeft = bit_array_bit_size(second);

  const [firstBytes, secondBytes] = BIT_ARRAY_UNALIGNED_SUPPORTED
    ? [first.rawBuffer, second.rawBuffer]
    : [first.buffer, second.buffer];

  // Walk the whole bytes that both sides have in common
  let index = 0;
  for (; firstBitsLeft >= 8 && secondBitsLeft >= 8; index++) {
    const firstByte = firstBytes[index];
    const secondByte = secondBytes[index];

    if (firstByte !== secondByte) {
      return firstByte > secondByte ? new Gt() : new Lt();
    }

    firstBitsLeft -= 8;
    secondBitsLeft -= 8;
  }

  if (firstBitsLeft === 0 || secondBitsLeft === 0) {
    if (firstBitsLeft === secondBitsLeft) {
      return new Eq();
    }

    // The side that still has bits left is the greater one,
    // examples: "AB" > "A" and "A" < "AB"
    return secondBitsLeft === 0 ? new Gt() : new Lt();
  }

  // Both sides have bits left and at least one of them has fewer than 8, which
  // only happens with unaligned bit arrays.
  // NOTE(review): if one side still has more than 8 bits left here, its shift
  // count `8 - bitsLeft` is negative. JavaScript masks shift counts to five
  // bits, so that side's value becomes 0 - confirm this is intended.
  const firstValue = firstBytes[index] >> (8 - firstBitsLeft);
  const secondValue = secondBytes[index] >> (8 - secondBitsLeft);

  if (firstValue !== secondValue) {
    return firstValue > secondValue ? new Gt() : new Lt();
  }

  if (firstBitsLeft !== secondBitsLeft) {
    return firstBitsLeft > secondBitsLeft ? new Gt() : new Lt();
  }

  return new Eq();
}
9501085

9511086
export function bit_array_starts_with(bits, prefix) {
952-
if (prefix.length > bits.length) {
1087+
const prefixSize = bit_array_bit_size(prefix);
1088+
1089+
if (prefixSize > bit_array_bit_size(bits)) {
9531090
return false;
9541091
}
9551092

956-
for (let i = 0; i < prefix.length; i++) {
957-
if (bits.buffer[i] !== prefix.buffer[i]) {
1093+
const isPrefixAligned = prefixSize % 8 === 0;
1094+
1095+
// Check any whole bytes
1096+
const byteCount = Math.trunc(prefixSize / 8);
1097+
for (let i = 0; i < byteCount; i++) {
1098+
if (bits.rawBuffer[i] !== prefix.rawBuffer[i]) {
1099+
return false;
1100+
}
1101+
}
1102+
1103+
// Check any trailing bits at the end of the prefix
1104+
if (!isPrefixAligned) {
1105+
const unusedBitsCount = 8 - (prefixSize % 8);
1106+
if (
1107+
bits.rawBuffer[byteCount] >> unusedBitsCount !==
1108+
prefix.rawBuffer[byteCount] >> unusedBitsCount
1109+
) {
9581110
return false;
9591111
}
9601112
}

0 commit comments

Comments
 (0)