Skip to content

Commit 4a43ea8

Browse files
committed
Unaligned bit arrays on the JavaScript target
1 parent 7914051 commit 4a43ea8

File tree

7 files changed

+153
-120
lines changed

7 files changed

+153
-120
lines changed

.github/workflows/ci.yml

+6-3
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ jobs:
4444
- uses: erlef/setup-beam@v1
4545
with:
4646
otp-version: "27.0"
47-
gleam-version: "1.6.0"
47+
gleam-version: nightly
48+
version-type: strict
4849
- uses: actions/setup-node@v4
4950
with:
5051
node-version: ${{ matrix.node_version }}
@@ -62,7 +63,8 @@ jobs:
6263
- uses: erlef/setup-beam@v1
6364
with:
6465
otp-version: "27.0"
65-
gleam-version: "1.6.0"
66+
gleam-version: nightly
67+
version-type: strict
6668
- uses: oven-sh/setup-bun@v2
6769
with:
6870
bun-version: ${{ matrix.bun_version }}
@@ -80,7 +82,8 @@ jobs:
8082
- uses: erlef/setup-beam@v1
8183
with:
8284
otp-version: "27.0"
83-
gleam-version: "1.6.0"
85+
gleam-version: nightly
86+
version-type: strict
8487
- uses: denoland/setup-deno@v1
8588
with:
8689
deno-version: ${{ matrix.deno_version }}

CHANGELOG.md

+3-4
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,11 @@
22

33
## Unreleased
44

5-
- The performance of `dict.is_empty` has been improved.
6-
7-
## v0.54.0 - 2025-02-04
8-
95
- The `uri` module gains the `empty` value, representing an empty URI which is
106
equivalent to `""`.
7+
- The performance of `dict.is_empty` has been improved.
8+
- Unaligned bit arrays on the JavaScript target are now supported by the
9+
functions in the `bit_array` module.
1110

1211
## v0.54.0 - 2025-02-04
1312

gleam.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "gleam_stdlib"
22
version = "0.54.0"
3-
gleam = ">= 0.32.0"
3+
gleam = ">= 1.9.0"
44
licences = ["Apache-2.0"]
55
description = "A standard library for the Gleam programming language"
66

src/gleam/bit_array.gleam

+6-8
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,20 @@ pub fn from_string(x: String) -> BitArray
1313
/// Returns an integer which is the number of bits in the bit array.
1414
///
1515
@external(erlang, "erlang", "bit_size")
16-
pub fn bit_size(x: BitArray) -> Int {
17-
byte_size(x) * 8
18-
}
16+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_bit_size")
17+
pub fn bit_size(x: BitArray) -> Int
1918

2019
/// Returns an integer which is the number of bytes in the bit array.
2120
///
2221
@external(erlang, "erlang", "byte_size")
23-
@external(javascript, "../gleam_stdlib.mjs", "length")
22+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_byte_size")
2423
pub fn byte_size(x: BitArray) -> Int
2524

2625
/// Pads a bit array with zeros so that it is a whole number of bytes.
2726
///
2827
@external(erlang, "gleam_stdlib", "bit_array_pad_to_bytes")
29-
pub fn pad_to_bytes(x: BitArray) -> BitArray {
30-
x
31-
}
28+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_pad_to_bytes")
29+
pub fn pad_to_bytes(x: BitArray) -> BitArray
3230

3331
/// Creates a new bit array by joining two bit arrays.
3432
///
@@ -228,7 +226,6 @@ fn inspect_loop(input: BitArray, accumulator: String) -> String {
228226
/// // -> Eq
229227
/// ```
230228
///
231-
@external(javascript, "../gleam_stdlib.mjs", "bit_array_compare")
232229
pub fn compare(a: BitArray, with b: BitArray) -> order.Order {
233230
case a, b {
234231
<<first_byte, first_rest:bits>>, <<second_byte, second_rest:bits>> ->
@@ -257,6 +254,7 @@ pub fn compare(a: BitArray, with b: BitArray) -> order.Order {
257254
}
258255

259256
@external(erlang, "gleam_stdlib", "bit_array_to_int_and_size")
257+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_to_int_and_size")
260258
fn bit_array_to_int_and_size(a: BitArray) -> #(Int, Int)
261259

262260
/// Checks whether the first `BitArray` starts with the second one.

src/gleam_stdlib.mjs

+126-44
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {
77
UtfCodepoint,
88
stringBits,
99
toBitArray,
10+
bitArraySlice,
1011
NonEmpty,
1112
CustomType,
1213
} from "./gleam.mjs";
@@ -316,8 +317,49 @@ export function bit_array_from_string(string) {
316317
return toBitArray([stringBits(string)]);
317318
}
318319

320+
export function bit_array_bit_size(bit_array) {
321+
return bit_array.bitSize;
322+
}
323+
324+
export function bit_array_byte_size(bit_array) {
325+
return bit_array.byteSize;
326+
}
327+
328+
export function bit_array_pad_to_bytes(bit_array) {
329+
const trailingBitsCount = bit_array.bitSize % 8;
330+
331+
// If the bit array is a whole number of bytes it can be returned unchanged
332+
if (trailingBitsCount === 0) {
333+
return bit_array;
334+
}
335+
336+
const finalByte = bit_array.byteAt(bit_array.byteSize - 1);
337+
338+
const unusedBitsCount = 8 - trailingBitsCount;
339+
const correctFinalByte = (finalByte >> unusedBitsCount) << unusedBitsCount;
340+
341+
// If the unused bits in the final byte are already set to zero then the
342+
// existing buffer can be re-used, avoiding a copy
343+
if (finalByte === correctFinalByte) {
344+
return new BitArray(
345+
bit_array.rawBuffer,
346+
bit_array.byteSize * 8,
347+
bit_array.bitOffset,
348+
);
349+
}
350+
351+
// Copy the bit array into a new aligned buffer and set the correct final byte
352+
const buffer = new Uint8Array(bit_array.byteSize);
353+
for (let i = 0; i < buffer.length - 1; i++) {
354+
buffer[i] = bit_array.byteAt(i);
355+
}
356+
buffer[buffer.length - 1] = correctFinalByte;
357+
358+
return new BitArray(buffer);
359+
}
360+
319361
export function bit_array_concat(bit_arrays) {
320-
return toBitArray(bit_arrays.toArray().map((b) => b.buffer));
362+
return toBitArray(bit_arrays.toArray());
321363
}
322364

323365
export function console_log(term) {
@@ -333,9 +375,25 @@ export function crash(message) {
333375
}
334376

335377
export function bit_array_to_string(bit_array) {
378+
// If the bit array isn't a whole number of bytes then return an error
379+
if (bit_array.bitSize % 8 !== 0) {
380+
return new Error(Nil);
381+
}
382+
336383
try {
337384
const decoder = new TextDecoder("utf-8", { fatal: true });
338-
return new Ok(decoder.decode(bit_array.buffer));
385+
386+
if (bit_array.bitOffset === 0) {
387+
return new Ok(decoder.decode(bit_array.rawBuffer));
388+
} else {
389+
// The input data isn't aligned, so copy it into a new aligned buffer so
390+
// that TextDecoder can be used
391+
const buffer = new Uint8Array(bit_array.byteSize);
392+
for (let i = 0; i < buffer.length; i++) {
393+
buffer[i] = bit_array.byteAt(i);
394+
}
395+
return new Ok(decoder.decode(buffer));
396+
}
339397
} catch {
340398
return new Error(Nil);
341399
}
@@ -413,16 +471,14 @@ export function random_uniform() {
413471
}
414472

415473
export function bit_array_slice(bits, position, length) {
416-
const start = Math.min(position, position + length);
417-
const end = Math.max(position, position + length);
418-
if (start < 0 || end > bits.length) return new Error(Nil);
419-
const byteOffset = bits.buffer.byteOffset + start;
420-
const buffer = new Uint8Array(
421-
bits.buffer.buffer,
422-
byteOffset,
423-
Math.abs(length),
424-
);
425-
return new Ok(new BitArray(buffer));
474+
let start = Math.min(position, position + length);
475+
let end = Math.max(position, position + length);
476+
477+
if (start < 0 || end * 8 > bits.bitSize) {
478+
return new Error(Nil);
479+
}
480+
481+
return new Ok(bitArraySlice(bits, start * 8, end * 8));
426482
}
427483

428484
export function codepoint(int) {
@@ -522,16 +578,20 @@ let b64TextDecoder;
522578
export function encode64(bit_array, padding) {
523579
b64TextDecoder ??= new TextDecoder();
524580

525-
const bytes = bit_array.buffer;
581+
bit_array = bit_array_pad_to_bytes(bit_array);
526582

527-
const m = bytes.length;
583+
const m = bit_array.byteSize;
528584
const k = m % 3;
529585
const n = Math.floor(m / 3) * 4 + (k && k + 1);
530586
const N = Math.ceil(m / 3) * 4;
531587
const encoded = new Uint8Array(N);
532588

533589
for (let i = 0, j = 0; j < m; i += 4, j += 3) {
534-
const y = (bytes[j] << 16) + (bytes[j + 1] << 8) + (bytes[j + 2] | 0);
590+
const y =
591+
(bit_array.byteAt(j) << 16) +
592+
(bit_array.byteAt(j + 1) << 8) +
593+
(bit_array.byteAt(j + 2) | 0);
594+
535595
encoded[i] = b64EncodeLookup[y >> 18];
536596
encoded[i + 1] = b64EncodeLookup[(y >> 12) & 0x3f];
537597
encoded[i + 2] = b64EncodeLookup[(y >> 6) & 0x3f];
@@ -804,7 +864,7 @@ export function inspect(v) {
804864
if (Array.isArray(v)) return `#(${v.map(inspect).join(", ")})`;
805865
if (v instanceof List) return inspectList(v);
806866
if (v instanceof UtfCodepoint) return inspectUtfCodepoint(v);
807-
if (v instanceof BitArray) return inspectBitArray(v);
867+
if (v instanceof BitArray) return `<<${bit_array_inspect(v, "")}>>`;
808868
if (v instanceof CustomType) return inspectCustomType(v);
809869
if (v instanceof Dict) return inspectDict(v);
810870
if (v instanceof Set) return `//js(Set(${[...v].map(inspect).join(", ")}))`;
@@ -895,19 +955,26 @@ export function inspectList(list) {
895955
return `[${list.toArray().map(inspect).join(", ")}]`;
896956
}
897957

898-
export function inspectBitArray(bits) {
899-
return `<<${Array.from(bits.buffer).join(", ")}>>`;
900-
}
901-
902958
export function inspectUtfCodepoint(codepoint) {
903959
return `//utfcodepoint(${String.fromCodePoint(codepoint.value)})`;
904960
}
905961

906962
export function base16_encode(bit_array) {
963+
const trailingBitsCount = bit_array.bitSize % 8;
964+
907965
let result = "";
908-
for (const byte of bit_array.buffer) {
966+
967+
for (let i = 0; i < bit_array.byteSize; i++) {
968+
let byte = bit_array.byteAt(i);
969+
970+
if (i === bit_array.byteSize - 1 && trailingBitsCount !== 0) {
971+
const unusedBitsCount = 8 - trailingBitsCount;
972+
byte = (byte >> unusedBitsCount) << unusedBitsCount;
973+
}
974+
909975
result += byte.toString(16).padStart(2, "0").toUpperCase();
910976
}
977+
911978
return result;
912979
}
913980

@@ -923,38 +990,53 @@ export function base16_decode(string) {
923990
}
924991

925992
export function bit_array_inspect(bits, acc) {
926-
return `${acc}${[...bits.buffer].join(", ")}`;
927-
}
993+
if (bits.bitSize === 0) {
994+
return acc;
995+
}
928996

929-
export function bit_array_compare(first, second) {
930-
for (let i = 0; i < first.length; i++) {
931-
if (i >= second.length) {
932-
return new Gt(); // first has more items
933-
}
934-
const f = first.buffer[i];
935-
const s = second.buffer[i];
936-
if (f > s) {
937-
return new Gt();
938-
}
939-
if (f < s) {
940-
return new Lt();
941-
}
997+
for (let i = 0; i < bits.byteSize - 1; i++) {
998+
acc += bits.byteAt(i).toString();
999+
acc += ", ";
9421000
}
943-
// This means that either first did not have any items
944-
// or all items in first were equal to second.
945-
if (first.length === second.length) {
946-
return new Eq();
1001+
1002+
if (bits.byteSize * 8 === bits.bitSize) {
1003+
acc += bits.byteAt(bits.byteSize - 1).toString();
1004+
} else {
1005+
const trailingBitsCount = bits.bitSize % 8;
1006+
acc += bits.byteAt(bits.byteSize - 1) >> (8 - trailingBitsCount);
1007+
acc += `:size(${trailingBitsCount})`;
9471008
}
948-
return new Lt(); // second has more items
1009+
1010+
return acc;
1011+
}
1012+
1013+
export function bit_array_to_int_and_size(bits) {
1014+
const trailingBitsCount = bits.bitSize % 8;
1015+
const unusedBitsCount = trailingBitsCount === 0 ? 0 : 8 - trailingBitsCount;
1016+
1017+
return [bits.byteAt(0) >> unusedBitsCount, bits.bitSize];
9491018
}
9501019

9511020
export function bit_array_starts_with(bits, prefix) {
952-
if (prefix.length > bits.length) {
1021+
if (prefix.bitSize > bits.bitSize) {
9531022
return false;
9541023
}
9551024

956-
for (let i = 0; i < prefix.length; i++) {
957-
if (bits.buffer[i] !== prefix.buffer[i]) {
1025+
// Check any whole bytes
1026+
const byteCount = Math.trunc(prefix.bitSize / 8);
1027+
for (let i = 0; i < byteCount; i++) {
1028+
if (bits.byteAt(i) !== prefix.byteAt(i)) {
1029+
return false;
1030+
}
1031+
}
1032+
1033+
// Check any trailing bits at the end of the prefix
1034+
if (prefix.bitSize % 8 !== 0) {
1035+
const unusedBitsCount = 8 - (prefix.bitSize % 8);
1036+
if (
1037+
bits.byteAt(byteCount) >> unusedBitsCount !==
1038+
prefix.byteAt(byteCount) >> unusedBitsCount
1039+
) {
9581040
return false;
9591041
}
9601042
}

0 commit comments

Comments
 (0)