Skip to content

Commit 430a444

Browse files
committed
Unaligned bit arrays on the JavaScript target
1 parent 2af0691 commit 430a444

9 files changed

+184
-131
lines changed

.github/workflows/ci.yml

+8-4
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ jobs:
2828
- uses: erlef/setup-beam@v1
2929
with:
3030
otp-version: ${{ matrix.erlang_version }}
31-
gleam-version: "1.6.0"
31+
gleam-version: 1.9.0-rc1
32+
version-type: strict
3233
- run: gleam test --target erlang
3334
- run: gleam format --check src test
3435

@@ -44,7 +45,8 @@ jobs:
4445
- uses: erlef/setup-beam@v1
4546
with:
4647
otp-version: "27.0"
47-
gleam-version: "1.6.0"
48+
gleam-version: 1.9.0-rc1
49+
version-type: strict
4850
- uses: actions/setup-node@v4
4951
with:
5052
node-version: ${{ matrix.node_version }}
@@ -62,7 +64,8 @@ jobs:
6264
- uses: erlef/setup-beam@v1
6365
with:
6466
otp-version: "27.0"
65-
gleam-version: "1.6.0"
67+
gleam-version: 1.9.0-rc1
68+
version-type: strict
6669
- uses: oven-sh/setup-bun@v2
6770
with:
6871
bun-version: ${{ matrix.bun_version }}
@@ -80,7 +83,8 @@ jobs:
8083
- uses: erlef/setup-beam@v1
8184
with:
8285
otp-version: "27.0"
83-
gleam-version: "1.6.0"
86+
gleam-version: 1.9.0-rc1
87+
version-type: strict
8488
- uses: denoland/setup-deno@v1
8589
with:
8690
deno-version: ${{ matrix.deno_version }}

CHANGELOG.md

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
# Changelog
22

3+
## Unreleased
4+
5+
- The functions in the `bit_array` module now support unaligned bit arrays on
6+
the JavaScript target.
7+
38
## v0.55.0 - 2025-02-21
49

510
- The performance of `dict.is_empty` has been improved.
611
- The `flip` function in the `function` module has been deprecated.
7-
8-
## v0.54.0 - 2025-02-04
9-
1012
- The `uri` module gains the `empty` value, representing an empty URI which
1113
is equivalent to `""`.
14+
- The performance of `dict.is_empty` has been improved.
1215

1316
## v0.54.0 - 2025-02-04
1417

gleam.toml

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "gleam_stdlib"
22
version = "0.55.0"
3-
gleam = ">= 0.32.0"
3+
gleam = ">= 1.9.0"
44
licences = ["Apache-2.0"]
55
description = "A standard library for the Gleam programming language"
66

@@ -11,6 +11,4 @@ links = [
1111
]
1212

1313
[javascript.deno]
14-
allow_read = [
15-
"./",
16-
]
14+
allow_read = ["./"]

src/gleam/bit_array.gleam

+6-8
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,20 @@ pub fn from_string(x: String) -> BitArray
1313
/// Returns an integer which is the number of bits in the bit array.
1414
///
1515
@external(erlang, "erlang", "bit_size")
16-
pub fn bit_size(x: BitArray) -> Int {
17-
byte_size(x) * 8
18-
}
16+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_bit_size")
17+
pub fn bit_size(x: BitArray) -> Int
1918

2019
/// Returns an integer which is the number of bytes in the bit array.
2120
///
2221
@external(erlang, "erlang", "byte_size")
23-
@external(javascript, "../gleam_stdlib.mjs", "length")
22+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_byte_size")
2423
pub fn byte_size(x: BitArray) -> Int
2524

2625
/// Pads a bit array with zeros so that it is a whole number of bytes.
2726
///
2827
@external(erlang, "gleam_stdlib", "bit_array_pad_to_bytes")
29-
pub fn pad_to_bytes(x: BitArray) -> BitArray {
30-
x
31-
}
28+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_pad_to_bytes")
29+
pub fn pad_to_bytes(x: BitArray) -> BitArray
3230

3331
/// Creates a new bit array by joining two bit arrays.
3432
///
@@ -228,7 +226,6 @@ fn inspect_loop(input: BitArray, accumulator: String) -> String {
228226
/// // -> Eq
229227
/// ```
230228
///
231-
@external(javascript, "../gleam_stdlib.mjs", "bit_array_compare")
232229
pub fn compare(a: BitArray, with b: BitArray) -> order.Order {
233230
case a, b {
234231
<<first_byte, first_rest:bits>>, <<second_byte, second_rest:bits>> ->
@@ -257,6 +254,7 @@ pub fn compare(a: BitArray, with b: BitArray) -> order.Order {
257254
}
258255

259256
@external(erlang, "gleam_stdlib", "bit_array_to_int_and_size")
257+
@external(javascript, "../gleam_stdlib.mjs", "bit_array_to_int_and_size")
260258
fn bit_array_to_int_and_size(a: BitArray) -> #(Int, Int)
261259

262260
/// Checks whether the first `BitArray` starts with the second one.

src/gleam_stdlib.mjs

+125-42
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {
77
UtfCodepoint,
88
stringBits,
99
toBitArray,
10+
bitArraySlice,
1011
NonEmpty,
1112
CustomType,
1213
} from "./gleam.mjs";
@@ -316,8 +317,50 @@ export function bit_array_from_string(string) {
316317
return toBitArray([stringBits(string)]);
317318
}
318319

320+
export function bit_array_bit_size(bit_array) {
321+
return bit_array.bitSize;
322+
}
323+
324+
export function bit_array_byte_size(bit_array) {
325+
return bit_array.byteSize;
326+
}
327+
328+
export function bit_array_pad_to_bytes(bit_array) {
329+
const trailingBitsCount = bit_array.bitSize % 8;
330+
331+
// If the bit array is a whole number of bytes it can be returned unchanged
332+
if (trailingBitsCount === 0) {
333+
return bit_array;
334+
}
335+
336+
const finalByte = bit_array.byteAt(bit_array.byteSize - 1);
337+
338+
// The required final byte has its unused trailing bits set to zero
339+
const unusedBitsCount = 8 - trailingBitsCount;
340+
const correctFinalByte = (finalByte >> unusedBitsCount) << unusedBitsCount;
341+
342+
// If the unused bits in the final byte are already set to zero then the
343+
// existing buffer can be re-used, avoiding a copy
344+
if (finalByte === correctFinalByte) {
345+
return new BitArray(
346+
bit_array.rawBuffer,
347+
bit_array.byteSize * 8,
348+
bit_array.bitOffset,
349+
);
350+
}
351+
352+
// Copy the bit array into a new aligned buffer and set the correct final byte
353+
const buffer = new Uint8Array(bit_array.byteSize);
354+
for (let i = 0; i < buffer.length - 1; i++) {
355+
buffer[i] = bit_array.byteAt(i);
356+
}
357+
buffer[buffer.length - 1] = correctFinalByte;
358+
359+
return new BitArray(buffer);
360+
}
361+
319362
export function bit_array_concat(bit_arrays) {
320-
return toBitArray(bit_arrays.toArray().map((b) => b.buffer));
363+
return toBitArray(bit_arrays.toArray());
321364
}
322365

323366
export function console_log(term) {
@@ -333,9 +376,25 @@ export function crash(message) {
333376
}
334377

335378
export function bit_array_to_string(bit_array) {
379+
// If the bit array isn't a whole number of bytes then return an error
380+
if (bit_array.bitSize % 8 !== 0) {
381+
return new Error(Nil);
382+
}
383+
336384
try {
337385
const decoder = new TextDecoder("utf-8", { fatal: true });
338-
return new Ok(decoder.decode(bit_array.buffer));
386+
387+
if (bit_array.bitOffset === 0) {
388+
return new Ok(decoder.decode(bit_array.rawBuffer));
389+
} else {
390+
// The input data isn't aligned, so copy it into a new aligned buffer so
391+
// that TextDecoder can be used
392+
const buffer = new Uint8Array(bit_array.byteSize);
393+
for (let i = 0; i < buffer.length; i++) {
394+
buffer[i] = bit_array.byteAt(i);
395+
}
396+
return new Ok(decoder.decode(buffer));
397+
}
339398
} catch {
340399
return new Error(Nil);
341400
}
@@ -415,14 +474,12 @@ export function random_uniform() {
415474
export function bit_array_slice(bits, position, length) {
416475
const start = Math.min(position, position + length);
417476
const end = Math.max(position, position + length);
418-
if (start < 0 || end > bits.length) return new Error(Nil);
419-
const byteOffset = bits.buffer.byteOffset + start;
420-
const buffer = new Uint8Array(
421-
bits.buffer.buffer,
422-
byteOffset,
423-
Math.abs(length),
424-
);
425-
return new Ok(new BitArray(buffer));
477+
478+
if (start < 0 || end * 8 > bits.bitSize) {
479+
return new Error(Nil);
480+
}
481+
482+
return new Ok(bitArraySlice(bits, start * 8, end * 8));
426483
}
427484

428485
export function codepoint(int) {
@@ -522,16 +579,20 @@ let b64TextDecoder;
522579
export function encode64(bit_array, padding) {
523580
b64TextDecoder ??= new TextDecoder();
524581

525-
const bytes = bit_array.buffer;
582+
bit_array = bit_array_pad_to_bytes(bit_array);
526583

527-
const m = bytes.length;
584+
const m = bit_array.byteSize;
528585
const k = m % 3;
529586
const n = Math.floor(m / 3) * 4 + (k && k + 1);
530587
const N = Math.ceil(m / 3) * 4;
531588
const encoded = new Uint8Array(N);
532589

533590
for (let i = 0, j = 0; j < m; i += 4, j += 3) {
534-
const y = (bytes[j] << 16) + (bytes[j + 1] << 8) + (bytes[j + 2] | 0);
591+
const y =
592+
(bit_array.byteAt(j) << 16) +
593+
(bit_array.byteAt(j + 1) << 8) +
594+
(bit_array.byteAt(j + 2) | 0);
595+
535596
encoded[i] = b64EncodeLookup[y >> 18];
536597
encoded[i + 1] = b64EncodeLookup[(y >> 12) & 0x3f];
537598
encoded[i + 2] = b64EncodeLookup[(y >> 6) & 0x3f];
@@ -804,7 +865,7 @@ export function inspect(v) {
804865
if (Array.isArray(v)) return `#(${v.map(inspect).join(", ")})`;
805866
if (v instanceof List) return inspectList(v);
806867
if (v instanceof UtfCodepoint) return inspectUtfCodepoint(v);
807-
if (v instanceof BitArray) return inspectBitArray(v);
868+
if (v instanceof BitArray) return `<<${bit_array_inspect(v, "")}>>`;
808869
if (v instanceof CustomType) return inspectCustomType(v);
809870
if (v instanceof Dict) return inspectDict(v);
810871
if (v instanceof Set) return `//js(Set(${[...v].map(inspect).join(", ")}))`;
@@ -895,19 +956,26 @@ export function inspectList(list) {
895956
return `[${list.toArray().map(inspect).join(", ")}]`;
896957
}
897958

898-
export function inspectBitArray(bits) {
899-
return `<<${Array.from(bits.buffer).join(", ")}>>`;
900-
}
901-
902959
export function inspectUtfCodepoint(codepoint) {
903960
return `//utfcodepoint(${String.fromCodePoint(codepoint.value)})`;
904961
}
905962

906963
export function base16_encode(bit_array) {
964+
const trailingBitsCount = bit_array.bitSize % 8;
965+
907966
let result = "";
908-
for (const byte of bit_array.buffer) {
967+
968+
for (let i = 0; i < bit_array.byteSize; i++) {
969+
let byte = bit_array.byteAt(i);
970+
971+
if (i === bit_array.byteSize - 1 && trailingBitsCount !== 0) {
972+
const unusedBitsCount = 8 - trailingBitsCount;
973+
byte = (byte >> unusedBitsCount) << unusedBitsCount;
974+
}
975+
909976
result += byte.toString(16).padStart(2, "0").toUpperCase();
910977
}
978+
911979
return result;
912980
}
913981

@@ -923,38 +991,53 @@ export function base16_decode(string) {
923991
}
924992

925993
export function bit_array_inspect(bits, acc) {
926-
return `${acc}${[...bits.buffer].join(", ")}`;
927-
}
994+
if (bits.bitSize === 0) {
995+
return acc;
996+
}
928997

929-
export function bit_array_compare(first, second) {
930-
for (let i = 0; i < first.length; i++) {
931-
if (i >= second.length) {
932-
return new Gt(); // first has more items
933-
}
934-
const f = first.buffer[i];
935-
const s = second.buffer[i];
936-
if (f > s) {
937-
return new Gt();
938-
}
939-
if (f < s) {
940-
return new Lt();
941-
}
998+
for (let i = 0; i < bits.byteSize - 1; i++) {
999+
acc += bits.byteAt(i).toString();
1000+
acc += ", ";
9421001
}
943-
// This means that either first did not have any items
944-
// or all items in first were equal to second.
945-
if (first.length === second.length) {
946-
return new Eq();
1002+
1003+
if (bits.byteSize * 8 === bits.bitSize) {
1004+
acc += bits.byteAt(bits.byteSize - 1).toString();
1005+
} else {
1006+
const trailingBitsCount = bits.bitSize % 8;
1007+
acc += bits.byteAt(bits.byteSize - 1) >> (8 - trailingBitsCount);
1008+
acc += `:size(${trailingBitsCount})`;
9471009
}
948-
return new Lt(); // second has more items
1010+
1011+
return acc;
1012+
}
1013+
1014+
export function bit_array_to_int_and_size(bits) {
1015+
const trailingBitsCount = bits.bitSize % 8;
1016+
const unusedBitsCount = trailingBitsCount === 0 ? 0 : 8 - trailingBitsCount;
1017+
1018+
return [bits.byteAt(0) >> unusedBitsCount, bits.bitSize];
9491019
}
9501020

9511021
export function bit_array_starts_with(bits, prefix) {
952-
if (prefix.length > bits.length) {
1022+
if (prefix.bitSize > bits.bitSize) {
9531023
return false;
9541024
}
9551025

956-
for (let i = 0; i < prefix.length; i++) {
957-
if (bits.buffer[i] !== prefix.buffer[i]) {
1026+
// Check any whole bytes
1027+
const byteCount = Math.trunc(prefix.bitSize / 8);
1028+
for (let i = 0; i < byteCount; i++) {
1029+
if (bits.byteAt(i) !== prefix.byteAt(i)) {
1030+
return false;
1031+
}
1032+
}
1033+
1034+
// Check any trailing bits at the end of the prefix
1035+
if (prefix.bitSize % 8 !== 0) {
1036+
const unusedBitsCount = 8 - (prefix.bitSize % 8);
1037+
if (
1038+
bits.byteAt(byteCount) >> unusedBitsCount !==
1039+
prefix.byteAt(byteCount) >> unusedBitsCount
1040+
) {
9581041
return false;
9591042
}
9601043
}

0 commit comments

Comments
 (0)