Skip to content

Commit 00f8f3f

Browse files
committed
Add bit_array.split, rework tests for split_once
1 parent 8536d3c commit 00f8f3f

File tree

5 files changed

+153
-12
lines changed

5 files changed

+153
-12
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## Unreleased
44

5+
- The `bit_array` module gains the `split` and `split_once` functions.
56
- The deprecated `drop_left`, `drop_right`, `pad_left`, `pad_right`,
67
`trim_left`, and `trim_right` functions have been removed.
78
- Fixed a bug that would result in `list.unique` having quadratic runtime.

src/gleam/bit_array.gleam

+28
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,16 @@ pub fn slice(
6464
/// The result will not include the pattern, and returns an error if the
6565
/// pattern is not found.
6666
///
67+
/// This function runs in linear time.
68+
///
6769
/// ## Examples
6870
///
6971
/// ```gleam
7072
/// split_once(from: <<1, 2, 3>>, on: <<2>>)
7173
/// // -> Ok(#(<<1>>, <<3>>))
74+
///
75+
/// split_once(from: <<0>>, on: <<1>>)
76+
/// // -> Error(Nil)
7277
/// ```
7378
@external(erlang, "gleam_stdlib", "bit_array_split_once")
7479
@external(javascript, "../gleam_stdlib.mjs", "bit_array_split_once")
@@ -77,6 +82,29 @@ pub fn split_once(
7782
on pattern: BitArray,
7883
) -> Result(#(BitArray, BitArray), Nil)
7984

85+
/// Splits a bit array into parts at the locations of the pattern.
///
/// The result will not include the pattern, and empty parts are dropped.
/// If the pattern is not found the input is returned unchanged as the only
/// part (or as no part at all when the input is empty).
///
/// An error is returned if the pattern is empty. On the Erlang target an
/// error is also returned if the input is not byte-aligned.
///
/// This function runs in linear time.
///
/// ## Examples
///
/// ```gleam
/// split(from: <<0, 1, 0, 2, 0, 3>>, on: <<0>>)
/// // -> Ok([<<1>>, <<2>>, <<3>>])
///
/// split(from: <<0>>, on: <<1>>)
/// // -> Ok([<<0>>])
///
/// split(from: <<1>>, on: <<1>>)
/// // -> Ok([])
///
/// split(from: <<1, 2>>, on: <<>>)
/// // -> Error(Nil)
/// ```
@external(erlang, "gleam_stdlib", "bit_array_split")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_split")
pub fn split(
  from bits: BitArray,
  on pattern: BitArray,
) -> Result(List(BitArray), Nil)
107+
80108
/// Tests to see whether a bit array is valid UTF-8.
81109
///
82110
pub fn is_utf8(bits: BitArray) -> Bool {

src/gleam_stdlib.erl

+8-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
inspect/1, float_to_string/1, int_from_base_string/2,
1515
utf_codepoint_list_to_string/1, contains_string/2, crop_string/2,
1616
base16_encode/1, base16_decode/1, string_replace/3, slice/3,
17-
bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1, bit_array_split_once/2
17+
bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1, bit_array_split_once/2,
18+
bit_array_split/2
1819
]).
1920

2021
%% Taken from OTP's uri_string module
@@ -235,12 +236,17 @@ bit_array_split_once(Bin, Sub) ->
235236
try
236237
case binary:split(Bin, [Sub]) of
237238
[<<>>, <<>>] -> {error, nil};
238-
[Part1, Part2] -> {ok, {Part1, Part2}};
239+
[A, B] -> {ok, {A, B}};
239240
_ -> {error, nil}
240241
end
241242
catch error:badarg -> {error, nil}
242243
end.
243244

245+
%% Splits Bin at every occurrence of Sub, discarding empty parts
%% (the `global` and `trim_all` options of binary:split/3).
%% Yields {ok, Parts} on success and {error, nil} when binary:split/3
%% raises badarg.
bit_array_split(Bin, Sub) ->
    try binary:split(Bin, [Sub], [global, trim_all]) of
        Parts -> {ok, Parts}
    catch
        error:badarg -> {error, nil}
    end.
249+
244250
base_decode64(S) ->
245251
try {ok, base64:decode(S)}
246252
catch error:_ -> {error, nil}

src/gleam_stdlib.mjs

+60-5
Original file line numberDiff line numberDiff line change
@@ -427,16 +427,19 @@ export function bit_array_slice(bits, position, length) {
427427

428428
export function bit_array_split_once(bits, pattern) {
429429
try {
430-
if (!(bits instanceof BitArray) || !(pattern instanceof BitArray) || pattern.buffer.length < 1 || pattern.buffer.length >= bits.buffer.length) {
430+
if (!(bits instanceof BitArray)
431+
|| !(pattern instanceof BitArray)
432+
|| pattern.buffer.length < 1
433+
|| pattern.buffer.length >= bits.buffer.length) {
431434
return new Error(Nil);
432435
}
433436

434-
let i = 0;
435437
const n = bits.buffer.length - pattern.buffer.length + 1;
436-
437-
find: for (; i < n; i++) {
438+
find: for (let i = 0; i < n; i++) {
438439
for (let j = 0; j < pattern.buffer.length; j++) {
439-
if (bits.buffer[i + j] !== pattern.buffer[j]) continue find;
440+
if (bits.buffer[i + j] !== pattern.buffer[j]) {
441+
continue find;
442+
}
440443
}
441444
const before = bits.buffer.slice(0, i);
442445
const after = bits.buffer.slice(i + pattern.buffer.length);
@@ -449,6 +452,58 @@ export function bit_array_split_once(bits, pattern) {
449452
}
450453
}
451454

455+
/**
 * Splits `bits` at every occurrence of `pattern`, dropping empty parts.
 *
 * Returns `Error(Nil)` when either argument is not a `BitArray`, when the
 * pattern is empty, or when anything throws while splitting. Otherwise
 * returns `Ok` wrapping a list of the non-empty parts between occurrences
 * of the pattern. When the pattern never occurs the list holds the whole
 * input (or is empty if the input is empty). When the input consists only
 * of occurrences of the pattern the list is empty.
 */
export function bit_array_split(bits, pattern) {
  try {
    if (
      !(bits instanceof BitArray) ||
      !(pattern instanceof BitArray) ||
      pattern.buffer.length < 1
    ) {
      return new Error(Nil);
    }

    const results = [];
    let lastIndex = 0;
    // Number of start positions at which the pattern could still fit.
    // Zero or negative when the pattern is longer than the input, in which
    // case the search loop below does not run.
    const n = bits.buffer.length - pattern.buffer.length + 1;

    find: for (let i = 0; i < n; i++) {
      for (let j = 0; j < pattern.buffer.length; j++) {
        if (bits.buffer[i + j] !== pattern.buffer[j]) {
          continue find;
        }
      }
      // Match at i: keep the bytes since the previous match, unless empty.
      if (i > lastIndex) {
        results.push(new BitArray(bits.buffer.slice(lastIndex, i)));
      }
      lastIndex = i + pattern.buffer.length;
      // Resume right after the match; the loop's i++ lands on lastIndex.
      i = lastIndex - 1;
    }

    // Trailing bytes after the last match (the whole input if no match).
    if (lastIndex < bits.buffer.length) {
      results.push(new BitArray(bits.buffer.slice(lastIndex)));
    }

    // An empty result here means every byte was consumed by matches (or the
    // input was empty), so the answer is the empty list, not [bits].
    return new Ok(List.fromArray(results));
  } catch (e) {
    return new Error(Nil);
  }
}
506+
452507
export function codepoint(int) {
453508
return new UtfCodepoint(int);
454509
}

test/gleam/bit_array_test.gleam

+56-5
Original file line numberDiff line numberDiff line change
@@ -215,16 +215,24 @@ pub fn split_once_test() {
215215
|> bit_array.split_once(<<"h":utf8>>)
216216
|> should.equal(Ok(#(<<>>, <<"ello":utf8>>)))
217217

218-
<<"hello":utf8>>
218+
<<0, 1, 0, 2, 0, 3>>
219+
|> bit_array.split_once(<<0, 2>>)
220+
|> should.equal(Ok(#(<<0, 1>>, <<0, 3>>)))
221+
222+
<<0, 1, 2, 0, 3, 4, 5>>
223+
|> bit_array.split_once(<<>>)
224+
|> should.equal(Error(Nil))
225+
226+
<<>>
219227
|> bit_array.split_once(<<1>>)
220228
|> should.equal(Error(Nil))
221229

222-
<<"hello":utf8>>
223-
|> bit_array.split_once(<<"":utf8>>)
230+
<<1>>
231+
|> bit_array.split_once(<<1>>)
224232
|> should.equal(Error(Nil))
225233

226-
<<"hello":utf8>>
227-
|> bit_array.split_once(<<"hello":utf8>>)
234+
<<0>>
235+
|> bit_array.split_once(<<1>>)
228236
|> should.equal(Error(Nil))
229237
}
230238

@@ -237,6 +245,49 @@ pub fn split_once_erlang_only_test() {
237245
|> should.equal(Error(Nil))
238246
}
239247

248+
pub fn split_test() {
  // Adjacent occurrences of the pattern do not produce an empty part.
  bit_array.split(<<"hello":utf8>>, <<"l":utf8>>)
  |> should.equal(Ok([<<"he":utf8>>, <<"o":utf8>>]))

  // A leading occurrence does not produce an empty first part.
  bit_array.split(<<0, 1, 0, 2, 0, 3>>, <<0>>)
  |> should.equal(Ok([<<1>>, <<2>>, <<3>>]))

  // A trailing occurrence does not produce an empty last part.
  bit_array.split(<<1, 0>>, <<0>>)
  |> should.equal(Ok([<<1>>]))

  // Multi-byte pattern.
  bit_array.split(<<0, 1, 0, 2, 0, 3>>, <<0, 2>>)
  |> should.equal(Ok([<<0, 1>>, <<0, 3>>]))

  // Pattern absent: the input comes back as the only part.
  bit_array.split(<<1>>, <<0>>)
  |> should.equal(Ok([<<1>>]))

  // Input equal to the pattern: nothing is left over.
  bit_array.split(<<1>>, <<1>>)
  |> should.equal(Ok([]))

  // Empty input.
  bit_array.split(<<>>, <<1>>)
  |> should.equal(Ok([]))

  // Empty pattern is rejected.
  bit_array.split(<<0, 1, 2, 0, 3, 4, 5>>, <<>>)
  |> should.equal(Error(Nil))
}
281+
282+
// Erlang-only: the input below is not byte-aligned, and such BitArrays are
// not supported on the JavaScript target.
@target(erlang)
pub fn split_erlang_only_test() {
  bit_array.split(<<0, 1, 2:7>>, <<1>>)
  |> should.equal(Error(Nil))
}
290+
240291
pub fn to_string_test() {
241292
<<>>
242293
|> bit_array.to_string

0 commit comments

Comments
 (0)