Skip to content

Commit 26fc288

Browse files
committed
Add bit_array.to_string_lossy
1 parent 9f465da commit 26fc288

File tree

3 files changed

+61
-0
lines changed

3 files changed

+61
-0
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
the JavaScript target.
88
- Fixed a bug where tuples with atoms in the first position could be formatted
99
incorrectly by `string.inspect`.
10+
- The `bit_array` module gains the `to_string_lossy` function.
1011

1112
## v0.56.0 - 2025-03-09
1213

src/gleam/bit_array.gleam

+40
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,46 @@ pub fn to_string(bits: BitArray) -> Result(String, Nil) {
9595
@external(erlang, "gleam_stdlib", "identity")
9696
fn unsafe_to_string(a: BitArray) -> String
9797

98+
/// Converts a bit array to a string, substituting any data that is not valid
/// UTF-8 with text produced by the supplied callback.
///
/// Valid UTF-8 codepoints are copied to the output unchanged. Each byte that
/// does not start a valid codepoint is handed to `map_invalid_bits` on its
/// own, and trailing bits that do not make up a whole byte are handed to it
/// together as a single bit array. Whatever the callback returns is inserted
/// into the result at the position of the invalid data.
///
/// ## Examples
///
/// ```gleam
/// to_string_lossy(<<"A":utf8, 0x80, "1":utf8, 0:size(5)>>, fn(_) { "�" })
/// // -> "A�1�"
/// ```
///
pub fn to_string_lossy(
  bits: BitArray,
  map_invalid_bits: fn(BitArray) -> String,
) -> String {
  // Start the recursive worker with an empty accumulator.
  bits
  |> to_string_lossy_impl(map_invalid_bits, "")
}
115+
116+
// Recursive worker for `to_string_lossy`. Consumes `bits` from the front and
// appends the decoded (or replacement) text to `acc` until nothing is left.
fn to_string_lossy_impl(
  bits: BitArray,
  map_invalid_bits: fn(BitArray) -> String,
  acc: String,
) -> String {
  case bits {
    // Nothing left to decode: the accumulator is the final string.
    <<>> -> acc

    // The input starts with a valid UTF-8 codepoint: keep it as-is.
    <<codepoint:utf8_codepoint, remaining:bits>> -> {
      let decoded = string.from_utf_codepoints([codepoint])
      to_string_lossy_impl(remaining, map_invalid_bits, acc <> decoded)
    }

    // No valid codepoint at the front: replace one byte and carry on with
    // whatever follows it.
    <<invalid_byte:bytes-1, remaining:bits>> -> {
      let replacement = map_invalid_bits(invalid_byte)
      to_string_lossy_impl(remaining, map_invalid_bits, acc <> replacement)
    }

    // Less than a whole byte remains: replace all leftover bits at once.
    _ -> acc <> map_invalid_bits(bits)
  }
}
137+
98138
/// Creates a new bit array by joining multiple binaries.
99139
///
100140
/// ## Examples

test/gleam/bit_array_test.gleam

+20
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,26 @@ pub fn to_string_test() {
207207
|> should.equal(Ok("ø"))
208208
}
209209

210+
pub fn to_string_lossy_test() {
  // Every case below maps invalid data to the same replacement string.
  let replace_invalid = fn(_) { "�" }

  // An empty bit array produces an empty string
  should.equal(bit_array.to_string_lossy(<<>>, replace_invalid), "")

  // Invalid bytes on either side of a valid character are each replaced
  should.equal(
    bit_array.to_string_lossy(<<0x80, "A":utf8, 0x81>>, replace_invalid),
    "�A�",
  )

  // Test some codepoints that require 2/3/4 bytes to be stored as UTF-8
  should.equal(
    bit_array.to_string_lossy(<<"£И한𐍈":utf8>>, replace_invalid),
    "£И한𐍈",
  )

  // Test unaligned bit array
  should.equal(
    bit_array.to_string_lossy(<<"ø":utf8, 50:4>>, replace_invalid),
    "ø�",
  )
}
229+
210230
pub fn is_utf8_test() {
211231
<<>>
212232
|> bit_array.is_utf8

0 commit comments

Comments
 (0)