Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
- `uri.query_to_string` now correctly handles `+` in query params.
- The deprecated `result.then`, `result.unwrap_both`, `function.tap`,
`int.digits`, and `int.undigits` functions have been removed.
- The `bit_array` module gains the `split` and `split_once` functions.

## v0.67.1 - 2025-12-03

Expand Down
46 changes: 46 additions & 0 deletions src/gleam/bit_array.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,52 @@ pub fn slice(
take length: Int,
) -> Result(BitArray, Nil)

// NOTE(review): the JavaScript implementation compares the pattern at every
// byte offset, so the linear-time statement below may not hold on that
// target — confirm.
/// Splits a bit array into two parts at the first location of the pattern.
///
/// The result will not include the pattern: the first part is everything
/// before the pattern and the second part is everything after it.
///
/// An error is returned if the pattern is not found, if the pattern is
/// empty, or if the input is exactly equal to the pattern. On the Erlang
/// target an error is also returned if the input is not a whole number of
/// bytes.
///
/// This function runs in linear time.
///
/// ## Examples
///
/// ```gleam
/// split_once(from: <<1, 2, 3>>, on: <<2>>)
/// // -> Ok(#(<<1>>, <<3>>))
///
/// split_once(from: <<0>>, on: <<1>>)
/// // -> Error(Nil)
///
/// split_once(from: <<1>>, on: <<1>>)
/// // -> Error(Nil)
///
/// split_once(from: <<1, 2, 3>>, on: <<>>)
/// // -> Error(Nil)
/// ```
@external(erlang, "gleam_stdlib", "bit_array_split_once")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_split_once")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not implement this in Gleam rather than Erlang? Could be a bunch nicer, and we wouldn't need to use any private APIs which should not be used.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because of this comment: #629 (comment)

I can give a stab at implementing it in Gleam if you'd prefer it. The Erlang binary:split is a BIF so performance-wise it makes sense to use it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I made a typo here. I meant to say JavaScript rather than Erlang 😅

pub fn split_once(
from bits: BitArray,
on pattern: BitArray,
) -> Result(#(BitArray, BitArray), Nil)

// NOTE(review): the JavaScript implementation compares the pattern at every
// byte offset, so the linear-time statement below may not hold on that
// target — confirm.
/// Splits a bit array into parts at every location of the pattern.
///
/// The result will not include the pattern, and empty parts are dropped.
/// If the pattern does not occur in a non-empty input, the result contains
/// just that input.
///
/// An error is returned if the pattern is empty. On the Erlang target an
/// error is also returned if the input is not a whole number of bytes.
///
/// This function runs in linear time.
///
/// ## Examples
///
/// ```gleam
/// split(from: <<0, 1, 0, 2, 0, 3>>, on: <<0>>)
/// // -> Ok([<<1>>, <<2>>, <<3>>])
///
/// split(from: <<0>>, on: <<1>>)
/// // -> Ok([<<0>>])
///
/// split(from: <<1, 2>>, on: <<1, 2>>)
/// // -> Ok([])
///
/// split(from: <<0, 1>>, on: <<>>)
/// // -> Error(Nil)
/// ```
@external(erlang, "gleam_stdlib", "bit_array_split")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_split")
pub fn split(
from bits: BitArray,
on pattern: BitArray,
) -> Result(List(BitArray), Nil)

/// Tests to see whether a bit array is valid UTF-8.
///
pub fn is_utf8(bits: BitArray) -> Bool {
Expand Down
18 changes: 17 additions & 1 deletion src/gleam_stdlib.erl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
int_from_base_string/2, utf_codepoint_list_to_string/1, contains_string/2,
crop_string/2, base16_encode/1, base16_decode/1, string_replace/3, slice/3,
bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1, index/2, list/5,
dict/1, int/1, float/1, bit_array/1, is_null/1
dict/1, int/1, float/1, bit_array/1, is_null/1, bit_array_split/2,
bit_array_split_once/2
]).

%% Taken from OTP's uri_string module
Expand Down Expand Up @@ -154,6 +155,21 @@ bit_array_slice(Bin, Pos, Len) ->
catch error:badarg -> {error, nil}
end.

%% Splits `Bin` around one occurrence of `Sub` using binary:split/2.
%% Returns {ok, {Before, After}} when the split yields two parts, and
%% {error, nil} otherwise.
bit_array_split_once(Bin, Sub) ->
try
case binary:split(Bin, [Sub]) of
%% Both parts empty: treated as an error rather than a successful split
%% (the tests in this change expect splitting <<1>> on <<1>> to fail).
[<<>>, <<>>] -> {error, nil};
[A, B] -> {ok, {A, B}};
%% Any other shape of result is reported as "not found".
_ -> {error, nil}
end
%% binary:split/2 raises badarg for arguments it rejects; the tests in this
%% change expect an empty pattern and a non-byte-aligned input to give
%% {error, nil}. NOTE(review): only badarg is caught here — confirm no other
%% error class needs handling.
catch error:badarg -> {error, nil}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this try-catch for?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It catches this case, where Erlang would raise.

Now that I reread about the Erlang implementation I realize that it can also raise a nif_error, so maybe the catch should be more generic to avoid raising altogether.

end.

%% Splits `Bin` at every occurrence of `Sub`, dropping empty parts.
%% Returns {ok, Parts}, or {error, nil} when binary:split/3 rejects the
%% arguments with badarg.
bit_array_split(Bin, Sub) ->
    try binary:split(Bin, [Sub], [global, trim_all]) of
        Parts -> {ok, Parts}
    catch
        error:badarg -> {error, nil}
    end.

base64_decode(S) ->
try {ok, base64:decode(S)}
catch error:_ -> {error, nil}
Expand Down
74 changes: 74 additions & 0 deletions src/gleam_stdlib.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,80 @@ export function bit_array_slice(bits, position, length) {
return new Ok(bitArraySlice(bits, start * 8, end * 8));
}

// Splits `bits` around the first occurrence of `pattern`.
//
// Returns `Ok([before, after])` with the bytes on either side of the match,
// or `Error(Nil)` when the pattern is empty, is at least as long as `bits`,
// is not found, or when anything inside the `try` throws.
export function bit_array_split_once(bits, pattern) {
try {
const patternEmpty = pattern.buffer.length < 1
// Despite the name this is also true when the two lengths are equal, so an
// input that is exactly the pattern is rejected below.
const patternLongerThanBits = pattern.buffer.length >= bits.buffer.length
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may not be the length of the bit array itself.

// NOTE(review): this check runs after `bits.buffer` and `pattern.buffer` have
// already been dereferenced above.
const incorrectArguments = !(bits instanceof BitArray) || !(pattern instanceof BitArray)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No runtime type checking please 🙏

if (incorrectArguments || patternEmpty || patternLongerThanBits) {
return new Error(Nil);
}

// Number of start offsets at which the whole pattern still fits.
const n = bits.buffer.length - pattern.buffer.length + 1;
// Naive search: try each offset in ascending order and compare byte by byte,
// moving on to the next offset at the first mismatch.
find: for (let i = 0; i < n; i++) {
for (let j = 0; j < pattern.buffer.length; j++) {
if (bits.buffer[i + j] !== pattern.buffer[j]) {
continue find;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like quite an expensive algorithm, it is checking bytes multiple times even when we know they could not match.

There's a few established algorithms we could use https://en.wikipedia.org/wiki/String-searching_algorithm. Boyer–Moore–Horspool seems fairly straightforward, but two-way algorithm seems to be the most popular approach.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's a naive approach. I'll have a go at one of the more efficient algorithms.

}
// Full match at offset `i`: return the bytes before and after it.
const before = bits.buffer.slice(0, i);
const after = bits.buffer.slice(i + pattern.buffer.length);
return new Ok([new BitArray(before), new BitArray(after)]);
}

// No offset matched.
return new Error(Nil);
// Any exception raised above is reported as `Error(Nil)`.
// NOTE(review): which concrete failure this guards against is not visible
// here — confirm.
} catch (e) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's this try-catch for?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't remember if it catches a specific case. I'll check and remove it if not.

return new Error(Nil);
}
}

// Splits `bits` at every occurrence of `pattern`, dropping empty segments.
//
// Returns `Ok(List)` of the non-empty segments between matches, in order.
// A non-empty input that does not contain the pattern yields a one-element
// list; an empty input, or an input equal to the pattern, yields an empty
// list (this mirrors the `trim_all` option used by the Erlang
// implementation).
//
// Returns `Error(Nil)` when the pattern is empty, or when reading either
// argument throws.
export function bit_array_split(bits, pattern) {
  try {
    // No explicit `instanceof` checks: dereferencing a value without a
    // `buffer` throws below and is reported as `Error(Nil)` by the catch.
    const haystack = bits.buffer;
    const needle = pattern.buffer;

    // An empty pattern cannot be searched for.
    if (needle.length < 1) {
      return new Error(Nil);
    }

    const segments = [];
    let segmentStart = 0;
    // Last offset at which the whole pattern still fits; negative when the
    // input is shorter than the pattern, in which case the loop never runs.
    const lastCandidate = haystack.length - needle.length;

    let i = 0;
    while (i <= lastCandidate) {
      let j = 0;
      while (j < needle.length && haystack[i + j] === needle[j]) {
        j++;
      }

      if (j < needle.length) {
        // Mismatch at this offset: try the next one.
        i++;
        continue;
      }

      // Full match at `i`: emit the bytes since the previous match, unless
      // that segment would be empty.
      if (i > segmentStart) {
        segments.push(new BitArray(haystack.slice(segmentStart, i)));
      }

      // Resume searching right after the match so matches never overlap.
      segmentStart = i + needle.length;
      i = segmentStart;
    }

    // Trailing bytes after the last match (or the whole input when there was
    // no match), again skipping an empty segment.
    if (segmentStart < haystack.length) {
      segments.push(new BitArray(haystack.slice(segmentStart)));
    }

    return new Ok(List.fromArray(segments));
  } catch (e) {
    // NOTE(review): presumably reached when `.buffer` cannot be read (for
    // example a bit array that is not byte-aligned) — confirm against the
    // prelude's `BitArray`.
    return new Error(Nil);
  }
}

// Builds a `UtfCodepoint` from the given value.
export function codepoint(int) {
  const wrapped = new UtfCodepoint(int);
  return wrapped;
}
Expand Down
86 changes: 86 additions & 0 deletions test/gleam/bit_array_test.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,92 @@ pub fn slice_test() {
== Error(Nil)
}

// Checks `bit_array.split_once` on byte-aligned input: splitting at the
// first match, matches at either end, multi-byte patterns, and the inputs
// that must yield `Error(Nil)`.
pub fn split_once_test() {
  // Only the first occurrence of the pattern is used as the split point.
  assert bit_array.split_once(<<"hello":utf8>>, <<"l":utf8>>)
    == Ok(#(<<"he":utf8>>, <<"lo":utf8>>))

  // A match at the very end leaves an empty second part.
  assert bit_array.split_once(<<"hello":utf8>>, <<"o":utf8>>)
    == Ok(#(<<"hell":utf8>>, <<>>))

  // A match at the very start leaves an empty first part.
  assert bit_array.split_once(<<"hello":utf8>>, <<"h":utf8>>)
    == Ok(#(<<>>, <<"ello":utf8>>))

  // Multi-byte pattern.
  assert bit_array.split_once(<<0, 1, 0, 2, 0, 3>>, <<0, 2>>)
    == Ok(#(<<0, 1>>, <<0, 3>>))

  // An empty pattern is rejected.
  assert bit_array.split_once(<<0, 1, 2, 0, 3, 4, 5>>, <<>>) == Error(Nil)

  // Empty input never contains the pattern.
  assert bit_array.split_once(<<>>, <<1>>) == Error(Nil)

  // Input that is exactly the pattern is reported as an error.
  assert bit_array.split_once(<<1>>, <<1>>) == Error(Nil)

  // Pattern not present.
  assert bit_array.split_once(<<0>>, <<1>>) == Error(Nil)
}

// This test is target specific since it's using non byte-aligned BitArrays
// and those are not supported on the JavaScript target.
@target(erlang)
pub fn split_once_erlang_only_test() {
  // The input ends with a 7-bit segment, so it is not a whole number of bytes.
  assert bit_array.split_once(<<0, 1, 2:7>>, <<1>>) == Error(Nil)
}

// Checks `bit_array.split` on byte-aligned input: repeated matches,
// multi-byte patterns, dropped empty parts, a missing pattern, and the
// empty-pattern error.
pub fn split_test() {
  // Adjacent matches produce no empty part between them.
  assert bit_array.split(<<"hello":utf8>>, <<"l":utf8>>)
    == Ok([<<"he":utf8>>, <<"o":utf8>>])

  // A leading match produces no empty first part.
  assert bit_array.split(<<0, 1, 0, 2, 0, 3>>, <<0>>)
    == Ok([<<1>>, <<2>>, <<3>>])

  // Multi-byte pattern.
  assert bit_array.split(<<0, 1, 0, 2, 0, 3>>, <<0, 2>>)
    == Ok([<<0, 1>>, <<0, 3>>])

  // A trailing match produces no empty last part.
  assert bit_array.split(<<1, 0>>, <<0>>) == Ok([<<1>>])

  assert bit_array.split(<<1, 0>>, <<1>>) == Ok([<<0>>])

  // Pattern not present: the input is the only part.
  assert bit_array.split(<<1>>, <<0>>) == Ok([<<1>>])

  // Input that is exactly the pattern leaves no parts at all.
  assert bit_array.split(<<1, 2>>, <<1, 2>>) == Ok([])

  // An empty pattern is rejected.
  assert bit_array.split(<<0, 1, 2, 0, 3, 4, 5>>, <<>>) == Error(Nil)
}

// This test is target specific since it's using non byte-aligned BitArrays
// and those are not supported on the JavaScript target.
@target(erlang)
pub fn split_erlang_only_test() {
  // The input ends with a 7-bit segment, so it is not a whole number of bytes.
  assert bit_array.split(<<0, 1, 2:7>>, <<1>>) == Error(Nil)
}

pub fn to_string_test() {
assert bit_array.to_string(<<>>) == Ok("")

Expand Down