Skip to content

Commit b98a050

Browse files
committed
chore(performance): Use simd for base64 encoding/decoding
1 parent 9151ce5 commit b98a050

12 files changed

+71
-114
lines changed

Cargo.lock

+23-8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+2-4
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ stdlib = [
6464
"dep:aes-siv",
6565
"dep:base16",
6666
"dep:base62",
67-
"dep:base64",
67+
"dep:base64-simd",
6868
"dep:cbc",
6969
"dep:cfb-mode",
7070
"dep:chacha20poly1305",
@@ -77,7 +77,6 @@ stdlib = [
7777
"dep:crypto_secretbox",
7878
"dep:csv",
7979
"dep:ctr",
80-
"dep:data-encoding",
8180
"dep:digest",
8281
"dep:domain",
8382
"dep:dns-lookup",
@@ -129,7 +128,7 @@ ansi_term = { version = "0.12", optional = true }
129128
arbitrary = { version = "1", optional = true, features = ["derive"] }
130129
base16 = { version = "0.2", optional = true }
131130
base62 = { version = "2.2.1", optional = true }
132-
base64 = { version = "0.22", optional = true }
131+
base64-simd = { version = "0.8", optional = true }
133132
bytes = { version = "1", default-features = false, optional = true }
134133
charset = { version = "0.1", optional = true }
135134
encoding_rs = { version = "0.8.35", optional = true }
@@ -142,7 +141,6 @@ clap.workspace = true
142141
codespan-reporting = { version = "0.12", optional = true }
143142
convert_case = { version = "0.7.1", optional = true }
144143
crc = { version = "3.2.1", optional = true }
145-
data-encoding = { version = "2", optional = true }
146144
digest = { version = "0.10", optional = true }
147145
dyn-clone = { version = "1", default-features = false, optional = true }
148146
exitcode = { version = "1", optional = true }

LICENSE-3rdparty.csv

+3-1
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ base16,https://github.com/thomcc/rust-base16,CC0-1.0,Thom Chiovoloni <tchiovolon
2121
base62,https://github.com/fbernier/base62,MIT,"François Bernier <[email protected]>, Chai T. Rex <[email protected]>"
2222
base64,https://github.com/marshallpierce/rust-base64,MIT OR Apache-2.0,"Alice Maz <[email protected]>, Marshall Pierce <[email protected]>"
2323
base64,https://github.com/marshallpierce/rust-base64,MIT OR Apache-2.0,Marshall Pierce <[email protected]>
24+
base64-simd,https://github.com/Nugine/simd,MIT,The base64-simd Authors
2425
bit-set,https://github.com/contain-rs/bit-set,Apache-2.0 OR MIT,Alexis Beingessner <[email protected]>
2526
bit-vec,https://github.com/contain-rs/bit-vec,Apache-2.0 OR MIT,Alexis Beingessner <[email protected]>
2627
bitflags,https://github.com/bitflags/bitflags,MIT OR Apache-2.0,The Rust Project Developers
@@ -65,7 +66,6 @@ crypto-common,https://github.com/RustCrypto/traits,MIT OR Apache-2.0,RustCrypto
6566
crypto_secretbox,https://github.com/RustCrypto/nacl-compat/tree/master/crypto_secretbox,Apache-2.0 OR MIT,RustCrypto Developers
6667
csv,https://github.com/BurntSushi/rust-csv,Unlicense OR MIT,Andrew Gallant <[email protected]>
6768
ctr,https://github.com/RustCrypto/block-modes,MIT OR Apache-2.0,RustCrypto Developers
68-
data-encoding,https://github.com/ia0/data-encoding,MIT,Julien Cretin <[email protected]>
6969
dbl,https://github.com/RustCrypto/utils,MIT OR Apache-2.0,RustCrypto Developers
7070
deranged,https://github.com/jhpratt/deranged,MIT OR Apache-2.0,Jacob Pratt <[email protected]>
7171
digest,https://github.com/RustCrypto/traits,MIT OR Apache-2.0,RustCrypto Developers
@@ -162,6 +162,7 @@ once_cell,https://github.com/matklad/once_cell,MIT OR Apache-2.0,Aleksey Kladov
162162
onig,http://github.com/iwillspeak/rust-onig,MIT,"Will Speak <[email protected]>, Ivan Ivashchenko <[email protected]>"
163163
opaque-debug,https://github.com/RustCrypto/utils,MIT OR Apache-2.0,RustCrypto Developers
164164
ordered-float,https://github.com/reem/rust-ordered-float,MIT,"Jonathan Reem <[email protected]>, Matt Brubeck <[email protected]>"
165+
outref,https://github.com/Nugine/outref,MIT,The outref Authors
165166
overload,https://github.com/danaugrs/overload,MIT,Daniel Salvadori <[email protected]>
166167
owo-colors,https://github.com/owo-colors/owo-colors,MIT,jam1garner <[email protected]>
167168
pad,https://github.com/ogham/rust-pad,MIT,Ben S <[email protected]>
@@ -259,6 +260,7 @@ utf8-width,https://github.com/magiclen/utf8-width,MIT,Magic Len <[email protected]
259260
utf8_iter,https://github.com/hsivonen/utf8_iter,Apache-2.0 OR MIT,Henri Sivonen <[email protected]>
260261
uuid,https://github.com/uuid-rs/uuid,Apache-2.0 OR MIT,"Ashley Mannix<[email protected]>, Dylan DPC<[email protected]>, Hunar Roop Kahlon<[email protected]>"
261262
valuable,https://github.com/tokio-rs/valuable,MIT,The valuable Authors
263+
vsimd,https://github.com/Nugine/simd,MIT,The vsimd Authors
262264
vte,https://github.com/alacritty/vte,Apache-2.0 OR MIT,"Joe Wilm <[email protected]>, Christian Duerr <[email protected]>"
263265
wasi,https://github.com/bytecodealliance/wasi,Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT,The Cranelift Project Developers
264266
wasm-bindgen,https://github.com/rustwasm/wasm-bindgen,MIT OR Apache-2.0,The wasm-bindgen Developers

src/stdlib/decode_base64.rs

+18-18
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,23 @@
11
use crate::compiler::prelude::*;
2-
use base64::Engine as _;
3-
use std::str::FromStr;
4-
5-
use super::util::Base64Charset;
62

73
fn decode_base64(charset: Option<Value>, value: Value) -> Resolved {
4+
let value = value.try_bytes()?;
85
let charset = charset
96
.map(Value::try_bytes)
107
.transpose()?
11-
.map(|c| Base64Charset::from_str(&String::from_utf8_lossy(&c)))
12-
.transpose()?
13-
.unwrap_or_default();
14-
let alphabet = match charset {
15-
Base64Charset::Standard => base64::alphabet::STANDARD,
16-
Base64Charset::UrlSafe => base64::alphabet::URL_SAFE,
8+
.unwrap_or(Bytes::from_static(b"standard"));
9+
10+
let decoder = match charset.as_ref() {
11+
b"standard" => base64_simd::STANDARD_NO_PAD,
12+
b"url_safe" => base64_simd::URL_SAFE_NO_PAD,
13+
_ => Err("unknown charset")?,
1714
};
18-
let value = value.try_bytes()?;
19-
let config = base64::engine::general_purpose::GeneralPurposeConfig::new()
20-
.with_decode_padding_mode(base64::engine::DecodePaddingMode::Indifferent);
21-
let engine = base64::engine::GeneralPurpose::new(&alphabet, config);
2215

23-
match engine.decode(value) {
24-
Ok(s) => Ok(Value::from(Bytes::from(s))),
25-
Err(_) => Err("unable to decode value to base64".into()),
26-
}
16+
let decoded = decoder
17+
.decode_to_vec(value)
18+
.map_err(|_| "unable to decode value to base64")?;
19+
20+
Ok(Value::Bytes(Bytes::from(decoded)))
2721
}
2822

2923
#[derive(Clone, Copy, Debug)]
@@ -117,6 +111,12 @@ mod test {
117111
tdef: TypeDef::bytes().fallible(),
118112
}
119113

114+
with_invalid_charset {
115+
args: func_args![value: value!("c29tZSs9c3RyaW5nL3ZhbHVl"), charset: value!("invalid")],
116+
want: Err("unknown charset"),
117+
tdef: TypeDef::bytes().fallible(),
118+
}
119+
120120
empty_string_standard_charset {
121121
args: func_args![value: value!(""), charset: value!("standard")],
122122
want: Ok(value!("")),

src/stdlib/decode_lz4.rs

+3-7
Original file line number | Diff line number | Diff line change
@@ -71,16 +71,12 @@ mod tests {
7171
use super::*;
7272
use crate::value;
7373

74-
use base64::Engine;
7574
use nom::AsBytes;
7675

7776
fn decode_base64(text: &str) -> Vec<u8> {
78-
let engine = base64::engine::GeneralPurpose::new(
79-
&base64::alphabet::STANDARD,
80-
base64::engine::general_purpose::GeneralPurposeConfig::new(),
81-
);
82-
83-
engine.decode(text).expect("Cannot decode from Base64")
77+
base64_simd::STANDARD
78+
.decode_to_vec(text)
79+
.expect("Cannot decode from Base64")
8480
}
8581

8682
test_function![

src/stdlib/decode_mime_q.rs

+2-3
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
use charset::Charset;
2-
use data_encoding::BASE64_MIME;
32
use nom::{
43
branch::alt,
54
bytes::complete::{tag, take_until, take_until1},
@@ -160,8 +159,8 @@ impl EncodedWord<'_> {
160159

161160
// Decode
162161
let decoded = match self.encoding {
163-
"B" | "b" => BASE64_MIME
164-
.decode(self.input.as_bytes())
162+
"B" | "b" => base64_simd::STANDARD_NO_PAD
163+
.decode_to_vec(self.input.as_bytes())
165164
.map_err(|_| "Unable to decode base64 value")?,
166165
"Q" | "q" => {
167166
// The quoted_printable module does a trim_end on the input, so if

src/stdlib/decode_snappy.rs

+3-7
Original file line number | Diff line number | Diff line change
@@ -70,16 +70,12 @@ impl FunctionExpression for DecodeSnappyFn {
7070
mod tests {
7171
use super::*;
7272
use crate::value;
73-
use base64::Engine;
7473
use nom::AsBytes;
7574

7675
fn decode_base64(text: &str) -> Vec<u8> {
77-
let engine = base64::engine::GeneralPurpose::new(
78-
&base64::alphabet::STANDARD,
79-
base64::engine::general_purpose::GeneralPurposeConfig::new(),
80-
);
81-
82-
engine.decode(text).expect("Cannot decode from Base64")
76+
base64_simd::STANDARD
77+
.decode_to_vec(text)
78+
.expect("Cannot decode from Base64")
8379
}
8480

8581
test_function![

src/stdlib/encode_base64.rs

+10-13
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,26 @@
11
use crate::compiler::prelude::*;
2-
use base64::Engine as _;
3-
use std::str::FromStr;
4-
5-
use super::util::Base64Charset;
62

73
fn encode_base64(value: Value, padding: Option<Value>, charset: Option<Value>) -> Resolved {
84
let value = value.try_bytes()?;
95
let padding = padding
106
.map(VrlValueConvert::try_boolean)
117
.transpose()?
128
.unwrap_or(true);
9+
1310
let charset = charset
1411
.map(VrlValueConvert::try_bytes)
1512
.transpose()?
16-
.map(|c| Base64Charset::from_str(&String::from_utf8_lossy(&c)))
17-
.transpose()?
18-
.unwrap_or_default();
13+
.unwrap_or(Bytes::from_static(b"standard"));
1914

20-
let engine = base64::engine::GeneralPurpose::new(
21-
&base64::alphabet::Alphabet::from(charset),
22-
base64::engine::general_purpose::GeneralPurposeConfig::default()
23-
.with_encode_padding(padding),
24-
);
15+
let encoder = match (padding, charset.as_ref()) {
16+
(true, b"standard") => base64_simd::STANDARD,
17+
(false, b"standard") => base64_simd::STANDARD_NO_PAD,
18+
(true, b"url_safe") => base64_simd::URL_SAFE,
19+
(false, b"url_safe") => base64_simd::URL_SAFE_NO_PAD,
20+
_ => Err("unknown charset")?,
21+
};
2522

26-
Ok(engine.encode(value).into())
23+
Ok(encoder.encode_to_string(value).into())
2724
}
2825

2926
#[derive(Clone, Copy, Debug)]

src/stdlib/encode_lz4.rs

+3-7
Original file line number | Diff line number | Diff line change
@@ -66,16 +66,12 @@ impl FunctionExpression for EncodeLz4Fn {
6666
mod test {
6767
use super::*;
6868
use crate::value;
69-
use base64::Engine;
7069
use nom::AsBytes;
7170

7271
fn decode_base64(text: &str) -> Vec<u8> {
73-
let engine = base64::engine::GeneralPurpose::new(
74-
&base64::alphabet::STANDARD,
75-
base64::engine::general_purpose::GeneralPurposeConfig::new(),
76-
);
77-
78-
engine.decode(text).expect("Cannot decode from Base64")
72+
base64_simd::STANDARD
73+
.decode_to_vec(text)
74+
.expect("Cannot decode from Base64")
7975
}
8076

8177
test_function![

src/stdlib/encode_snappy.rs

+3-7
Original file line number | Diff line number | Diff line change
@@ -70,16 +70,12 @@ impl FunctionExpression for EncodeSnappyFn {
7070
mod test {
7171
use super::*;
7272
use crate::value;
73-
use base64::Engine;
7473
use nom::AsBytes;
7574

7675
fn decode_base64(text: &str) -> Vec<u8> {
77-
let engine = base64::engine::GeneralPurpose::new(
78-
&base64::alphabet::STANDARD,
79-
base64::engine::general_purpose::GeneralPurposeConfig::new(),
80-
);
81-
82-
engine.decode(text).expect("Cannot decode from Base64")
76+
base64_simd::STANDARD
77+
.decode_to_vec(text)
78+
.expect("Cannot decode from Base64")
8379
}
8480

8581
test_function![

src/stdlib/redact.rs

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
use crate::compiler::prelude::*;
2-
use base64::engine::Engine;
32
use std::{
43
borrow::Cow,
54
convert::{TryFrom, TryInto},
@@ -460,7 +459,7 @@ impl Encoder {
460459
fn encode(self, data: &[u8]) -> String {
461460
use Encoder::{Base16, Base64};
462461
match self {
463-
Base64 => base64::engine::general_purpose::STANDARD.encode(data),
462+
Base64 => base64_simd::STANDARD.encode_to_string(data),
464463
Base16 => base16::encode_lower(data),
465464
}
466465
}

src/stdlib/util.rs

-37
Original file line number | Diff line number | Diff line change
@@ -81,43 +81,6 @@ pub(crate) fn is_nullish(value: &Value) -> bool {
8181
}
8282
}
8383

84-
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
85-
pub enum Base64Charset {
86-
Standard,
87-
UrlSafe,
88-
}
89-
90-
impl Default for Base64Charset {
91-
fn default() -> Self {
92-
Self::Standard
93-
}
94-
}
95-
96-
impl From<Base64Charset> for base64::alphabet::Alphabet {
97-
fn from(charset: Base64Charset) -> base64::alphabet::Alphabet {
98-
use Base64Charset::{Standard, UrlSafe};
99-
100-
match charset {
101-
Standard => base64::alphabet::STANDARD,
102-
UrlSafe => base64::alphabet::URL_SAFE,
103-
}
104-
}
105-
}
106-
107-
impl std::str::FromStr for Base64Charset {
108-
type Err = &'static str;
109-
110-
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
111-
use Base64Charset::{Standard, UrlSafe};
112-
113-
match s {
114-
"standard" => Ok(Standard),
115-
"url_safe" => Ok(UrlSafe),
116-
_ => Err("unknown charset"),
117-
}
118-
}
119-
}
120-
12184
#[derive(Clone, Debug)]
12285
pub(super) enum ConstOrExpr {
12386
Const(Value),

0 commit comments

Comments
 (0)