Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ b'data to be encoded'
| b64encode (100 KB data) | 0.307 | 0.325 | 0.318 | 0.047 (6.6x) | 0.061 (5.3x) | 0.050 (6.4x) |
| b64encode (1 MB data) | 3.383 | 3.456 | 3.411 | 0.447 (7.6x) | 0.487 (7.1x) | 0.467 (7.3x) |
| b64encode (altchars + 100 KB data) | 0.472 | 0.490 | 0.483 | 0.303 (1.6x) | 0.320 (1.5x) | 0.313 (1.5x) |
| b64decode (100 KB data) | 0.512 | 0.569 | 0.538 | 0.110 (4.7x) | 0.125 (4.5x) | 0.117 (4.6x) |

## How to develop locally

Expand Down
39 changes: 29 additions & 10 deletions benchmarks/bench_encode.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,59 @@


# Number of calls each benchmark helper below performs per invocation.
ITERATIONS = 1_000

# Raw payloads of increasing size used as encode inputs.
SMALL_DATA = b"t" * 1_000 # 1 KB
MEDIUM_DATA = b"t" * 100_000 # 100 KB
LARGE_DATA = b"t" * 1_000_000 # 1 MB

# The same payloads pre-encoded with the standard library, so decode
# benchmarks measure decoding only.
SMALL_DATA_ENCODED = base64.b64encode(SMALL_DATA)
MEDIUM_DATA_ENCODED = base64.b64encode(MEDIUM_DATA)
LARGE_DATA_ENCODED = base64.b64encode(LARGE_DATA)


def stdlib_b64encode(data, altchars=None) -> None:
    """Call ``base64.b64encode`` on ``data`` ``ITERATIONS`` times.

    This is the standard-library baseline for the encode benchmarks;
    ``altchars`` is forwarded unchanged to every call.
    """
    for _ in range(ITERATIONS):
        base64.b64encode(data, altchars=altchars)


# The pre-rename name stays bound so references to it keep resolving.
stdlib_base64encode = stdlib_b64encode


def base64_utils_b64encode(data, altchars=None) -> None:
    """Call ``base64_utils.b64encode`` on ``data`` ``ITERATIONS`` times.

    This is the candidate implementation for the encode benchmarks;
    ``altchars`` is forwarded unchanged to every call.
    """
    for _ in range(ITERATIONS):
        base64_utils.b64encode(data, altchars=altchars)


# The pre-rename name stays bound so references to it keep resolving.
base64_utils_base64encode = base64_utils_b64encode

def stdlib_b64decode(data, altchars=None, validate=False) -> None:
    """Call ``base64.b64decode`` on ``data`` ``ITERATIONS`` times.

    ``altchars`` and ``validate`` are forwarded unchanged to every call.
    """
    completed = 0
    while completed < ITERATIONS:
        base64.b64decode(data, altchars=altchars, validate=validate)
        completed += 1


def base64_utils_b64decode(data, altchars=None, validate=False) -> None:
    """Call ``base64_utils.b64decode`` on ``data`` ``ITERATIONS`` times.

    ``altchars`` and ``validate`` are forwarded unchanged to every call.
    """
    completed = 0
    while completed < ITERATIONS:
        base64_utils.b64decode(data, altchars=altchars, validate=validate)
        completed += 1


# Each entry is (standard-library callable, base64_utils callable, label).
__benchmarks__ = [
    (
        lambda: stdlib_b64encode(SMALL_DATA),
        lambda: base64_utils_b64encode(SMALL_DATA),
        "b64encode (1 KB data)",
    ),
    (
        lambda: stdlib_b64encode(MEDIUM_DATA),
        lambda: base64_utils_b64encode(MEDIUM_DATA),
        "b64encode (100 KB data)",
    ),
    (
        lambda: stdlib_b64encode(LARGE_DATA),
        lambda: base64_utils_b64encode(LARGE_DATA),
        "b64encode (1 MB data)",
    ),
    (
        lambda: stdlib_b64encode(MEDIUM_DATA, altchars=b"-_"),
        lambda: base64_utils_b64encode(MEDIUM_DATA, altchars=b"-_"),
        "b64encode (altchars + 100 KB data)",
    ),
    (
        lambda: stdlib_b64decode(MEDIUM_DATA_ENCODED),
        lambda: base64_utils_b64decode(MEDIUM_DATA_ENCODED),
        "b64decode (100 KB data)",
    ),
]
1 change: 1 addition & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ b'data to be encoded'
| b64encode (100 KB data) | 0.307 | 0.325 | 0.318 | 0.047 (6.6x) | 0.061 (5.3x) | 0.050 (6.4x) |
| b64encode (1 MB data) | 3.383 | 3.456 | 3.411 | 0.447 (7.6x) | 0.487 (7.1x) | 0.467 (7.3x) |
| b64encode (altchars + 100 KB data) | 0.472 | 0.490 | 0.483 | 0.303 (1.6x) | 0.320 (1.5x) | 0.313 (1.5x) |
| b64decode (100 KB data) | 0.512 | 0.569 | 0.538 | 0.110 (4.7x) | 0.125 (4.5x) | 0.117 (4.6x) |

## How to develop locally

Expand Down
2 changes: 2 additions & 0 deletions python/base64_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from ._base64_utils import (
b64decode,
b64encode,
standard_b64encode,
urlsafe_b64encode,
)

__all__ = [
"b64encode",
"b64decode",
"standard_b64encode",
"urlsafe_b64encode",
]
6 changes: 6 additions & 0 deletions python/base64_utils/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,16 @@ __version__: str

__all__ = [
"b64encode",
"b64decode",
"standard_b64encode",
"urlsafe_b64encode",
]

# Base64-encode `s`; `altchars` is optional.
def b64encode(s: ReadableBuffer, altchars: ReadableBuffer | None = None) -> bytes: ...
# Decode Base64 input given as text or bytes. When `altchars` is supplied it
# must be exactly two bytes long: its first byte is treated as `+` and its
# second as `/`. `validate=True` selects strict decoding. ValueError is raised
# for a wrong-length `altchars` or undecodable input.
def b64decode(
    s: str | ReadableBuffer,
    altchars: str | ReadableBuffer | None = None,
    validate: bool = False,
) -> bytes: ...
# Base64-encode `s` with the standard alphabet.
def standard_b64encode(s: ReadableBuffer) -> bytes: ...
# Base64-encode `s`; presumably with the URL-safe alphabet — confirm in src/lib.rs.
def urlsafe_b64encode(s: ReadableBuffer) -> bytes: ...
70 changes: 69 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,25 @@
use base64_simd::{Out, STANDARD, URL_SAFE};
use base64_simd::{Out, STANDARD, URL_SAFE, forgiving_decode_inplace};
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::types::PyBytes;

/// Python argument that may be supplied either as `str` or as `bytes`.
///
/// `FromPyObject` is derived, so a value of this type can be used directly as
/// a `#[pyfunction]` parameter. Per the pyo3 derive documentation, the
/// `annotation` strings are the names reported for each variant when
/// extraction fails.
#[derive(FromPyObject)]
pub enum StringOrBytes {
    /// Text input, held as an owned Rust `String`.
    #[pyo3(transparent, annotation = "str")]
    String(String),
    /// Binary input, held as an owned byte vector.
    #[pyo3(transparent, annotation = "bytes")]
    Bytes(Vec<u8>),
}

impl StringOrBytes {
fn into_bytes(self) -> Vec<u8> {
match self {
StringOrBytes::String(s) => s.into_bytes(),
StringOrBytes::Bytes(b) => b,
}
}
}

#[pyfunction]
#[pyo3(signature = (s, altchars=None))]
pub fn b64encode(py: Python<'_>, s: &[u8], altchars: Option<&[u8]>) -> PyResult<Py<PyBytes>> {
Expand Down Expand Up @@ -37,6 +54,56 @@ pub fn b64encode(py: Python<'_>, s: &[u8], altchars: Option<&[u8]>) -> PyResult<
Ok(output.into())
}

/// Decode Base64 data passed from Python as `str` or `bytes` into `bytes`.
///
/// # Arguments
///
/// * `s` - the encoded input.
/// * `altchars` - optional two-byte alphabet override. Before decoding, every
///   occurrence of its first byte in `s` is rewritten to `+` and every
///   occurrence of its second byte to `/`.
/// * `validate` - when `true`, the input is decoded strictly with the
///   `STANDARD` alphabet. When `false`, it is decoded in place with
///   `forgiving_decode_inplace`.
///
/// # Errors
///
/// Raises `ValueError` when `altchars` is not exactly two bytes long or when
/// the input cannot be decoded.
#[pyfunction]
#[pyo3(signature = (s, altchars=None, validate=false))]
pub fn b64decode(
    py: Python<'_>,
    s: StringOrBytes,
    altchars: Option<StringOrBytes>,
    validate: bool,
) -> PyResult<Py<PyBytes>> {
    // Every decoding failure is reported to Python with the same message.
    let invalid_input = || PyValueError::new_err("Invalid base64-encoded string");

    let mut input: Vec<u8> = s.into_bytes();

    if let Some(alt) = altchars {
        let alt = alt.into_bytes();
        let (plus_alias, slash_alias) = match alt.as_slice() {
            [first, second] => (*first, *second),
            _ => {
                return Err(PyValueError::new_err(
                    "altchars must be a bytes-like object of length 2",
                ));
            }
        };

        // Map the alternative alphabet back onto the standard one. Each byte
        // is rewritten at most once, so swapping pairs such as b"/+" work.
        for byte in &mut input {
            if *byte == plus_alias {
                *byte = b'+';
            } else if *byte == slash_alias {
                *byte = b'/';
            }
        }
    }

    if !validate {
        // `input` is already an owned copy, so it can serve as the output buffer.
        let decoded = forgiving_decode_inplace(&mut input).map_err(|_| invalid_input())?;
        return Ok(PyBytes::new(py, decoded).into());
    }

    // `decoded_length` rejects inputs whose length or padding is impossible and
    // gives the exact size to allocate. `decode` validates the content while
    // decoding, so no separate `check` pass over the input is needed.
    let output_len = STANDARD
        .decoded_length(&input)
        .map_err(|_| invalid_input())?;

    let output: Bound<'_, PyBytes> = PyBytes::new_with(py, output_len, |buf| {
        STANDARD
            .decode(&input, Out::from_slice(buf))
            .map(|_| ())
            .map_err(|_| invalid_input())
    })?;
    Ok(output.into())
}

#[pyfunction]
pub fn standard_b64encode(py: Python<'_>, s: &[u8]) -> PyResult<Py<PyBytes>> {
let output_len = STANDARD.encoded_length(s.len());
Expand All @@ -61,6 +128,7 @@ pub fn urlsafe_b64encode(py: Python<'_>, s: &[u8]) -> PyResult<Py<PyBytes>> {
fn _base64_utils(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
m.add_function(wrap_pyfunction!(b64encode, m)?)?;
m.add_function(wrap_pyfunction!(b64decode, m)?)?;
m.add_function(wrap_pyfunction!(standard_b64encode, m)?)?;
m.add_function(wrap_pyfunction!(urlsafe_b64encode, m)?)?;
Ok(())
Expand Down
56 changes: 56 additions & 0 deletions tests/test_base64_decode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import base64

import base64_utils
import pytest


def test_b64decode() -> None:
    """Decoding ``bytes`` input returns ``bytes`` equal to the stdlib result."""
    encoded = b"dGVzdA=="
    result = base64_utils.b64decode(encoded)
    reference = base64.b64decode(encoded)

    assert isinstance(result, bytes)
    assert reference == result


def test_b64decode_str() -> None:
    """Decoding ``str`` input returns ``bytes`` equal to the stdlib result."""
    encoded_text = "dGVzdA=="
    result = base64_utils.b64decode(encoded_text)
    reference = base64.b64decode(encoded_text)

    assert isinstance(result, bytes)
    assert reference == result


def test_b64decode_altchars() -> None:
    """Decoding with ``altchars`` matches the standard library.

    The first input is the original case and contains none of the alternative
    characters. The second spells the same payload with ``-`` and ``_`` so the
    substitution is actually exercised.
    """
    altchars = b"-_"
    for data in (b"dGVzdA+/", b"dGVzdA-_"):
        decoded = base64_utils.b64decode(data, altchars=altchars)
        expected = base64.b64decode(data, altchars=altchars)

        assert isinstance(decoded, bytes)
        assert expected == decoded


def test_b64decode_altchars_invalid() -> None:
    """A one-byte ``altchars`` value is rejected with ``ValueError``."""
    too_short = b"-"
    with pytest.raises(ValueError):
        base64_utils.b64decode(b"dGVzdA+/", altchars=too_short)


def test_b64decode_validate() -> None:
    """An embedded space is tolerated without validation and rejected with it."""
    spaced = b"dGVz dA=="  # "test" with a space in the middle

    lenient = base64_utils.b64decode(spaced, validate=False)
    assert lenient == base64.b64decode(spaced, validate=False)
    assert lenient == b"test"

    # Both implementations must refuse the same input in strict mode.
    for strict_decode in (base64_utils.b64decode, base64.b64decode):
        with pytest.raises(ValueError):
            strict_decode(spaced, validate=True)


def test_b64decode_invalid_data() -> None:
    """Input that is not decodable Base64 raises ``ValueError``."""
    malformed = b"invalid_base64!!"
    with pytest.raises(ValueError):
        base64_utils.b64decode(malformed)
File renamed without changes.
10 changes: 10 additions & 0 deletions tests/test_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,13 @@ def test_standard_b64encode() -> None:
@pytest.mark.benchmark
def test_urlsafe_b64encode() -> None:
    """Benchmark ``urlsafe_b64encode`` on a short ``bytes`` payload."""
    payload = b"test data"
    base64_utils.urlsafe_b64encode(payload)


@pytest.mark.benchmark
def test_b64decode() -> None:
    """Benchmark ``b64decode`` on a short padded ``bytes`` payload."""
    encoded = b"dGVzdA=="
    base64_utils.b64decode(encoded)


@pytest.mark.benchmark
def test_b64decode_str() -> None:
    """Benchmark ``b64decode`` on a short padded ``str`` payload."""
    encoded_text = "dGVzdA=="
    base64_utils.b64decode(encoded_text)