Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 106 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,109 @@
/pyo3-polars/target
Cargo.lock
.idea/
venv/
target/
rust-toolchain.toml
*.pyc
*.so
*.dll
*.pyd

# Created by https://www.toptal.com/developers/gitignore/api/linux,rust,python,osx
# Edit at https://www.toptal.com/developers/gitignore?templates=linux,rust,python,osx
### OSX ###
# General
.DS_Store
.AppleDouble
.LSOverride

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# PyBuilder
.pybuilder/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

### Rust ###
# Generated by Cargo
# will have compiled files and executables
debug/

# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock

# These are backup files generated by rustfmt
**/*.rs.bk

# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb

# End of https://www.toptal.com/developers/gitignore/api/linux,rust,python,osx
1 change: 1 addition & 0 deletions polars_hash/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ md5 = { version = "0.7.0" }
h3o = { version = "0.6.4" }
xxhash-rust = { version = "0.8.12", features = ["xxh32", "xxh64"] }
mur3 = { version = "0.1.0" }
hex = {version = "0.4"}


[target.'cfg(target_os = "linux")'.dependencies]
Expand Down
10 changes: 10 additions & 0 deletions polars_hash/polars_hash/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,16 @@ def sha3_224(self) -> pl.Expr:
is_elementwise=True,
)

def sha3_shake128(self, *, length: int) -> pl.Expr:
    """Hash each Utf8 value with SHAKE128 (SHA-3 family) and return a Utf8 hash.

    Args:
        length: Keyword-only. Forwarded to the ``sha3_shake128`` plugin function
            as the ``length`` keyword argument (the requested digest size).

    Returns:
        An element-wise plugin expression built on the wrapped expression.
    """
    plugin_kwargs = {"length": length}
    return register_plugin_function(
        plugin_path=Path(__file__).parent,
        function_name="sha3_shake128",
        args=self._expr,
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )

def blake3(self) -> pl.Expr:
"""Takes Utf8 as input and returns utf8 hash with blake3."""
return register_plugin_function(
Expand Down
16 changes: 16 additions & 0 deletions polars_hash/src/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ struct SeedKwargs64bit {
seed: u64,
}

/// Keyword arguments for expressions that accept a `length` option.
///
/// Derives `Deserialize` so it can be built from the kwargs supplied by the
/// caller of the plugin expression.
#[derive(Deserialize)]
struct LengthKwargs {
/// Requested output length, passed through to the hashing helper.
length: usize,
}

pub fn blake3_hash_str(value: &str, output: &mut string::String) {
let hash = blake3::hash(value.as_bytes());
write!(output, "{}", hash).unwrap()
Expand Down Expand Up @@ -186,6 +191,17 @@ fn sha3_224(inputs: &[Series]) -> PolarsResult<Series> {
Ok(out.into_series())
}

/// Polars expression that hashes every string of the first input column with SHAKE128.
///
/// `kwargs.length` is forwarded to `sha3_shake128_hash` for each value; the
/// result is returned as a string series.
#[polars_expr(output_type=String)]
fn sha3_shake128(inputs: &[Series], kwargs: LengthKwargs) -> PolarsResult<Series> {
    // Copy the requested length out once so the closure only captures a plain usize.
    let length = kwargs.length;
    let strings = inputs[0].str()?;

    let hashed: StringChunked =
        strings.apply_into_string_amortized(|text: &str, buf: &mut string::String| {
            sha3_shake128_hash(text, buf, length)
        });

    Ok(hashed.into_series())
}

#[polars_expr(output_type=String)]
fn ghash_encode(inputs: &[Series]) -> PolarsResult<Series> {
let ca = inputs[0].struct_()?;
Expand Down
11 changes: 10 additions & 1 deletion polars_hash/src/sha_hashers.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use sha1::Sha1;
use sha2::{Digest, Sha224, Sha256, Sha384, Sha512};
use sha3::{Sha3_224, Sha3_256, Sha3_384, Sha3_512};
use sha3::{digest::{ExtendableOutput, Update, XofReader}, Sha3_224, Sha3_256, Sha3_384, Sha3_512, Shake128};
use std::fmt::Write;

pub fn sha1_hash(value: &str, output: &mut String) {
Expand Down Expand Up @@ -47,3 +47,12 @@ pub fn sha3_224_hash(value: &str, output: &mut String) {
let hash = Sha3_224::digest(value);
write!(output, "{:x}", hash).unwrap()
}

pub fn sha3_shake128_hash(value: &str, output: &mut String, length: usize) {
let mut hasher = Shake128::default();
hasher.update(value.as_bytes());
let mut reader = hasher.finalize_xof();
let mut result = vec![0u8; length];
reader.read(&mut result);
write!(output, "{}", hex::encode(result)).unwrap()
}
Loading