From 0565982516e077b0ec8f7cda99aa3061dd759d66 Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 11:28:29 +0200 Subject: [PATCH 01/16] added base to geohashers --- polars_hash/src/geohashers.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/polars_hash/src/geohashers.rs b/polars_hash/src/geohashers.rs index ab51890..d5e6036 100644 --- a/polars_hash/src/geohashers.rs +++ b/polars_hash/src/geohashers.rs @@ -5,13 +5,19 @@ pub fn geohash_encoder( lat: Option, long: Option, len: Option, + base: Option, ) -> PolarsResult> { match (lat, long) { (Some(lat), Some(long)) => match len { - Some(len) => Ok(Some( - encode(Coord { x: long, y: lat }, len as usize) - .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?, - )), + Some(len) => match base { + Some(base) => Ok(Some( + encode(Coord { x: long, y: lat }, len as usize, base as u32) + .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?, + )), + _ => Err(PolarsError::ComputeError( + "Base may not be null".to_string().into(), + )), + }, _ => Err(PolarsError::ComputeError( "Length may not be null".to_string().into(), )), From 024bf2bcf9ca388d55cd77d6444cfa8e0e809be8 Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 11:44:30 +0200 Subject: [PATCH 02/16] trying to fix errors --- polars_hash/src/geohashers.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/polars_hash/src/geohashers.rs b/polars_hash/src/geohashers.rs index d5e6036..c76d6be 100644 --- a/polars_hash/src/geohashers.rs +++ b/polars_hash/src/geohashers.rs @@ -1,6 +1,7 @@ use geohash::{decode, encode, neighbors, Coord}; use polars::prelude::*; +// Geohash encoder function pub fn geohash_encoder( lat: Option, long: Option, @@ -14,11 +15,11 @@ pub fn geohash_encoder( encode(Coord { x: long, y: lat }, len as usize, base as u32) .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?, )), - _ => Err(PolarsError::ComputeError( + None => Err(PolarsError::ComputeError( "Base may not be null".to_string().into(), )), }, - _ => Err(PolarsError::ComputeError( + None => Err(PolarsError::ComputeError( "Length may not be null".to_string().into(), )), }, @@ -33,6 +34,7 @@ pub fn geohash_encoder( } } +// Geohash decoder function pub fn geohash_decoder(ca: &StringChunked) -> PolarsResult { let mut longitude: PrimitiveChunkedBuilder = PrimitiveChunkedBuilder::new("longitude", ca.len()); @@ -48,7 +50,7 @@ pub fn geohash_decoder(ca: &StringChunked) -> PolarsResult { longitude.append_value(x_value); latitude.append_value(y_value); } - _ => { + None => { longitude.append_null(); latitude.append_null(); } @@ -59,6 +61,7 @@ pub fn geohash_decoder(ca: &StringChunked) -> PolarsResult { StructChunked::new(ca.name(), &[ser_long, ser_lat]) } +// Geohash neighbors function pub fn geohash_neighbors(ca: &StringChunked) -> PolarsResult { let mut n_ca = StringChunkedBuilder::new("n", ca.len()); let mut ne_ca = StringChunkedBuilder::new("ne", ca.len()); @@ -83,7 +86,7 @@ pub fn geohash_neighbors(ca: &StringChunked) -> PolarsResult { w_ca.append_value(neighbors_result.w); nw_ca.append_value(neighbors_result.nw); } - _ => { + None => { n_ca.append_null(); ne_ca.append_null(); e_ca.append_null(); From dca45aca4aa72e589df340f4aef43e42b84fcbc9 Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 11:57:29 +0200 Subject: [PATCH 03/16] Adding functions to encode the base --- polars_hash/src/geohashers.rs | 38 +++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/polars_hash/src/geohashers.rs b/polars_hash/src/geohashers.rs index c76d6be..9927136 100644 --- a/polars_hash/src/geohashers.rs +++ b/polars_hash/src/geohashers.rs @@ -1,6 +1,26 @@ use geohash::{decode, encode, neighbors, Coord}; use polars::prelude::*; +// Helper function to encode with base 32 +fn encode_base32(coord: Coord, len: usize) -> Result { + encode(coord, len).map_err(|e| PolarsError::ComputeError(e.to_string().into())) +} + +// Helper function to encode with base 16 +fn encode_base16(coord: Coord, len: usize) -> Result { + // Placeholder for actual base 16 encoding logic + // Replace this with the actual base 16 encode function when available + encode(coord, len).map_err(|e| PolarsError::ComputeError(e.to_string().into())) +} + +// Function to select the appropriate encoding function based on the base +fn select_encode_function(base: i64) -> fn(Coord, usize) -> Result { + match base { + 32 => encode_base32, + _ => encode_base16, // Default to base 16 + } +} + // Geohash encoder function pub fn geohash_encoder( lat: Option, @@ -11,15 +31,17 @@ pub fn geohash_encoder( match (lat, long) { (Some(lat), Some(long)) => match len { Some(len) => match base { - Some(base) => Ok(Some( - encode(Coord { x: long, y: lat }, len as usize, base as u32) - .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?, - )), - None => Err(PolarsError::ComputeError( + Some(base) => { + let coord = Coord { x: long, y: lat }; + let encode_fn = select_encode_function(base); + let encoded = encode_fn(coord, len as usize)?; + Ok(Some(encoded)) + } + _ => Err(PolarsError::ComputeError( "Base may not be null".to_string().into(), )), }, - None => Err(PolarsError::ComputeError( + _ => Err(PolarsError::ComputeError( "Length may not be null".to_string().into(), )), }, @@ -50,7 +72,7 @@ pub fn geohash_decoder(ca: &StringChunked) -> PolarsResult { longitude.append_value(x_value); latitude.append_value(y_value); } - None => { + _ => { longitude.append_null(); latitude.append_null(); } @@ -86,7 +108,7 @@ pub fn geohash_neighbors(ca: &StringChunked) -> PolarsResult { w_ca.append_value(neighbors_result.w); nw_ca.append_value(neighbors_result.nw); } - None => { + _ => { n_ca.append_null(); ne_ca.append_null(); e_ca.append_null(); From b2c0ff30c01f7714d4ba08becec93a6dc5f02de4 Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 12:03:56 +0200 Subject: [PATCH 04/16] Correcting mismatch of arguments --- polars_hash/src/geohashers.rs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/polars_hash/src/geohashers.rs b/polars_hash/src/geohashers.rs index 9927136..b980ac3 100644 --- a/polars_hash/src/geohashers.rs +++ b/polars_hash/src/geohashers.rs @@ -26,21 +26,16 @@ pub fn geohash_encoder( lat: Option, long: Option, len: Option, - base: Option, ) -> PolarsResult> { + let base = 16; // Set default base to 16 match (lat, long) { (Some(lat), Some(long)) => match len { - Some(len) => match base { - Some(base) => { - let coord = Coord { x: long, y: lat }; - let encode_fn = select_encode_function(base); - let encoded = encode_fn(coord, len as usize)?; - Ok(Some(encoded)) - } - _ => Err(PolarsError::ComputeError( - "Base may not be null".to_string().into(), - )), - }, + Some(len) => { + let coord = Coord { x: long, y: lat }; + let encode_fn = select_encode_function(base); + let encoded = encode_fn(coord, len as usize)?; + Ok(Some(encoded)) + } _ => Err(PolarsError::ComputeError( "Length may not be null".to_string().into(), )), From 860381b3244202048905ee711467d3a0d2a8e6e6 Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 13:42:53 +0200 Subject: [PATCH 05/16] modifying expressions and geohashers code with support for geohash 16 --- polars_hash/src/expressions.rs | 6 ++++-- polars_hash/src/geohashers.rs | 39 +++++++++------------------------- 2 files changed, 14 insertions(+), 31 deletions(-) diff --git a/polars_hash/src/expressions.rs b/polars_hash/src/expressions.rs index 47ab732..104f70e 100644 --- a/polars_hash/src/expressions.rs +++ b/polars_hash/src/expressions.rs @@ -202,13 +202,15 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { let out: StringChunked = match len.len() { 1 => match unsafe { len.get_unchecked(0) } { Some(len) => try_binary_elementwise(ca_lat, ca_long, |ca_lat_opt, ca_long_opt| { - geohash_encoder(ca_lat_opt, ca_long_opt, Some(len)) + geohash_encoder(ca_lat_opt, ca_long_opt, Some(len), None) // Pass None for base }), _ => Err(PolarsError::ComputeError( "Length may not be null".to_string().into(), )), }, - _ => try_ternary_elementwise(ca_lat, ca_long, len, geohash_encoder), + _ => try_ternary_elementwise(ca_lat, ca_long, len, |ca_lat_opt, ca_long_opt, len| { + geohash_encoder(ca_lat_opt, ca_long_opt, Some(len), None) // Pass None for base + }), }?; Ok(out.into_series()) } diff --git a/polars_hash/src/geohashers.rs b/polars_hash/src/geohashers.rs index b980ac3..218a53a 100644 --- a/polars_hash/src/geohashers.rs +++ b/polars_hash/src/geohashers.rs @@ -1,40 +1,23 @@ use geohash::{decode, encode, neighbors, Coord}; use polars::prelude::*; -// Helper function to encode with base 32 -fn encode_base32(coord: Coord, len: usize) -> Result { - encode(coord, len).map_err(|e| PolarsError::ComputeError(e.to_string().into())) -} - -// Helper function to encode with base 16 -fn encode_base16(coord: Coord, len: usize) -> Result { - // Placeholder for actual base 16 encoding logic - // Replace this with the actual base 16 encode function when available - encode(coord, len).map_err(|e| PolarsError::ComputeError(e.to_string().into())) -} - -// Function to select the appropriate encoding function based on the base -fn select_encode_function(base: i64) -> fn(Coord, usize) -> Result { - match base { - 32 => encode_base32, - _ => encode_base16, // Default to base 16 - } -} - -// Geohash encoder function pub fn geohash_encoder( lat: Option, long: Option, len: Option, + base: Option, // New parameter for base ) -> PolarsResult> { - let base = 16; // Set default base to 16 match (lat, long) { (Some(lat), Some(long)) => match len { Some(len) => { - let coord = Coord { x: long, y: lat }; - let encode_fn = select_encode_function(base); - let encoded = encode_fn(coord, len as usize)?; - Ok(Some(encoded)) + let base = match base { + Some(base) => base, // Use the provided base + None => 32, // Default to base 32 + }; + Ok(Some( + encode(Coord { x: long, y: lat }, len as usize, base) + .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?, + )) } _ => Err(PolarsError::ComputeError( "Length may not be null".to_string().into(), @@ -51,7 +34,6 @@ pub fn geohash_encoder( } } -// Geohash decoder function pub fn geohash_decoder(ca: &StringChunked) -> PolarsResult { let mut longitude: PrimitiveChunkedBuilder = PrimitiveChunkedBuilder::new("longitude", ca.len()); @@ -78,7 +60,6 @@ pub fn geohash_decoder(ca: &StringChunked) -> PolarsResult { StructChunked::new(ca.name(), &[ser_long, ser_lat]) } -// Geohash neighbors function pub fn geohash_neighbors(ca: &StringChunked) -> PolarsResult { let mut n_ca = StringChunkedBuilder::new("n", ca.len()); let mut ne_ca = StringChunkedBuilder::new("ne", ca.len()); @@ -137,4 +118,4 @@ pub fn geohash_neighbors(ca: &StringChunked) -> PolarsResult { ser_north_west, ], ) -} +} \ No newline at end of file From 9931edb9b7780b6080caa38a299366fffd4e6cb0 Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 13:48:52 +0200 Subject: [PATCH 06/16] type mismatch and argument correction --- polars_hash/src/expressions.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polars_hash/src/expressions.rs b/polars_hash/src/expressions.rs index 104f70e..70b2023 100644 --- a/polars_hash/src/expressions.rs +++ b/polars_hash/src/expressions.rs @@ -202,14 +202,14 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { let out: StringChunked = match len.len() { 1 => match unsafe { len.get_unchecked(0) } { Some(len) => try_binary_elementwise(ca_lat, ca_long, |ca_lat_opt, ca_long_opt| { - geohash_encoder(ca_lat_opt, ca_long_opt, Some(len), None) // Pass None for base + geohash_encoder(ca_lat_opt, ca_long_opt, Some(len.expect("Length may not be null")), None) // Pass None for base }), _ => Err(PolarsError::ComputeError( "Length may not be null".to_string().into(), )), }, _ => try_ternary_elementwise(ca_lat, ca_long, len, |ca_lat_opt, ca_long_opt, len| { - geohash_encoder(ca_lat_opt, ca_long_opt, Some(len), None) // Pass None for base + geohash_encoder(ca_lat_opt, ca_long_opt, Some(len.expect("Length may not be null")), None) // Pass None for base }), }?; Ok(out.into_series()) From 3a01e4f5394d00e6fe0c7624e154c10d439676e4 Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 13:54:14 +0200 Subject: [PATCH 07/16] added use std::option::Option; and 16 as default --- polars_hash/src/expressions.rs | 2 ++ polars_hash/src/geohashers.rs | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/polars_hash/src/expressions.rs b/polars_hash/src/expressions.rs index 70b2023..16959fc 100644 --- a/polars_hash/src/expressions.rs +++ b/polars_hash/src/expressions.rs @@ -12,8 +12,10 @@ use polars_core::datatypes::{ use pyo3_polars::derive::polars_expr; use std::fmt::Write; use std::{str, string}; +use std::option::Option; use wyhash::wyhash as real_wyhash; + pub fn blake3_hash_str(value: &str, output: &mut string::String) { let hash = blake3::hash(value.as_bytes()); write!(output, "{}", hash).unwrap() diff --git a/polars_hash/src/geohashers.rs b/polars_hash/src/geohashers.rs index 218a53a..a3f36e7 100644 --- a/polars_hash/src/geohashers.rs +++ b/polars_hash/src/geohashers.rs @@ -12,7 +12,7 @@ pub fn geohash_encoder( Some(len) => { let base = match base { Some(base) => base, // Use the provided base - None => 32, // Default to base 32 + None => 16, // Default to base 16 }; Ok(Some( encode(Coord { x: long, y: lat }, len as usize, base) From 4a2ed3047afd6869746f672300c841382119ae1a Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 14:01:19 +0200 Subject: [PATCH 08/16] moving import to top --- .DS_Store | Bin 0 -> 6148 bytes polars_hash/.DS_Store | Bin 0 -> 6148 bytes polars_hash/src/expressions.rs | 3 +-- 3 files changed, 1 insertion(+), 2 deletions(-) create mode 100644 .DS_Store create mode 100644 polars_hash/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..b4bce191d7a25a54ffe4ae5a9ce952b492dd3df1 GIT binary patch literal 6148 zcmeHKF;2rk5S$AZI0~RZMd>OfQhs0}Q$Q6d4}gS72~v!sKpJS=@EAH?!2|dKKVkOn zNbDGcM1c_6mGKy)d?K=KRaF`IM;S6Mo* zc&g#AG9b?`z%eGc#$D<8HO@N4wKPkT(JUL$7WNrCpsO8MCLA1%z0VTK_VxWxst8WN}Fyq@lv zSB+;ezYfwvpI@!_I?wODOZl3ZNvJ3T%78NPV+L5WMZ%Ut9hCuPKp9vuAm4|GDj0jr z9okO^2ipP=1DdUHF1>`rL>^<0xkIi{j0+{YP~%$+FKqVEF;nbSvmu64n>7vL>*~VzJKQb?pWS$jsL_|ktcm3z%x67}hm*0pk)8;BJ zcrji4x>mFZ*LU;2ooTM;{Hk7$&&!J~e|t9MtC;VAHuMP24Xs1Ylh<&z$@_6z^EUFE z+`A(}e#`u7s(>n>3jDMJsM&1MKu}v%Kow90Y6bZFVBw6h$4bzBI?%W)0I-K)XXx`? zf^)3L*kdJ#8JLk&prjgq#W0eNaqHt^kCmXLlX1*=k7qXihGHB$#;pq{6ANmq3aA31 z0$q6=a{a&FfBz4Y^hp&^1^$%+rk6~U3AW^GYin~{Yc2d9&c<<-V4Z@FE5(TAQhW+` ZhH=XsVC=CH#0X6P2sjzEQ3ZZgflpowWGw&y literal 0 HcmV?d00001 diff --git a/polars_hash/src/expressions.rs b/polars_hash/src/expressions.rs index 16959fc..4bac4ca 100644 --- a/polars_hash/src/expressions.rs +++ b/polars_hash/src/expressions.rs @@ -1,3 +1,4 @@ +use std::option::Option; // Import Option and its methods use crate::geohashers::{geohash_decoder, geohash_encoder, geohash_neighbors}; use crate::h3::h3_encoder; use crate::sha_hashers::*; @@ -12,10 +13,8 @@ use polars_core::datatypes::{ use pyo3_polars::derive::polars_expr; use std::fmt::Write; use std::{str, string}; -use std::option::Option; use wyhash::wyhash as real_wyhash; - pub fn blake3_hash_str(value: &str, output: &mut string::String) { let hash = blake3::hash(value.as_bytes()); write!(output, "{}", hash).unwrap() From 747e0c0fd3e9e4c830831c69572367ac86b6494f Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 14:18:17 +0200 Subject: [PATCH 09/16] change use crate:: --- polars_hash/src/expressions.rs | 2 ++ polars_hash/src/geohashers.rs | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/polars_hash/src/expressions.rs b/polars_hash/src/expressions.rs index 4bac4ca..3a5047f 100644 --- a/polars_hash/src/expressions.rs +++ b/polars_hash/src/expressions.rs @@ -1,4 +1,5 @@ use std::option::Option; // Import Option and its methods + use crate::geohashers::{geohash_decoder, geohash_encoder, geohash_neighbors}; use crate::h3::h3_encoder; use crate::sha_hashers::*; @@ -216,6 +217,7 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { Ok(out.into_series()) } + #[polars_expr(output_type=String)] fn h3_encode(inputs: &[Series]) -> PolarsResult { let ca = inputs[0].struct_()?; diff --git a/polars_hash/src/geohashers.rs b/polars_hash/src/geohashers.rs index a3f36e7..0e84e8b 100644 --- a/polars_hash/src/geohashers.rs +++ b/polars_hash/src/geohashers.rs @@ -1,4 +1,4 @@ -use geohash::{decode, encode, neighbors, Coord}; +use crate::geohash::{decode, encode, neighbors, Coord}; use polars::prelude::*; pub fn geohash_encoder( From f80c081ef679f60f9280fca9652a9929d390ff74 Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 14:24:21 +0200 Subject: [PATCH 10/16] update geohash = { version = "0.13.1" } --- polars_hash/Cargo.toml | 2 +- polars_hash/src/geohashers.rs | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/polars_hash/Cargo.toml b/polars_hash/Cargo.toml index 09da452..456e681 100644 --- a/polars_hash/Cargo.toml +++ b/polars_hash/Cargo.toml @@ -14,7 +14,7 @@ pyo3 = { version = "0.20.3", features = ["extension-module", "abi3-py38"] } pyo3-polars = { version = "0.12.0", features = ["derive"] } serde = { version = "1", features = ["derive"] } wyhash = { version = "0.5.0" } -geohash = { version = "0.13.0" } +geohash = { version = "0.13.1" } sha1 = { version = "0.10.6" } sha2 = { version = "0.10.8" } sha3 = { version = "0.10.8" } diff --git a/polars_hash/src/geohashers.rs b/polars_hash/src/geohashers.rs index 0e84e8b..2eeb81c 100644 --- a/polars_hash/src/geohashers.rs +++ b/polars_hash/src/geohashers.rs @@ -1,5 +1,18 @@ -use crate::geohash::{decode, encode, neighbors, Coord}; -use polars::prelude::*; +use crate::geohashers::{geohash_decoder, geohash_encoder, geohash_neighbors}; +use crate::h3::h3_encoder; +use crate::sha_hashers::*; +use polars::{ + chunked_array::ops::arity::{try_binary_elementwise, try_ternary_elementwise}, + prelude::*, +}; +use polars_core::datatypes::{ + DataType::{Float64, String, Struct}, + Field, +}; +use pyo3_polars::derive::polars_expr; +use std::fmt::Write; +use std::{str, string}; +use wyhash::wyhash as real_wyhash; pub fn geohash_encoder( lat: Option, From 53ae8f4287c191951a177945a36b48b3294adfac Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 14:38:03 +0200 Subject: [PATCH 11/16] pushing back to old import --- polars_hash/src/geohashers.rs | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/polars_hash/src/geohashers.rs b/polars_hash/src/geohashers.rs index 2eeb81c..0e84e8b 100644 --- a/polars_hash/src/geohashers.rs +++ b/polars_hash/src/geohashers.rs @@ -1,18 +1,5 @@ -use crate::geohashers::{geohash_decoder, geohash_encoder, geohash_neighbors}; -use crate::h3::h3_encoder; -use crate::sha_hashers::*; -use polars::{ - chunked_array::ops::arity::{try_binary_elementwise, try_ternary_elementwise}, - prelude::*, -}; -use polars_core::datatypes::{ - DataType::{Float64, String, Struct}, - Field, -}; -use pyo3_polars::derive::polars_expr; -use std::fmt::Write; -use std::{str, string}; -use wyhash::wyhash as real_wyhash; +use crate::geohash::{decode, encode, neighbors, Coord}; +use polars::prelude::*; pub fn geohash_encoder( lat: Option, From 4b6d2fb333a677735116057ed6837dad6c1baf81 Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 16:23:53 +0200 Subject: [PATCH 12/16] revert --- polars_hash/Cargo.toml | 2 +- polars_hash/src/geohashers.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/polars_hash/Cargo.toml b/polars_hash/Cargo.toml index 456e681..09da452 100644 --- a/polars_hash/Cargo.toml +++ b/polars_hash/Cargo.toml @@ -14,7 +14,7 @@ pyo3 = { version = "0.20.3", features = ["extension-module", "abi3-py38"] } pyo3-polars = { version = "0.12.0", features = ["derive"] } serde = { version = "1", features = ["derive"] } wyhash = { version = "0.5.0" } -geohash = { version = "0.13.1" } +geohash = { version = "0.13.0" } sha1 = { version = "0.10.6" } sha2 = { version = "0.10.8" } sha3 = { version = "0.10.8" } diff --git a/polars_hash/src/geohashers.rs b/polars_hash/src/geohashers.rs index 0e84e8b..a3f36e7 100644 --- a/polars_hash/src/geohashers.rs +++ b/polars_hash/src/geohashers.rs @@ -1,4 +1,4 @@ -use crate::geohash::{decode, encode, neighbors, Coord}; +use geohash::{decode, encode, neighbors, Coord}; use polars::prelude::*; pub fn geohash_encoder( From 408dd35cf5b41d92fb8ff0e8fb540cdb94ce5bd6 Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 17:16:31 +0200 Subject: [PATCH 13/16] another attempt with differing syntax --- polars_hash/src/expressions.rs | 19 +++++++++++++++---- polars_hash/src/geohashers.rs | 13 +++++-------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/polars_hash/src/expressions.rs b/polars_hash/src/expressions.rs index 3a5047f..a246f7f 100644 --- a/polars_hash/src/expressions.rs +++ b/polars_hash/src/expressions.rs @@ -184,6 +184,15 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { }; let len = len.i64()?; + let base = match inputs.get(2) { + Some(base_series) => match base_series.dtype() { + DataType::UInt8 => base_series.u8()?, + _ => polars_bail!(InvalidOperation:"Base input needs to be uint8"), + }, + None => Series::new("base", &[16u8]), // Default to base 16 if not provided + }; + let base = base.u8()?; + let lat = ca.field_by_name("latitude")?; let long = ca.field_by_name("longitude")?; let lat = match lat.dtype() { @@ -203,21 +212,23 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { let out: StringChunked = match len.len() { 1 => match unsafe { len.get_unchecked(0) } { - Some(len) => try_binary_elementwise(ca_lat, ca_long, |ca_lat_opt, ca_long_opt| { - geohash_encoder(ca_lat_opt, ca_long_opt, Some(len.expect("Length may not be null")), None) // Pass None for base + Some(len) => try_quaternary_elementwise(ca_lat, ca_long, base, |ca_lat_opt, ca_long_opt, base| { + geohash_encoder(ca_lat_opt, ca_long_opt, Some(len), Some(base)) }), _ => Err(PolarsError::ComputeError( "Length may not be null".to_string().into(), )), }, - _ => try_ternary_elementwise(ca_lat, ca_long, len, |ca_lat_opt, ca_long_opt, len| { - geohash_encoder(ca_lat_opt, ca_long_opt, Some(len.expect("Length may not be null")), None) // Pass None for base + _ => try_quaternary_elementwise(ca_lat, ca_long, len, base, |ca_lat_opt, ca_long_opt, len, base| { + geohash_encoder(ca_lat_opt, ca_long_opt, Some(len), Some(base)) }), }?; Ok(out.into_series()) } + + #[polars_expr(output_type=String)] fn h3_encode(inputs: &[Series]) -> PolarsResult { let ca = inputs[0].struct_()?; diff --git a/polars_hash/src/geohashers.rs b/polars_hash/src/geohashers.rs index a3f36e7..bfc589e 100644 --- a/polars_hash/src/geohashers.rs +++ b/polars_hash/src/geohashers.rs @@ -1,21 +1,18 @@ -use geohash::{decode, encode, neighbors, Coord}; +use geohash::{decode, encode_with_base, neighbors, Coord}; use polars::prelude::*; pub fn geohash_encoder( lat: Option, long: Option, len: Option, - base: Option, // New parameter for base + base: Option, ) -> PolarsResult> { match (lat, long) { (Some(lat), Some(long)) => match len { Some(len) => { - let base = match base { - Some(base) => base, // Use the provided base - None => 16, // Default to base 16 - }; + let base = base.unwrap_or(16); // Default to base 16 if not provided Ok(Some( - encode(Coord { x: long, y: lat }, len as usize, base) + encode_with_base(Coord { x: long, y: lat }, len as usize, base) .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?, )) } @@ -118,4 +115,4 @@ pub fn geohash_neighbors(ca: &StringChunked) -> PolarsResult { ser_north_west, ], ) -} \ No newline at end of file +} From b4ce022db9998ea167a4c8c72d4913cce1beccbb Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 17:27:14 +0200 Subject: [PATCH 14/16] additional attempts --- polars_hash/src/expressions.rs | 12 ++++++------ polars_hash/src/geohashers.rs | 14 +++++--------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/polars_hash/src/expressions.rs b/polars_hash/src/expressions.rs index a246f7f..17e00a8 100644 --- a/polars_hash/src/expressions.rs +++ b/polars_hash/src/expressions.rs @@ -189,9 +189,8 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { DataType::UInt8 => base_series.u8()?, _ => polars_bail!(InvalidOperation:"Base input needs to be uint8"), }, - None => Series::new("base", &[16u8]), // Default to base 16 if not provided + None => UInt8Chunked::full("base", 16, ca.len()), // Default to base 16 if not provided }; - let base = base.u8()?; let lat = ca.field_by_name("latitude")?; let long = ca.field_by_name("longitude")?; @@ -212,15 +211,15 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { let out: StringChunked = match len.len() { 1 => match unsafe { len.get_unchecked(0) } { - Some(len) => try_quaternary_elementwise(ca_lat, ca_long, base, |ca_lat_opt, ca_long_opt, base| { - geohash_encoder(ca_lat_opt, ca_long_opt, Some(len), Some(base)) + Some(len) => try_ternary_elementwise(ca_lat, ca_long, base, |ca_lat_opt, ca_long_opt, base_opt| { + geohash_encoder(ca_lat_opt, ca_long_opt, Some(len)) }), _ => Err(PolarsError::ComputeError( "Length may not be null".to_string().into(), )), }, - _ => try_quaternary_elementwise(ca_lat, ca_long, len, base, |ca_lat_opt, ca_long_opt, len, base| { - geohash_encoder(ca_lat_opt, ca_long_opt, Some(len), Some(base)) + _ => try_ternary_elementwise(ca_lat, ca_long, len, |ca_lat_opt, ca_long_opt, len_opt| { + geohash_encoder(ca_lat_opt, ca_long_opt, len_opt) }), }?; Ok(out.into_series()) @@ -229,6 +228,7 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { + #[polars_expr(output_type=String)] fn h3_encode(inputs: &[Series]) -> PolarsResult { let ca = inputs[0].struct_()?; diff --git a/polars_hash/src/geohashers.rs b/polars_hash/src/geohashers.rs index bfc589e..ab51890 100644 --- a/polars_hash/src/geohashers.rs +++ b/polars_hash/src/geohashers.rs @@ -1,21 +1,17 @@ -use geohash::{decode, encode_with_base, neighbors, Coord}; +use geohash::{decode, encode, neighbors, Coord}; use polars::prelude::*; pub fn geohash_encoder( lat: Option, long: Option, len: Option, - base: Option, ) -> PolarsResult> { match (lat, long) { (Some(lat), Some(long)) => match len { - Some(len) => { - let base = base.unwrap_or(16); // Default to base 16 if not provided - Ok(Some( - encode_with_base(Coord { x: long, y: lat }, len as usize, base) - .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?, - )) - } + Some(len) => Ok(Some( + encode(Coord { x: long, y: lat }, len as usize) + .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?, + )), _ => Err(PolarsError::ComputeError( "Length may not be null".to_string().into(), )), From 89464c412057ea4a2fe04613ae5e80ea482bf477 Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 17:30:35 +0200 Subject: [PATCH 15/16] reference to the default UInt8Chunked instead of the owned value. --- polars_hash/src/expressions.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/polars_hash/src/expressions.rs b/polars_hash/src/expressions.rs index 17e00a8..01412c3 100644 --- a/polars_hash/src/expressions.rs +++ b/polars_hash/src/expressions.rs @@ -180,16 +180,16 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { DataType::Int32 => inputs[1].cast(&DataType::Int64)?, DataType::Int16 => inputs[1].cast(&DataType::Int64)?, DataType::Int8 => inputs[1].cast(&DataType::Int64)?, - _ => polars_bail!(InvalidOperation:"Length input needs to be integer"), + _ => polars_bail!(InvalidOperation: "Length input needs to be integer"), }; let len = len.i64()?; let base = match inputs.get(2) { Some(base_series) => match base_series.dtype() { DataType::UInt8 => base_series.u8()?, - _ => polars_bail!(InvalidOperation:"Base input needs to be uint8"), + _ => polars_bail!(InvalidOperation: "Base input needs to be uint8"), }, - None => UInt8Chunked::full("base", 16, ca.len()), // Default to base 16 if not provided + None => &UInt8Chunked::full("base", 16, ca.len()), // Borrow the default base 16 chunked array }; let lat = ca.field_by_name("latitude")?; @@ -197,13 +197,13 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { let lat = match lat.dtype() { DataType::Float32 => lat.cast(&DataType::Float64)?, DataType::Float64 => lat, - _ => polars_bail!(InvalidOperation:"Latitude input needs to be float"), + _ => polars_bail!(InvalidOperation: "Latitude input needs to be float"), }; let long = match long.dtype() { DataType::Float32 => long.cast(&DataType::Float64)?, DataType::Float64 => long, - _ => polars_bail!(InvalidOperation:"Longitude input needs to be float"), + _ => polars_bail!(InvalidOperation: "Longitude input needs to be float"), }; let ca_lat = lat.f64()?; @@ -229,6 +229,7 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { + #[polars_expr(output_type=String)] fn h3_encode(inputs: &[Series]) -> PolarsResult { let ca = inputs[0].struct_()?; From 03e882033d0184d2cdd912cfa6f7037941a5c9ee Mon Sep 17 00:00:00 2001 From: ThomasMAhern <33540039+ThomasMAhern@users.noreply.github.com> Date: Wed, 19 Jun 2024 17:34:50 +0200 Subject: [PATCH 16/16] Unused Variable and Temporary Value Dropped: --- polars_hash/src/expressions.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/polars_hash/src/expressions.rs b/polars_hash/src/expressions.rs index 01412c3..99fa9ef 100644 --- a/polars_hash/src/expressions.rs +++ b/polars_hash/src/expressions.rs @@ -184,12 +184,15 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { }; let len = len.i64()?; + // Create a default base if not provided + let default_base = UInt8Chunked::full("base", 16, ca.len()); + let base = match inputs.get(2) { Some(base_series) => match base_series.dtype() { DataType::UInt8 => base_series.u8()?, _ => polars_bail!(InvalidOperation: "Base input needs to be uint8"), }, - None => &UInt8Chunked::full("base", 16, ca.len()), // Borrow the default base 16 chunked array + None => &default_base, // Borrow the default base 16 chunked array }; let lat = ca.field_by_name("latitude")?; @@ -211,15 +214,15 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { let out: StringChunked = match len.len() { 1 => match unsafe { len.get_unchecked(0) } { - Some(len) => try_ternary_elementwise(ca_lat, ca_long, base, |ca_lat_opt, ca_long_opt, base_opt| { + Some(len) => try_ternary_elementwise(ca_lat, ca_long, base, |ca_lat_opt, ca_long_opt, _base_opt| { geohash_encoder(ca_lat_opt, ca_long_opt, Some(len)) }), _ => Err(PolarsError::ComputeError( "Length may not be null".to_string().into(), )), }, - _ => try_ternary_elementwise(ca_lat, ca_long, len, |ca_lat_opt, ca_long_opt, len_opt| { - geohash_encoder(ca_lat_opt, ca_long_opt, len_opt) + _ => try_ternary_elementwise(ca_lat, ca_long, len, |ca_lat_opt, ca_long_opt, _base_opt| { + geohash_encoder(ca_lat_opt, ca_long_opt, len) }), }?; Ok(out.into_series()) @@ -230,6 +233,7 @@ fn ghash_encode(inputs: &[Series]) -> PolarsResult { + #[polars_expr(output_type=String)] fn h3_encode(inputs: &[Series]) -> PolarsResult { let ca = inputs[0].struct_()?;