Skip to content

Commit c04077b

Browse files
authored
replace FromIterator with builder trait (#67)
1 parent 73c8da2 commit c04077b

12 files changed

+328
-198
lines changed

src/common.rs

Lines changed: 119 additions & 91 deletions
Large diffs are not rendered by default.

src/common_union.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ pub(crate) struct JsonUnion {
6161
}
6262

6363
impl JsonUnion {
64-
fn new(length: usize) -> Self {
64+
pub fn new(length: usize) -> Self {
6565
Self {
6666
bools: vec![None; length],
6767
ints: vec![None; length],
@@ -79,7 +79,7 @@ impl JsonUnion {
7979
DataType::Union(union_fields(), UnionMode::Sparse)
8080
}
8181

82-
fn push(&mut self, field: JsonUnionField) {
82+
pub fn push(&mut self, field: JsonUnionField) {
8383
self.type_ids[self.index] = field.type_id();
8484
match field {
8585
JsonUnionField::JsonNull => (),
@@ -94,7 +94,7 @@ impl JsonUnion {
9494
debug_assert!(self.index <= self.length);
9595
}
9696

97-
fn push_none(&mut self) {
97+
pub fn push_none(&mut self) {
9898
self.index += 1;
9999
debug_assert!(self.index <= self.length);
100100
}

src/json_as_text.rs

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
use std::any::Any;
22
use std::sync::Arc;
33

4-
use datafusion::arrow::array::{ArrayRef, StringArray};
4+
use datafusion::arrow::array::{ArrayRef, StringArray, StringBuilder};
55
use datafusion::arrow::datatypes::DataType;
66
use datafusion::common::{Result as DataFusionResult, ScalarValue};
77
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
88
use jiter::Peek;
99

10-
use crate::common::{get_err, invoke, jiter_json_find, return_type_check, GetError, JsonPath};
10+
use crate::common::{get_err, invoke, jiter_json_find, return_type_check, GetError, InvokeResult, JsonPath};
1111
use crate::common_macros::make_udf_function;
1212

1313
make_udf_function!(
@@ -50,20 +50,38 @@ impl ScalarUDFImpl for JsonAsText {
5050
}
5151

5252
fn invoke(&self, args: &[ColumnarValue]) -> DataFusionResult<ColumnarValue> {
53-
invoke::<StringArray, String>(
54-
args,
55-
jiter_json_as_text,
56-
|c| Ok(Arc::new(c) as ArrayRef),
57-
ScalarValue::Utf8,
58-
true,
59-
)
53+
invoke::<StringArray>(args, jiter_json_as_text)
6054
}
6155

6256
fn aliases(&self) -> &[String] {
6357
&self.aliases
6458
}
6559
}
6660

61+
impl InvokeResult for StringArray {
62+
type Item = String;
63+
64+
type Builder = StringBuilder;
65+
66+
const ACCEPT_DICT_RETURN: bool = true;
67+
68+
fn builder(capacity: usize) -> Self::Builder {
69+
StringBuilder::with_capacity(capacity, 0)
70+
}
71+
72+
fn append_value(builder: &mut Self::Builder, value: Option<Self::Item>) {
73+
builder.append_option(value);
74+
}
75+
76+
fn finish(mut builder: Self::Builder) -> DataFusionResult<ArrayRef> {
77+
Ok(Arc::new(builder.finish()))
78+
}
79+
80+
fn scalar(value: Option<Self::Item>) -> ScalarValue {
81+
ScalarValue::Utf8(value)
82+
}
83+
}
84+
6785
fn jiter_json_as_text(opt_json: Option<&str>, path: &[JsonPath]) -> Result<String, GetError> {
6886
if let Some((mut jiter, peek)) = jiter_json_find(opt_json, path) {
6987
match peek {

src/json_contains.rs

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
use std::any::Any;
22
use std::sync::Arc;
33

4+
use datafusion::arrow::array::BooleanBuilder;
45
use datafusion::arrow::datatypes::DataType;
56
use datafusion::common::arrow::array::{ArrayRef, BooleanArray};
67
use datafusion::common::{plan_err, Result, ScalarValue};
78
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
89

9-
use crate::common::{invoke, jiter_json_find, return_type_check, GetError, JsonPath};
10+
use crate::common::{invoke, jiter_json_find, return_type_check, GetError, InvokeResult, JsonPath};
1011
use crate::common_macros::make_udf_function;
1112

1213
make_udf_function!(
@@ -53,20 +54,39 @@ impl ScalarUDFImpl for JsonContains {
5354
}
5455

5556
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
56-
invoke::<BooleanArray, bool>(
57-
args,
58-
jiter_json_contains,
59-
|c| Ok(Arc::new(c) as ArrayRef),
60-
ScalarValue::Boolean,
61-
false,
62-
)
57+
invoke::<BooleanArray>(args, jiter_json_contains)
6358
}
6459

6560
fn aliases(&self) -> &[String] {
6661
&self.aliases
6762
}
6863
}
6964

65+
impl InvokeResult for BooleanArray {
66+
type Item = bool;
67+
68+
type Builder = BooleanBuilder;
69+
70+
// Using boolean inside a dictionary is not an optimization!
71+
const ACCEPT_DICT_RETURN: bool = false;
72+
73+
fn builder(capacity: usize) -> Self::Builder {
74+
BooleanBuilder::with_capacity(capacity)
75+
}
76+
77+
fn append_value(builder: &mut Self::Builder, value: Option<Self::Item>) {
78+
builder.append_option(value);
79+
}
80+
81+
fn finish(mut builder: Self::Builder) -> Result<ArrayRef> {
82+
Ok(Arc::new(builder.finish()))
83+
}
84+
85+
fn scalar(value: Option<Self::Item>) -> ScalarValue {
86+
ScalarValue::Boolean(value)
87+
}
88+
}
89+
7090
#[allow(clippy::unnecessary_wraps)]
7191
fn jiter_json_contains(json_data: Option<&str>, path: &[JsonPath]) -> Result<bool, GetError> {
7292
Ok(jiter_json_find(json_data, path).is_some())

src/json_get.rs

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@ use datafusion::arrow::array::UnionArray;
66
use datafusion::arrow::datatypes::DataType;
77
use datafusion::common::Result as DataFusionResult;
88
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
9+
use datafusion::scalar::ScalarValue;
910
use jiter::{Jiter, NumberAny, NumberInt, Peek};
1011

12+
use crate::common::InvokeResult;
1113
use crate::common::{get_err, invoke, jiter_json_find, return_type_check, GetError, JsonPath};
1214
use crate::common_macros::make_udf_function;
1315
use crate::common_union::{JsonUnion, JsonUnionField};
@@ -54,18 +56,43 @@ impl ScalarUDFImpl for JsonGet {
5456
}
5557

5658
fn invoke(&self, args: &[ColumnarValue]) -> DataFusionResult<ColumnarValue> {
57-
let to_array = |c: JsonUnion| {
58-
let array: UnionArray = c.try_into()?;
59-
Ok(Arc::new(array) as ArrayRef)
60-
};
61-
invoke::<JsonUnion, JsonUnionField>(args, jiter_json_get_union, to_array, JsonUnionField::scalar_value, true)
59+
invoke::<JsonUnion>(args, jiter_json_get_union)
6260
}
6361

6462
fn aliases(&self) -> &[String] {
6563
&self.aliases
6664
}
6765
}
6866

67+
impl InvokeResult for JsonUnion {
68+
type Item = JsonUnionField;
69+
70+
type Builder = JsonUnion;
71+
72+
const ACCEPT_DICT_RETURN: bool = true;
73+
74+
fn builder(capacity: usize) -> Self::Builder {
75+
JsonUnion::new(capacity)
76+
}
77+
78+
fn append_value(builder: &mut Self::Builder, value: Option<Self::Item>) {
79+
if let Some(value) = value {
80+
builder.push(value);
81+
} else {
82+
builder.push_none();
83+
}
84+
}
85+
86+
fn finish(builder: Self::Builder) -> DataFusionResult<ArrayRef> {
87+
let array: UnionArray = builder.try_into()?;
88+
Ok(Arc::new(array) as ArrayRef)
89+
}
90+
91+
fn scalar(value: Option<Self::Item>) -> ScalarValue {
92+
JsonUnionField::scalar_value(value)
93+
}
94+
}
95+
6996
fn jiter_json_get_union(opt_json: Option<&str>, path: &[JsonPath]) -> Result<JsonUnionField, GetError> {
7097
if let Some((mut jiter, peek)) = jiter_json_find(opt_json, path) {
7198
build_union(&mut jiter, peek)

src/json_get_bool.rs

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
use std::any::Any;
2-
use std::sync::Arc;
32

4-
use datafusion::arrow::array::{ArrayRef, BooleanArray};
3+
use datafusion::arrow::array::BooleanArray;
54
use datafusion::arrow::datatypes::DataType;
6-
use datafusion::common::{Result as DataFusionResult, ScalarValue};
5+
use datafusion::common::Result as DataFusionResult;
76
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
87
use jiter::Peek;
98

@@ -50,13 +49,7 @@ impl ScalarUDFImpl for JsonGetBool {
5049
}
5150

5251
fn invoke(&self, args: &[ColumnarValue]) -> DataFusionResult<ColumnarValue> {
53-
invoke::<BooleanArray, bool>(
54-
args,
55-
jiter_json_get_bool,
56-
|c| Ok(Arc::new(c) as ArrayRef),
57-
ScalarValue::Boolean,
58-
false,
59-
)
52+
invoke::<BooleanArray>(args, jiter_json_get_bool)
6053
}
6154

6255
fn aliases(&self) -> &[String] {

src/json_get_float.rs

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
use std::any::Any;
22
use std::sync::Arc;
33

4-
use datafusion::arrow::array::{ArrayRef, Float64Array};
4+
use datafusion::arrow::array::{ArrayRef, Float64Array, Float64Builder};
55
use datafusion::arrow::datatypes::DataType;
66
use datafusion::common::{Result as DataFusionResult, ScalarValue};
77
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
88
use jiter::{NumberAny, Peek};
99

10-
use crate::common::{get_err, invoke, jiter_json_find, return_type_check, GetError, JsonPath};
10+
use crate::common::{get_err, invoke, jiter_json_find, return_type_check, GetError, InvokeResult, JsonPath};
1111
use crate::common_macros::make_udf_function;
1212

1313
make_udf_function!(
@@ -50,20 +50,39 @@ impl ScalarUDFImpl for JsonGetFloat {
5050
}
5151

5252
fn invoke(&self, args: &[ColumnarValue]) -> DataFusionResult<ColumnarValue> {
53-
invoke::<Float64Array, f64>(
54-
args,
55-
jiter_json_get_float,
56-
|c| Ok(Arc::new(c) as ArrayRef),
57-
ScalarValue::Float64,
58-
true,
59-
)
53+
invoke::<Float64Array>(args, jiter_json_get_float)
6054
}
6155

6256
fn aliases(&self) -> &[String] {
6357
&self.aliases
6458
}
6559
}
6660

61+
impl InvokeResult for Float64Array {
62+
type Item = f64;
63+
64+
type Builder = Float64Builder;
65+
66+
// Cheaper to produce a float array rather than dict-encoded floats
67+
const ACCEPT_DICT_RETURN: bool = false;
68+
69+
fn builder(capacity: usize) -> Self::Builder {
70+
Float64Builder::with_capacity(capacity)
71+
}
72+
73+
fn append_value(builder: &mut Self::Builder, value: Option<Self::Item>) {
74+
builder.append_option(value);
75+
}
76+
77+
fn finish(mut builder: Self::Builder) -> DataFusionResult<ArrayRef> {
78+
Ok(Arc::new(builder.finish()))
79+
}
80+
81+
fn scalar(value: Option<Self::Item>) -> ScalarValue {
82+
ScalarValue::Float64(value)
83+
}
84+
}
85+
6786
fn jiter_json_get_float(json_data: Option<&str>, path: &[JsonPath]) -> Result<f64, GetError> {
6887
if let Some((mut jiter, peek)) = jiter_json_find(json_data, path) {
6988
match peek {

src/json_get_int.rs

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
use std::any::Any;
22
use std::sync::Arc;
33

4-
use datafusion::arrow::array::{ArrayRef, Int64Array};
4+
use datafusion::arrow::array::{ArrayRef, Int64Array, Int64Builder};
55
use datafusion::arrow::datatypes::DataType;
66
use datafusion::common::{Result as DataFusionResult, ScalarValue};
77
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
88
use jiter::{NumberInt, Peek};
99

10-
use crate::common::{get_err, invoke, jiter_json_find, return_type_check, GetError, JsonPath};
10+
use crate::common::{get_err, invoke, jiter_json_find, return_type_check, GetError, InvokeResult, JsonPath};
1111
use crate::common_macros::make_udf_function;
1212

1313
make_udf_function!(
@@ -50,20 +50,39 @@ impl ScalarUDFImpl for JsonGetInt {
5050
}
5151

5252
fn invoke(&self, args: &[ColumnarValue]) -> DataFusionResult<ColumnarValue> {
53-
invoke::<Int64Array, i64>(
54-
args,
55-
jiter_json_get_int,
56-
|c| Ok(Arc::new(c) as ArrayRef),
57-
ScalarValue::Int64,
58-
true,
59-
)
53+
invoke::<Int64Array>(args, jiter_json_get_int)
6054
}
6155

6256
fn aliases(&self) -> &[String] {
6357
&self.aliases
6458
}
6559
}
6660

61+
impl InvokeResult for Int64Array {
62+
type Item = i64;
63+
64+
type Builder = Int64Builder;
65+
66+
// Cheaper to return an int array rather than dict-encoded ints
67+
const ACCEPT_DICT_RETURN: bool = false;
68+
69+
fn builder(capacity: usize) -> Self::Builder {
70+
Int64Builder::with_capacity(capacity)
71+
}
72+
73+
fn append_value(builder: &mut Self::Builder, value: Option<Self::Item>) {
74+
builder.append_option(value);
75+
}
76+
77+
fn finish(mut builder: Self::Builder) -> DataFusionResult<ArrayRef> {
78+
Ok(Arc::new(builder.finish()))
79+
}
80+
81+
fn scalar(value: Option<Self::Item>) -> ScalarValue {
82+
ScalarValue::Int64(value)
83+
}
84+
}
85+
6786
fn jiter_json_get_int(json_data: Option<&str>, path: &[JsonPath]) -> Result<i64, GetError> {
6887
if let Some((mut jiter, peek)) = jiter_json_find(json_data, path) {
6988
match peek {

src/json_get_json.rs

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
use std::any::Any;
2-
use std::sync::Arc;
32

4-
use datafusion::arrow::array::{ArrayRef, StringArray};
3+
use datafusion::arrow::array::StringArray;
54
use datafusion::arrow::datatypes::DataType;
6-
use datafusion::common::{Result as DataFusionResult, ScalarValue};
5+
use datafusion::common::Result as DataFusionResult;
76
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
87

98
use crate::common::{get_err, invoke, jiter_json_find, return_type_check, GetError, JsonPath};
@@ -49,13 +48,7 @@ impl ScalarUDFImpl for JsonGetJson {
4948
}
5049

5150
fn invoke(&self, args: &[ColumnarValue]) -> DataFusionResult<ColumnarValue> {
52-
invoke::<StringArray, String>(
53-
args,
54-
jiter_json_get_json,
55-
|c| Ok(Arc::new(c) as ArrayRef),
56-
ScalarValue::Utf8,
57-
true,
58-
)
51+
invoke::<StringArray>(args, jiter_json_get_json)
5952
}
6053

6154
fn aliases(&self) -> &[String] {

0 commit comments

Comments
 (0)