Skip to content

Commit aafd1e4

Browse files
authored
update to datafusion 39, jiter 0.4, test on MSRV 1.73.0 (#15)
1 parent 8fc08ee commit aafd1e4

File tree

5 files changed

+72
-45
lines changed

5 files changed

+72
-45
lines changed

.github/workflows/ci.yml

+20
Original file line number | Diff line number | Diff line change
@@ -28,12 +28,32 @@ jobs:
2828
PRE_COMMIT_COLOR: always
2929
SKIP: test
3030

31+
resolve:
32+
runs-on: ubuntu-latest
33+
outputs:
34+
MSRV: ${{ steps.resolve-msrv.outputs.MSRV }}
35+
steps:
36+
- uses: actions/checkout@v4
37+
38+
- name: set up python
39+
uses: actions/setup-python@v5
40+
with:
41+
python-version: '3.12'
42+
43+
- name: resolve MSRV
44+
id: resolve-msrv
45+
run:
46+
echo MSRV=`python -c 'import tomllib; print(tomllib.load(open("Cargo.toml", "rb"))["package"]["rust-version"])'` >> $GITHUB_OUTPUT
47+
3148
test:
49+
needs: [resolve]
3250
name: test rust-${{ matrix.rust-version }}
3351
strategy:
3452
fail-fast: false
3553
matrix:
3654
rust-version: [stable, nightly]
55+
include:
56+
- rust-version: ${{ needs.resolve.outputs.MSRV }}
3757

3858
runs-on: ubuntu-latest
3959

Cargo.toml

+11-10
Original file line number | Diff line number | Diff line change
@@ -8,20 +8,21 @@ license = "Apache-2.0"
88
keywords = ["datafusion", "JSON", "SQL"]
99
categories = ["database-implementations", "parsing"]
1010
repository = "https://github.com/datafusion-contrib/datafusion-functions-json/"
11+
rust-version = "1.73.0"
1112

1213
[dependencies]
13-
arrow = ">=51"
14-
arrow-schema = ">=51"
15-
datafusion-common = ">=38"
16-
datafusion-expr = ">=38"
17-
jiter = ">=0.3"
18-
paste = ">=1.0.14"
19-
log = ">=0.4.21"
20-
datafusion-execution = ">=38"
14+
arrow = "52"
15+
arrow-schema = "52"
16+
datafusion-common = "39"
17+
datafusion-expr = "39"
18+
jiter = "0.4"
19+
paste = "1"
20+
log = "0.4"
21+
datafusion-execution = "39"
2122

2223
[dev-dependencies]
23-
datafusion = "38.0.0"
24-
tokio = { version = "1.37.0", features = ["full"] }
24+
datafusion = "39"
25+
tokio = { version = "1.37", features = ["full"] }
2526

2627
[lints.clippy]
2728
dbg_macro = "deny"

src/common.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,7 @@ fn scalar_apply_iter<'a, 'j, C: FromIterator<Option<I>> + 'static, I>(
159159

160160
pub fn jiter_json_find<'j>(opt_json: Option<&'j str>, path: &[JsonPath]) -> Option<(Jiter<'j>, Peek)> {
161161
if let Some(json_str) = opt_json {
162-
let mut jiter = Jiter::new(json_str.as_bytes(), false);
162+
let mut jiter = Jiter::new(json_str.as_bytes());
163163
if let Ok(peek) = jiter.peek() {
164164
if let Ok(peek_found) = jiter_json_find_step(&mut jiter, peek, path) {
165165
return Some((jiter, peek_found));

src/common_union.rs

+36-29
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
use std::sync::Arc;
1+
use std::sync::{Arc, OnceLock};
22

33
use arrow::array::{Array, BooleanArray, Float64Array, Int64Array, StringArray, UnionArray};
44
use arrow::buffer::Buffer;
@@ -36,10 +36,7 @@ impl JsonUnion {
3636
}
3737

3838
pub fn data_type() -> DataType {
39-
DataType::Union(
40-
UnionFields::new(TYPE_IDS.to_vec(), union_fields().to_vec()),
41-
UnionMode::Sparse,
42-
)
39+
DataType::Union(union_fields(), UnionMode::Sparse)
4340
}
4441

4542
fn push(&mut self, field: JsonUnionField) {
@@ -58,7 +55,7 @@ impl JsonUnion {
5855
}
5956

6057
fn push_none(&mut self) {
61-
self.type_ids[self.index] = TYPE_IDS[0];
58+
self.type_ids[self.index] = TYPE_ID_NULL;
6259
self.index += 1;
6360
debug_assert!(self.index <= self.capacity);
6461
}
@@ -86,17 +83,16 @@ impl TryFrom<JsonUnion> for UnionArray {
8683
type Error = arrow::error::ArrowError;
8784

8885
fn try_from(value: JsonUnion) -> Result<Self, Self::Error> {
89-
let [f0, f1, f2, f3, f4, f5, f6] = union_fields();
90-
let children: Vec<(Field, Arc<dyn Array>)> = vec![
91-
(f0, Arc::new(BooleanArray::from(value.nulls))),
92-
(f1, Arc::new(BooleanArray::from(value.bools))),
93-
(f2, Arc::new(Int64Array::from(value.ints))),
94-
(f3, Arc::new(Float64Array::from(value.floats))),
95-
(f4, Arc::new(StringArray::from(value.strings))),
96-
(f5, Arc::new(StringArray::from(value.arrays))),
97-
(f6, Arc::new(StringArray::from(value.objects))),
86+
let children: Vec<Arc<dyn Array>> = vec![
87+
Arc::new(BooleanArray::from(value.nulls)),
88+
Arc::new(BooleanArray::from(value.bools)),
89+
Arc::new(Int64Array::from(value.ints)),
90+
Arc::new(Float64Array::from(value.floats)),
91+
Arc::new(StringArray::from(value.strings)),
92+
Arc::new(StringArray::from(value.arrays)),
93+
Arc::new(StringArray::from(value.objects)),
9894
];
99-
UnionArray::try_new(TYPE_IDS, Buffer::from_slice_ref(&value.type_ids), None, children)
95+
UnionArray::try_new(union_fields(), Buffer::from_vec(value.type_ids).into(), None, children)
10096
}
10197
}
10298

@@ -111,18 +107,29 @@ pub(crate) enum JsonUnionField {
111107
Object(String),
112108
}
113109

114-
const TYPE_IDS: &[i8] = &[0, 1, 2, 3, 4, 5, 6];
115-
116-
fn union_fields() -> [Field; 7] {
117-
[
118-
Field::new("null", DataType::Boolean, true),
119-
Field::new("bool", DataType::Boolean, false),
120-
Field::new("int", DataType::Int64, false),
121-
Field::new("float", DataType::Float64, false),
122-
Field::new("str", DataType::Utf8, false),
123-
Field::new("array", DataType::Utf8, false),
124-
Field::new("object", DataType::Utf8, false),
125-
]
110+
const TYPE_ID_NULL: i8 = 0;
111+
const TYPE_ID_BOOL: i8 = 1;
112+
const TYPE_ID_INT: i8 = 2;
113+
const TYPE_ID_FLOAT: i8 = 3;
114+
const TYPE_ID_STR: i8 = 4;
115+
const TYPE_ID_ARRAY: i8 = 5;
116+
const TYPE_ID_OBJECT: i8 = 6;
117+
118+
fn union_fields() -> UnionFields {
119+
static FIELDS: OnceLock<UnionFields> = OnceLock::new();
120+
FIELDS
121+
.get_or_init(|| {
122+
UnionFields::from_iter([
123+
(TYPE_ID_NULL, Arc::new(Field::new("null", DataType::Boolean, true))),
124+
(TYPE_ID_BOOL, Arc::new(Field::new("bool", DataType::Boolean, false))),
125+
(TYPE_ID_INT, Arc::new(Field::new("int", DataType::Int64, false))),
126+
(TYPE_ID_FLOAT, Arc::new(Field::new("float", DataType::Float64, false))),
127+
(TYPE_ID_STR, Arc::new(Field::new("str", DataType::Utf8, false))),
128+
(TYPE_ID_ARRAY, Arc::new(Field::new("array", DataType::Utf8, false))),
129+
(TYPE_ID_OBJECT, Arc::new(Field::new("object", DataType::Utf8, false))),
130+
])
131+
})
132+
.clone()
126133
}
127134

128135
impl JsonUnionField {
@@ -141,7 +148,7 @@ impl JsonUnionField {
141148
pub fn scalar_value(f: Option<Self>) -> ScalarValue {
142149
ScalarValue::Union(
143150
f.map(|f| (f.type_id(), Box::new(f.into()))),
144-
UnionFields::new(TYPE_IDS.to_vec(), union_fields().to_vec()),
151+
union_fields(),
145152
UnionMode::Sparse,
146153
)
147154
}

src/rewrite.rs

+4-5
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ use datafusion_common::DFSchema;
55
use datafusion_common::Result;
66
use datafusion_expr::expr::ScalarFunction;
77
use datafusion_expr::expr_rewriter::FunctionRewrite;
8-
use datafusion_expr::{Expr, ScalarFunctionDefinition};
8+
use datafusion_expr::Expr;
99

1010
pub(crate) struct JsonFunctionRewriter;
1111

@@ -17,8 +17,7 @@ impl FunctionRewrite for JsonFunctionRewriter {
1717
fn rewrite(&self, expr: Expr, _schema: &DFSchema, _config: &ConfigOptions) -> Result<Transformed<Expr>> {
1818
if let Expr::Cast(cast) = &expr {
1919
if let Expr::ScalarFunction(func) = &*cast.expr {
20-
let ScalarFunctionDefinition::UDF(udf) = &func.func_def;
21-
if udf.name() == "json_get" {
20+
if func.func.name() == "json_get" {
2221
if let Some(t) = switch_json_get(&cast.data_type, &func.args) {
2322
return Ok(t);
2423
}
@@ -30,15 +29,15 @@ impl FunctionRewrite for JsonFunctionRewriter {
3029
}
3130

3231
fn switch_json_get(cast_data_type: &DataType, args: &[Expr]) -> Option<Transformed<Expr>> {
33-
let udf = match cast_data_type {
32+
let func = match cast_data_type {
3433
DataType::Boolean => crate::json_get_bool::json_get_bool_udf(),
3534
DataType::Float64 | DataType::Float32 => crate::json_get_float::json_get_float_udf(),
3635
DataType::Int64 | DataType::Int32 => crate::json_get_int::json_get_int_udf(),
3736
DataType::Utf8 => crate::json_get_str::json_get_str_udf(),
3837
_ => return None,
3938
};
4039
let f = ScalarFunction {
41-
func_def: ScalarFunctionDefinition::UDF(udf),
40+
func,
4241
args: args.to_vec(),
4342
};
4443
Some(Transformed::yes(Expr::ScalarFunction(f)))

0 commit comments

Comments
 (0)