-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathjson_get_array.rs
95 lines (78 loc) · 2.71 KB
/
json_get_array.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
use std::any::Any;
use std::sync::Arc;
use arrow::array::ListArray;
use arrow_schema::{DataType, Field};
use datafusion_common::arrow::array::ArrayRef;
use datafusion_common::{Result as DataFusionResult, ScalarValue};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use jiter::Peek;
use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath};
use crate::common_macros::make_udf_function;
use crate::common_union::{JsonArrayField, JsonUnion};
// Invokes the crate's `make_udf_function!` macro for `JsonGetArray`, passing the
// function name `json_get_array`, its argument names (`json_data`, `path`) and a
// one-line description string.
// NOTE(review): the macro itself lives in `crate::common_macros` and is not visible
// here — presumably it generates the public helper/registration glue for this UDF;
// confirm against the macro definition.
make_udf_function!(
JsonGetArray,
json_get_array,
json_data path,
r#"Get an arrow array value from a JSON string by its "path""#
);
/// Scalar UDF implementation backing the `json_get_array` function.
///
/// The type carries only the metadata handed back through its `ScalarUDFImpl`
/// implementation: the argument signature and the list of names.
#[derive(Debug)]
pub(super) struct JsonGetArray {
// Argument signature returned by `signature()`.
signature: Signature,
// Single-element name list returned by `aliases()`; element 0 is also what
// `name()` returns.
aliases: [String; 1],
}
impl Default for JsonGetArray {
    /// Builds the UDF with an immutable, variadic-any signature and its single
    /// registered name, `json_get_array`.
    fn default() -> Self {
        let signature = Signature::variadic_any(Volatility::Immutable);
        let aliases = [String::from("json_get_array")];
        Self { signature, aliases }
    }
}
impl ScalarUDFImpl for JsonGetArray {
    /// Exposes the concrete type so callers can downcast.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// The function name, taken from the first (and only) entry of `aliases`.
    fn name(&self) -> &str {
        &self.aliases[0]
    }

    /// The argument signature stored on the struct.
    fn signature(&self) -> &Signature {
        &self.signature
    }

    /// Validates the argument types with `check_args` and, on success, reports the
    /// return type: a list whose nullable `"item"` field is `Utf8`.
    fn return_type(&self, arg_types: &[DataType]) -> DataFusionResult<DataType> {
        check_args(arg_types, self.name())?;
        let item = Field::new("item", DataType::Utf8, true);
        Ok(DataType::List(Arc::new(item)))
    }

    /// Delegates to the shared `invoke` helper, supplying the array extractor plus
    /// the two result converters (collected union -> list array, optional field ->
    /// scalar value).
    fn invoke(&self, args: &[ColumnarValue]) -> DataFusionResult<ColumnarValue> {
        // Converts the collected union into a `ListArray` behind an `ArrayRef`.
        fn union_to_list(union: JsonUnion) -> DataFusionResult<ArrayRef> {
            let list: ListArray = union.try_into()?;
            Ok(Arc::new(list) as ArrayRef)
        }

        invoke::<JsonUnion, JsonArrayField>(args, jiter_json_get_array, union_to_list, |found| match found {
            Some(field) => field.into(),
            // A missing result becomes a null scalar.
            None => ScalarValue::Null,
        })
    }

    /// All names this function is registered under.
    fn aliases(&self) -> &[String] {
        self.aliases.as_slice()
    }
}
/// Looks up `path` inside `json_data` and, when the value found there is a JSON
/// array, returns the source text of each of its elements.
///
/// Elements are skipped over rather than decoded: for every element the byte range
/// consumed by the skip is sliced out of the input, checked as UTF-8 and copied
/// into an owned `String`.
///
/// # Errors
/// Produces the `get_err!()` error when the lookup yields nothing or the value
/// found is not an array. Failures from the parser calls and from the UTF-8
/// check are propagated with `?`.
fn jiter_json_get_array(json_data: Option<&str>, path: &[JsonPath]) -> Result<JsonArrayField, GetError> {
    match jiter_json_find(json_data, path) {
        Some((mut jiter, Peek::Array)) => {
            let mut items = Vec::new();
            let mut next = jiter.known_array()?;

            while let Some(element_peek) = next {
                // Remember where this element begins, then step past it so the
                // consumed range can be sliced out verbatim.
                let begin = jiter.current_index();
                jiter.known_skip(element_peek)?;
                let raw = std::str::from_utf8(jiter.slice_to_current(begin))?;
                items.push(String::from(raw));

                next = jiter.array_step()?;
            }

            Ok(JsonArrayField(items))
        }
        // Either nothing was found at `path`, or the value is not an array.
        _ => get_err!(),
    }
}