Skip to content

Commit 1310994

Browse files
adriangb and claude committed
Rename helper to json_field_metadata and move to common_union
`is_json_metadata()` was ambiguous about direction (check vs. mark). The helper is a constructor of metadata that marks a field as containing JSON-encoded data. Renaming it to `json_field_metadata()` makes that unambiguous, and the new name stays correct if more keys (e.g. canonical Arrow extension keys) are added to the returned map in the future.

Move it to `src/common_union.rs`, which already houses JSON-typing concerns.

Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]>
1 parent 5b3ff39 commit 1310994

4 files changed

Lines changed: 16 additions & 17 deletions

File tree

src/common.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
use std::collections::HashMap;
21
use std::str::Utf8Error;
32
use std::sync::Arc;
43

@@ -17,13 +16,6 @@ use crate::common_union::{
1716
is_json_union, json_from_union_scalar, nested_json_array, nested_json_array_ref, TYPE_ID_NULL,
1817
};
1918

20-
/// Field metadata that marks a `Utf8` column/field as containing raw JSON.
21-
/// Downstream consumers (e.g. the rewrite layer, other UDFs) use this to
22-
/// recognize JSON-bearing string columns.
23-
pub fn is_json_metadata() -> HashMap<String, String> {
24-
HashMap::from([("is_json".to_string(), "true".to_string())])
25-
}
26-
2719
/// General implementation of `ScalarUDFImpl::return_type`.
2820
///
2921
/// # Arguments

src/common_union.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::collections::HashMap;
12
use std::sync::{Arc, LazyLock, OnceLock};
23

34
use datafusion::arrow::array::{
@@ -8,7 +9,13 @@ use datafusion::arrow::datatypes::{DataType, Field, UnionFields, UnionMode};
89
use datafusion::arrow::error::ArrowError;
910
use datafusion::common::ScalarValue;
1011

11-
use crate::common::is_json_metadata;
12+
/// Field metadata used to mark a `Utf8` field as containing raw JSON.
13+
///
14+
/// Attach this to any Arrow `Field` whose values are JSON-encoded strings so
15+
/// downstream consumers can recognize them as JSON rather than opaque text.
16+
pub fn json_field_metadata() -> HashMap<String, String> {
17+
HashMap::from([("is_json".to_string(), "true".to_string())])
18+
}
1219

1320
pub fn is_json_union(data_type: &DataType) -> bool {
1421
match data_type {
@@ -170,11 +177,11 @@ fn union_fields() -> UnionFields {
170177
(TYPE_ID_STR, Arc::new(Field::new("str", DataType::Utf8, false))),
171178
(
172179
TYPE_ID_ARRAY,
173-
Arc::new(Field::new("array", DataType::Utf8, false).with_metadata(is_json_metadata())),
180+
Arc::new(Field::new("array", DataType::Utf8, false).with_metadata(json_field_metadata())),
174181
),
175182
(
176183
TYPE_ID_OBJECT,
177-
Arc::new(Field::new("object", DataType::Utf8, false).with_metadata(is_json_metadata())),
184+
Arc::new(Field::new("object", DataType::Utf8, false).with_metadata(json_field_metadata())),
178185
),
179186
])
180187
})

src/json_get_array.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,12 @@ use datafusion::common::{Result as DataFusionResult, ScalarValue};
77
use datafusion::logical_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
88
use jiter::Peek;
99

10-
use crate::common::{
11-
get_err, invoke, is_json_metadata, jiter_json_find, return_type_check, GetError, InvokeResult, JsonPath,
12-
};
10+
use crate::common::{get_err, invoke, jiter_json_find, return_type_check, GetError, InvokeResult, JsonPath};
1311
use crate::common_macros::make_udf_function;
12+
use crate::common_union::json_field_metadata;
1413

1514
fn list_item_field() -> Field {
16-
Field::new("item", DataType::Utf8, true).with_metadata(is_json_metadata())
15+
Field::new("item", DataType::Utf8, true).with_metadata(json_field_metadata())
1716
}
1817

1918
make_udf_function!(

src/json_get_json.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ use datafusion::logical_expr::{
88
ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
99
};
1010

11-
use crate::common::{get_err, invoke, is_json_metadata, jiter_json_find, return_type_check, GetError, JsonPath};
11+
use crate::common::{get_err, invoke, jiter_json_find, return_type_check, GetError, JsonPath};
1212
use crate::common_macros::make_udf_function;
13+
use crate::common_union::json_field_metadata;
1314

1415
make_udf_function!(
1516
JsonGetJson,
@@ -54,7 +55,7 @@ impl ScalarUDFImpl for JsonGetJson {
5455
let arg_types: Vec<DataType> = args.arg_fields.iter().map(|f| f.data_type().clone()).collect();
5556
let return_type = self.return_type(&arg_types)?;
5657
Ok(Arc::new(
57-
Field::new(self.name(), return_type, true).with_metadata(is_json_metadata()),
58+
Field::new(self.name(), return_type, true).with_metadata(json_field_metadata()),
5859
))
5960
}
6061

0 commit comments

Comments
 (0)