Skip to content

Commit d47b614

Browse files
Add ExpressionPlacement::Leaf for JSON UDFs to enable scan-level pushdown
This allows the ExtractLeafExpressions optimizer to push JSON accessor functions (json_as_text, json_get, etc.) into the Parquet scan projection, where custom PhysicalExprAdapters can rewrite them for shredded/reduced column fallback. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 8fad18d commit d47b614

12 files changed

Lines changed: 201 additions & 3 deletions

src/json_as_text.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,24 @@ impl ScalarUDFImpl for JsonAsText {
5656
fn aliases(&self) -> &[String] {
5757
&self.aliases
5858
}
59+
60+
fn placement(
61+
&self,
62+
args: &[datafusion::logical_expr::ExpressionPlacement],
63+
) -> datafusion::logical_expr::ExpressionPlacement {
64+
// If the first argument is a column and the remaining arguments are literals (a path)
65+
// then we can push this UDF down to the leaf nodes.
66+
if args.len() >= 2
67+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
68+
&& args[1..]
69+
.iter()
70+
.all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
71+
{
72+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
73+
} else {
74+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
75+
}
76+
}
5977
}
6078

6179
impl InvokeResult for StringArray {

src/json_contains.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,24 @@ impl ScalarUDFImpl for JsonContains {
6060
fn aliases(&self) -> &[String] {
6161
&self.aliases
6262
}
63+
64+
fn placement(
65+
&self,
66+
args: &[datafusion::logical_expr::ExpressionPlacement],
67+
) -> datafusion::logical_expr::ExpressionPlacement {
68+
// If the first argument is a column and the remaining arguments are literals (a path)
69+
// then we can push this UDF down to the leaf nodes.
70+
if args.len() >= 2
71+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
72+
&& args[1..]
73+
.iter()
74+
.all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
75+
{
76+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
77+
} else {
78+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
79+
}
80+
}
6381
}
6482

6583
impl InvokeResult for BooleanArray {

src/json_get.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,24 @@ impl ScalarUDFImpl for JsonGet {
6262
fn aliases(&self) -> &[String] {
6363
&self.aliases
6464
}
65+
66+
fn placement(
67+
&self,
68+
args: &[datafusion::logical_expr::ExpressionPlacement],
69+
) -> datafusion::logical_expr::ExpressionPlacement {
70+
// If the first argument is a column and the remaining arguments are literals (a path)
71+
// then we can push this UDF down to the leaf nodes.
72+
if args.len() >= 2
73+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
74+
&& args[1..]
75+
.iter()
76+
.all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
77+
{
78+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
79+
} else {
80+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
81+
}
82+
}
6583
}
6684

6785
impl InvokeResult for JsonUnion {

src/json_get_array.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,24 @@ impl ScalarUDFImpl for JsonGetArray {
6464
fn aliases(&self) -> &[String] {
6565
&self.aliases
6666
}
67+
68+
fn placement(
69+
&self,
70+
args: &[datafusion::logical_expr::ExpressionPlacement],
71+
) -> datafusion::logical_expr::ExpressionPlacement {
72+
// If the first argument is a column and the remaining arguments are literals (a path)
73+
// then we can push this UDF down to the leaf nodes.
74+
if args.len() >= 2
75+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
76+
&& args[1..]
77+
.iter()
78+
.all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
79+
{
80+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
81+
} else {
82+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
83+
}
84+
}
6785
}
6886

6987
#[derive(Debug)]

src/json_get_bool.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,24 @@ impl ScalarUDFImpl for JsonGetBool {
5555
fn aliases(&self) -> &[String] {
5656
&self.aliases
5757
}
58+
59+
fn placement(
60+
&self,
61+
args: &[datafusion::logical_expr::ExpressionPlacement],
62+
) -> datafusion::logical_expr::ExpressionPlacement {
63+
// If the first argument is a column and the remaining arguments are literals (a path)
64+
// then we can push this UDF down to the leaf nodes.
65+
if args.len() >= 2
66+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
67+
&& args[1..]
68+
.iter()
69+
.all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
70+
{
71+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
72+
} else {
73+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
74+
}
75+
}
5876
}
5977

6078
fn jiter_json_get_bool(json_data: Option<&str>, path: &[JsonPath]) -> Result<bool, GetError> {

src/json_get_float.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,24 @@ impl ScalarUDFImpl for JsonGetFloat {
5656
fn aliases(&self) -> &[String] {
5757
&self.aliases
5858
}
59+
60+
fn placement(
61+
&self,
62+
args: &[datafusion::logical_expr::ExpressionPlacement],
63+
) -> datafusion::logical_expr::ExpressionPlacement {
64+
// If the first argument is a column and the remaining arguments are literals (a path)
65+
// then we can push this UDF down to the leaf nodes.
66+
if args.len() >= 2
67+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
68+
&& args[1..]
69+
.iter()
70+
.all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
71+
{
72+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
73+
} else {
74+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
75+
}
76+
}
5977
}
6078

6179
impl InvokeResult for Float64Array {

src/json_get_int.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,24 @@ impl ScalarUDFImpl for JsonGetInt {
5656
fn aliases(&self) -> &[String] {
5757
&self.aliases
5858
}
59+
60+
fn placement(
61+
&self,
62+
args: &[datafusion::logical_expr::ExpressionPlacement],
63+
) -> datafusion::logical_expr::ExpressionPlacement {
64+
// If the first argument is a column and the remaining arguments are literals (a path)
65+
// then we can push this UDF down to the leaf nodes.
66+
if args.len() >= 2
67+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
68+
&& args[1..]
69+
.iter()
70+
.all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
71+
{
72+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
73+
} else {
74+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
75+
}
76+
}
5977
}
6078

6179
impl InvokeResult for Int64Array {

src/json_get_json.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,24 @@ impl ScalarUDFImpl for JsonGetJson {
5454
fn aliases(&self) -> &[String] {
5555
&self.aliases
5656
}
57+
58+
fn placement(
59+
&self,
60+
args: &[datafusion::logical_expr::ExpressionPlacement],
61+
) -> datafusion::logical_expr::ExpressionPlacement {
62+
// If the first argument is a column and the remaining arguments are literals (a path)
63+
// then we can push this UDF down to the leaf nodes.
64+
if args.len() >= 2
65+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
66+
&& args[1..]
67+
.iter()
68+
.all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
69+
{
70+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
71+
} else {
72+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
73+
}
74+
}
5775
}
5876

5977
fn jiter_json_get_json(opt_json: Option<&str>, path: &[JsonPath]) -> Result<String, GetError> {

src/json_get_str.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,24 @@ impl ScalarUDFImpl for JsonGetStr {
5555
fn aliases(&self) -> &[String] {
5656
&self.aliases
5757
}
58+
59+
fn placement(
60+
&self,
61+
args: &[datafusion::logical_expr::ExpressionPlacement],
62+
) -> datafusion::logical_expr::ExpressionPlacement {
63+
// If the first argument is a column and the remaining arguments are literals (a path)
64+
// then we can push this UDF down to the leaf nodes.
65+
if args.len() >= 2
66+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
67+
&& args[1..]
68+
.iter()
69+
.all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
70+
{
71+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
72+
} else {
73+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
74+
}
75+
}
5876
}
5977

6078
fn jiter_json_get_str(json_data: Option<&str>, path: &[JsonPath]) -> Result<String, GetError> {

src/json_length.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,24 @@ impl ScalarUDFImpl for JsonLength {
5656
fn aliases(&self) -> &[String] {
5757
&self.aliases
5858
}
59+
60+
fn placement(
61+
&self,
62+
args: &[datafusion::logical_expr::ExpressionPlacement],
63+
) -> datafusion::logical_expr::ExpressionPlacement {
64+
// If the first argument is a column and the remaining arguments are literals (a path)
65+
// then we can push this UDF down to the leaf nodes.
66+
if args.len() >= 2
67+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
68+
&& args[1..]
69+
.iter()
70+
.all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
71+
{
72+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
73+
} else {
74+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
75+
}
76+
}
5977
}
6078

6179
impl InvokeResult for UInt64Array {

0 commit comments

Comments
 (0)