Skip to content

Commit 3a907c5

Browse files
Add ExpressionPlacement::Leaf for JSON UDFs to enable scan-level pushdown
This allows the ExtractLeafExpressions optimizer to push JSON accessor functions (json_as_text, json_get, etc.) into the Parquet scan projection, where custom PhysicalExprAdapters can rewrite them for shredded/reduced column fallback. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 8fad18d commit 3a907c5

12 files changed

Lines changed: 179 additions & 3 deletions

src/json_as_text.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,22 @@ impl ScalarUDFImpl for JsonAsText {
5656
fn aliases(&self) -> &[String] {
5757
&self.aliases
5858
}
59+
60+
fn placement(
61+
&self,
62+
args: &[datafusion::logical_expr::ExpressionPlacement],
63+
) -> datafusion::logical_expr::ExpressionPlacement {
64+
// If the first argument is a column and the remaining arguments are literals (a path)
65+
// then we can push this UDF down to the leaf nodes.
66+
if args.len() >= 2
67+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
68+
&& args[1..].iter().all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
69+
{
70+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
71+
} else {
72+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
73+
}
74+
}
5975
}
6076

6177
impl InvokeResult for StringArray {

src/json_contains.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,22 @@ impl ScalarUDFImpl for JsonContains {
6060
fn aliases(&self) -> &[String] {
6161
&self.aliases
6262
}
63+
64+
fn placement(
65+
&self,
66+
args: &[datafusion::logical_expr::ExpressionPlacement],
67+
) -> datafusion::logical_expr::ExpressionPlacement {
68+
// If the first argument is a column and the remaining arguments are literals (a path)
69+
// then we can push this UDF down to the leaf nodes.
70+
if args.len() >= 2
71+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
72+
&& args[1..].iter().all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
73+
{
74+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
75+
} else {
76+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
77+
}
78+
}
6379
}
6480

6581
impl InvokeResult for BooleanArray {

src/json_get.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,22 @@ impl ScalarUDFImpl for JsonGet {
6262
fn aliases(&self) -> &[String] {
6363
&self.aliases
6464
}
65+
66+
fn placement(
67+
&self,
68+
args: &[datafusion::logical_expr::ExpressionPlacement],
69+
) -> datafusion::logical_expr::ExpressionPlacement {
70+
// If the first argument is a column and the remaining arguments are literals (a path)
71+
// then we can push this UDF down to the leaf nodes.
72+
if args.len() >= 2
73+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
74+
&& args[1..].iter().all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
75+
{
76+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
77+
} else {
78+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
79+
}
80+
}
6581
}
6682

6783
impl InvokeResult for JsonUnion {

src/json_get_array.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,22 @@ impl ScalarUDFImpl for JsonGetArray {
6464
fn aliases(&self) -> &[String] {
6565
&self.aliases
6666
}
67+
68+
fn placement(
69+
&self,
70+
args: &[datafusion::logical_expr::ExpressionPlacement],
71+
) -> datafusion::logical_expr::ExpressionPlacement {
72+
// If the first argument is a column and the remaining arguments are literals (a path)
73+
// then we can push this UDF down to the leaf nodes.
74+
if args.len() >= 2
75+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
76+
&& args[1..].iter().all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
77+
{
78+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
79+
} else {
80+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
81+
}
82+
}
6783
}
6884

6985
#[derive(Debug)]

src/json_get_bool.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,22 @@ impl ScalarUDFImpl for JsonGetBool {
5555
fn aliases(&self) -> &[String] {
5656
&self.aliases
5757
}
58+
59+
fn placement(
60+
&self,
61+
args: &[datafusion::logical_expr::ExpressionPlacement],
62+
) -> datafusion::logical_expr::ExpressionPlacement {
63+
// If the first argument is a column and the remaining arguments are literals (a path)
64+
// then we can push this UDF down to the leaf nodes.
65+
if args.len() >= 2
66+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
67+
&& args[1..].iter().all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
68+
{
69+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
70+
} else {
71+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
72+
}
73+
}
5874
}
5975

6076
fn jiter_json_get_bool(json_data: Option<&str>, path: &[JsonPath]) -> Result<bool, GetError> {

src/json_get_float.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,22 @@ impl ScalarUDFImpl for JsonGetFloat {
5656
fn aliases(&self) -> &[String] {
5757
&self.aliases
5858
}
59+
60+
fn placement(
61+
&self,
62+
args: &[datafusion::logical_expr::ExpressionPlacement],
63+
) -> datafusion::logical_expr::ExpressionPlacement {
64+
// If the first argument is a column and the remaining arguments are literals (a path)
65+
// then we can push this UDF down to the leaf nodes.
66+
if args.len() >= 2
67+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
68+
&& args[1..].iter().all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
69+
{
70+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
71+
} else {
72+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
73+
}
74+
}
5975
}
6076

6177
impl InvokeResult for Float64Array {

src/json_get_int.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,22 @@ impl ScalarUDFImpl for JsonGetInt {
5656
fn aliases(&self) -> &[String] {
5757
&self.aliases
5858
}
59+
60+
fn placement(
61+
&self,
62+
args: &[datafusion::logical_expr::ExpressionPlacement],
63+
) -> datafusion::logical_expr::ExpressionPlacement {
64+
// If the first argument is a column and the remaining arguments are literals (a path)
65+
// then we can push this UDF down to the leaf nodes.
66+
if args.len() >= 2
67+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
68+
&& args[1..].iter().all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
69+
{
70+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
71+
} else {
72+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
73+
}
74+
}
5975
}
6076

6177
impl InvokeResult for Int64Array {

src/json_get_json.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,22 @@ impl ScalarUDFImpl for JsonGetJson {
5454
fn aliases(&self) -> &[String] {
5555
&self.aliases
5656
}
57+
58+
fn placement(
59+
&self,
60+
args: &[datafusion::logical_expr::ExpressionPlacement],
61+
) -> datafusion::logical_expr::ExpressionPlacement {
62+
// If the first argument is a column and the remaining arguments are literals (a path)
63+
// then we can push this UDF down to the leaf nodes.
64+
if args.len() >= 2
65+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
66+
&& args[1..].iter().all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
67+
{
68+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
69+
} else {
70+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
71+
}
72+
}
5773
}
5874

5975
fn jiter_json_get_json(opt_json: Option<&str>, path: &[JsonPath]) -> Result<String, GetError> {

src/json_get_str.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,22 @@ impl ScalarUDFImpl for JsonGetStr {
5555
fn aliases(&self) -> &[String] {
5656
&self.aliases
5757
}
58+
59+
fn placement(
60+
&self,
61+
args: &[datafusion::logical_expr::ExpressionPlacement],
62+
) -> datafusion::logical_expr::ExpressionPlacement {
63+
// If the first argument is a column and the remaining arguments are literals (a path)
64+
// then we can push this UDF down to the leaf nodes.
65+
if args.len() >= 2
66+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
67+
&& args[1..].iter().all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
68+
{
69+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
70+
} else {
71+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
72+
}
73+
}
5874
}
5975

6076
fn jiter_json_get_str(json_data: Option<&str>, path: &[JsonPath]) -> Result<String, GetError> {

src/json_length.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,22 @@ impl ScalarUDFImpl for JsonLength {
5656
fn aliases(&self) -> &[String] {
5757
&self.aliases
5858
}
59+
60+
fn placement(
61+
&self,
62+
args: &[datafusion::logical_expr::ExpressionPlacement],
63+
) -> datafusion::logical_expr::ExpressionPlacement {
64+
// If the first argument is a column and the remaining arguments are literals (a path)
65+
// then we can push this UDF down to the leaf nodes.
66+
if args.len() >= 2
67+
&& matches!(args[0], datafusion::logical_expr::ExpressionPlacement::Column)
68+
&& args[1..].iter().all(|arg| matches!(arg, datafusion::logical_expr::ExpressionPlacement::Literal))
69+
{
70+
datafusion::logical_expr::ExpressionPlacement::MoveTowardsLeafNodes
71+
} else {
72+
datafusion::logical_expr::ExpressionPlacement::KeepInPlace
73+
}
74+
}
5975
}
6076

6177
impl InvokeResult for UInt64Array {

0 commit comments

Comments
 (0)