Skip to content

Commit c2899d2

Browse files
authored
Don't return an error when we have an unsupported node, bubble up "TRUE" as in keep for that node, up to any and or or node and handle empty IN list. (#8)
1 parent b701e1e commit c2899d2

2 files changed

Lines changed: 122 additions & 9 deletions

File tree

vortex-datafusion/src/convert/exprs.rs

Lines changed: 121 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,14 @@ pub(crate) fn make_vortex_predicate(
4040
return None;
4141
}
4242

43-
let expr = Expression::try_from_df(e.as_ref());
44-
45-
Some(expr)
43+
// If conversion fails, skip this expression (equivalent to lit(true) in AND conjunction)
44+
Expression::try_from_df(e.as_ref()).ok()
4645
})
47-
.collect::<VortexResult<Vec<_>>>()?;
46+
.collect::<Vec<_>>();
4847

4948
Ok(exprs.into_iter().reduce(and))
5049
}
5150

52-
// TODO(joe): Don't return an error when we have an unsupported node, bubble up "TRUE" as in keep
53-
// for that node, up to any `and` or `or` node.
5451
impl TryFromDataFusion<dyn PhysicalExpr> for Expression {
5552
fn try_from_df(df: &dyn PhysicalExpr) -> VortexResult<Self> {
5653
if let Some(binary_expr) = df.as_any().downcast_ref::<df_expr::BinaryExpr>() {
@@ -112,8 +109,13 @@ impl TryFromDataFusion<dyn PhysicalExpr> for Expression {
112109
})
113110
.try_collect()?;
114111

112+
// Handle empty IN list: `x IN ()` is always false, `x NOT IN ()` is always true
113+
let Some(first_element) = list_elements.first() else {
114+
return Ok(lit(in_list.negated()));
115+
};
116+
115117
let list = Scalar::list(
116-
list_elements[0].dtype().clone(),
118+
first_element.dtype().clone(),
117119
list_elements,
118120
Nullability::Nullable,
119121
);
@@ -294,7 +296,7 @@ fn supported_data_types(dt: &DataType) -> bool {
294296
);
295297

296298
if !is_supported {
297-
log::debug!("DataFusion data type {dt:?} is not supported");
299+
tracing::debug!(data_type = ?dt, "DataFusion data type is not supported");
298300
}
299301

300302
is_supported
@@ -502,6 +504,28 @@ mod tests {
502504
);
503505
}
504506

507+
#[test]
508+
fn test_expr_from_df_in_list_empty_returns_false() {
509+
// `x IN ()` should return lit(false)
510+
let value = Arc::new(df_expr::Column::new("col", 0)) as Arc<dyn PhysicalExpr>;
511+
let in_list = df_expr::InListExpr::new(value, vec![], false, None);
512+
513+
let result = Expression::try_from_df(&in_list).unwrap();
514+
515+
assert_snapshot!(result.display_tree().to_string(), @"vortex.literal false");
516+
}
517+
518+
#[test]
519+
fn test_expr_from_df_in_list_empty_negated_returns_true() {
520+
// `x NOT IN ()` should return lit(true)
521+
let value = Arc::new(df_expr::Column::new("col", 0)) as Arc<dyn PhysicalExpr>;
522+
let in_list = df_expr::InListExpr::new(value, vec![], true, None);
523+
524+
let result = Expression::try_from_df(&in_list).unwrap();
525+
526+
assert_snapshot!(result.display_tree().to_string(), @"vortex.literal true");
527+
}
528+
505529
#[rstest]
506530
// Supported types
507531
#[case::null(DataType::Null, true)]
@@ -705,4 +729,93 @@ mod tests {
705729

706730
assert_eq!(can_be_pushed_down(&get_field_expr, &schema), expected);
707731
}
732+
733+
/// Create an unsupported scalar function expression (simulating functions like to_timestamp)
734+
fn create_unsupported_scalar_fn() -> Arc<dyn PhysicalExpr> {
735+
use datafusion_functions::datetime::to_timestamp::ToTimestampFunc;
736+
737+
let arg = Arc::new(df_expr::Literal::new(ScalarValue::Utf8(Some(
738+
"2024-01-01".to_string(),
739+
)))) as Arc<dyn PhysicalExpr>;
740+
741+
Arc::new(ScalarFunctionExpr::new(
742+
"to_timestamp",
743+
Arc::new(ScalarUDF::from(ToTimestampFunc::new())),
744+
vec![arg],
745+
Arc::new(Field::new(
746+
"result",
747+
DataType::Timestamp(ArrowTimeUnit::Nanosecond, None),
748+
true,
749+
)),
750+
Arc::new(ConfigOptions::new()),
751+
))
752+
}
753+
754+
#[test]
755+
fn test_make_vortex_predicate_skips_unsupported_scalar_function() {
756+
// Unsupported scalar function like to_timestamp should be skipped, not error
757+
let unsupported_fn = create_unsupported_scalar_fn();
758+
let result = make_vortex_predicate(&[&unsupported_fn]);
759+
760+
// Should succeed (not error) and return None since the only expression was skipped
761+
assert!(result.is_ok());
762+
assert!(result.unwrap().is_none());
763+
}
764+
765+
#[test]
766+
fn test_make_vortex_predicate_combines_supported_and_skips_unsupported() {
767+
// Mix of supported column expression and unsupported scalar function
768+
let supported_col = Arc::new(df_expr::Column::new("test", 0)) as Arc<dyn PhysicalExpr>;
769+
let unsupported_fn = create_unsupported_scalar_fn();
770+
771+
let result = make_vortex_predicate(&[&supported_col, &unsupported_fn]);
772+
773+
// Should succeed and return the supported expression only
774+
assert!(result.is_ok());
775+
let predicate = result.unwrap();
776+
assert!(predicate.is_some());
777+
778+
// The result should just be the column expression since the unsupported one was skipped
779+
assert_snapshot!(predicate.unwrap().display_tree().to_string(), @r#"
780+
vortex.get_item "test"
781+
└── input: vortex.root
782+
"#);
783+
}
784+
785+
#[test]
786+
fn test_make_vortex_predicate_multiple_supported_with_unsupported() {
787+
// Two supported columns and one unsupported function
788+
let col1 = Arc::new(df_expr::Column::new("col1", 0)) as Arc<dyn PhysicalExpr>;
789+
let col2 = Arc::new(df_expr::Column::new("col2", 1)) as Arc<dyn PhysicalExpr>;
790+
let unsupported_fn = create_unsupported_scalar_fn();
791+
792+
let result = make_vortex_predicate(&[&col1, &unsupported_fn, &col2]);
793+
794+
// Should succeed and return AND of the two supported expressions
795+
assert!(result.is_ok());
796+
let predicate = result.unwrap();
797+
assert!(predicate.is_some());
798+
799+
// The result should be an AND of col1 and col2
800+
assert_snapshot!(predicate.unwrap().display_tree().to_string(), @r#"
801+
vortex.binary and
802+
├── lhs: vortex.get_item "col1"
803+
│ └── input: vortex.root
804+
└── rhs: vortex.get_item "col2"
805+
└── input: vortex.root
806+
"#);
807+
}
808+
809+
#[test]
810+
fn test_make_vortex_predicate_all_unsupported_returns_none() {
811+
// When all expressions are unsupported, should return None (no filter)
812+
let unsupported_fn1 = create_unsupported_scalar_fn();
813+
let unsupported_fn2 = create_unsupported_scalar_fn();
814+
815+
let result = make_vortex_predicate(&[&unsupported_fn1, &unsupported_fn2]);
816+
817+
// Should succeed and return None
818+
assert!(result.is_ok());
819+
assert!(result.unwrap().is_none());
820+
}
708821
}

vortex-datafusion/src/persistent/format.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ use vortex::error::{VortexExpect, VortexResult, vortex_err};
3535
use vortex::file::VORTEX_FILE_EXTENSION;
3636
use vortex::scalar::Scalar;
3737
use vortex::session::VortexSession;
38-
use vortex::stats::{ArrayStats, Stat, StatsSet};
38+
use vortex::stats::{Stat, StatsSet};
3939
use vortex::{VortexSessionDefault, stats};
4040

4141
use super::cache::VortexFileCache;

0 commit comments

Comments
 (0)