CASE WHEN expression with execution context and DataFusion equivalence tests

lukekim · lukekim · commit 09fff48c9309 · 2026-01-29T14:01:53.000-08:00
diff --git a/vortex-array/benches/expr/case_when_bench.rs b/vortex-array/benches/expr/case_when_bench.rs
@@ -4,19 +4,28 @@
 #![allow(clippy::unwrap_used)]
 #![allow(clippy::cast_possible_truncation)]
 
+use std::sync::LazyLock;
+
 use divan::Bencher;
 use vortex_array::ArrayRef;
+use vortex_array::Canonical;
 use vortex_array::IntoArray;
+use vortex_array::VortexSessionExecute;
 use vortex_array::arrays::StructArray;
 use vortex_array::expr::case_when;
 use vortex_array::expr::get_item;
 use vortex_array::expr::gt;
 use vortex_array::expr::lit;
 use vortex_array::expr::nested_case_when;
 use vortex_array::expr::root;
+use vortex_array::session::ArraySession;
 use vortex_array::validity::Validity;
 use vortex_buffer::Buffer;
 use vortex_dtype::FieldNames;
+use vortex_session::VortexSession;
+
+static SESSION: LazyLock<VortexSession> =
+    LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
 
 fn main() {
     divan::main();
@@ -49,7 +58,14 @@ fn case_when_simple(bencher: Bencher, size: usize) {
 
     bencher
         .with_inputs(|| (&expr, &array))
-        .bench_refs(|(expr, array)| expr.evaluate(array).unwrap());
+        .bench_refs(|(expr, array)| {
+            let mut ctx = SESSION.create_execution_ctx();
+            array
+                .apply(expr)
+                .unwrap()
+                .execute::<Canonical>(&mut ctx)
+                .unwrap()
+        });
 }
 
 /// Benchmark nested CASE WHEN with multiple conditions.
@@ -69,7 +85,14 @@ fn case_when_nested_3_conditions(bencher: Bencher, size: usize) {
 
     bencher
         .with_inputs(|| (&expr, &array))
-        .bench_refs(|(expr, array)| expr.evaluate(array).unwrap());
+        .bench_refs(|(expr, array)| {
+            let mut ctx = SESSION.create_execution_ctx();
+            array
+                .apply(expr)
+                .unwrap()
+                .execute::<Canonical>(&mut ctx)
+                .unwrap()
+        });
 }
 
 /// Benchmark CASE WHEN where all conditions are true (short-circuit path).
@@ -86,7 +109,14 @@ fn case_when_all_true(bencher: Bencher, size: usize) {
 
     bencher
         .with_inputs(|| (&expr, &array))
-        .bench_refs(|(expr, array)| expr.evaluate(array).unwrap());
+        .bench_refs(|(expr, array)| {
+            let mut ctx = SESSION.create_execution_ctx();
+            array
+                .apply(expr)
+                .unwrap()
+                .execute::<Canonical>(&mut ctx)
+                .unwrap()
+        });
 }
 
 /// Benchmark CASE WHEN where all conditions are false (short-circuit path).
@@ -103,5 +133,12 @@ fn case_when_all_false(bencher: Bencher, size: usize) {
 
     bencher
         .with_inputs(|| (&expr, &array))
-        .bench_refs(|(expr, array)| expr.evaluate(array).unwrap());
+        .bench_refs(|(expr, array)| {
+            let mut ctx = SESSION.create_execution_ctx();
+            array
+                .apply(expr)
+                .unwrap()
+                .execute::<Canonical>(&mut ctx)
+                .unwrap()
+        });
 }
diff --git a/vortex-array/src/expr/exprs/case_when.rs b/vortex-array/src/expr/exprs/case_when.rs
@@ -287,16 +287,21 @@ pub fn nested_case_when(
 
 #[cfg(test)]
 mod tests {
+    use std::sync::LazyLock;
+
     use vortex_buffer::buffer;
     use vortex_dtype::DType;
     use vortex_dtype::Nullability;
     use vortex_dtype::PType;
     use vortex_error::VortexExpect as _;
     use vortex_scalar::Scalar;
+    use vortex_session::VortexSession;
 
     use super::*;
+    use crate::Canonical;
     use crate::IntoArray;
     use crate::ToCanonical;
+    use crate::VortexSessionExecute as _;
     use crate::arrays::BoolArray;
     use crate::arrays::PrimitiveArray;
     use crate::arrays::StructArray;
@@ -307,6 +312,21 @@ mod tests {
     use crate::expr::exprs::literal::lit;
     use crate::expr::exprs::root::root;
     use crate::expr::test_harness;
+    use crate::session::ArraySession;
+
+    static SESSION: LazyLock<VortexSession> =
+        LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
+
+    /// Applies `expr` to `array` and executes the result to `Canonical`; panics on any error.
+    fn evaluate_expr(expr: &Expression, array: &ArrayRef) -> ArrayRef {
+        let mut ctx = SESSION.create_execution_ctx();
+        array
+            .apply(expr)
+            .unwrap()
+            .execute::<Canonical>(&mut ctx)
+            .unwrap()
+            .into_array()
+    }
 
     // ==================== Serialization Tests ====================
 
@@ -455,7 +475,7 @@ mod tests {
             lit(0i32),
         );
 
-        let result = expr.evaluate(&test_array).unwrap().to_primitive();
+        let result = evaluate_expr(&expr, &test_array).to_primitive();
         assert_eq!(result.as_slice::<i32>(), &[0, 0, 100, 100, 100]);
     }
 
@@ -475,7 +495,7 @@ mod tests {
             Some(lit(0i32)),
         );
 
-        let result = expr.evaluate(&test_array).unwrap().to_primitive();
+        let result = evaluate_expr(&expr, &test_array).to_primitive();
         assert_eq!(result.as_slice::<i32>(), &[10, 0, 30, 0, 0]);
     }
 
@@ -495,7 +515,7 @@ mod tests {
             Some(lit(0i32)),
         );
 
-        let result = expr.evaluate(&test_array).unwrap().to_primitive();
+        let result = evaluate_expr(&expr, &test_array).to_primitive();
         assert_eq!(result.as_slice::<i32>(), &[0, 0, 100, 100, 100]);
     }
 
@@ -508,7 +528,7 @@ mod tests {
 
         let expr = case_when_no_else(gt(get_item("value", root()), lit(3i32)), lit(100i32));
 
-        let result = expr.evaluate(&test_array).unwrap();
+        let result = evaluate_expr(&expr, &test_array);
         assert!(result.dtype().is_nullable());
 
         assert_eq!(
@@ -546,7 +566,7 @@ mod tests {
             lit(0i32),
         );
 
-        let result = expr.evaluate(&test_array).unwrap().to_primitive();
+        let result = evaluate_expr(&expr, &test_array).to_primitive();
         assert_eq!(result.as_slice::<i32>(), &[0, 0, 0, 0, 0]);
     }
 
@@ -563,15 +583,15 @@ mod tests {
             lit(0i32),
         );
 
-        let result = expr.evaluate(&test_array).unwrap().to_primitive();
+        let result = evaluate_expr(&expr, &test_array).to_primitive();
         assert_eq!(result.as_slice::<i32>(), &[100, 100, 100, 100, 100]);
     }
 
     #[test]
     fn test_evaluate_with_literal_condition() {
         let test_array = buffer![1i32, 2, 3].into_array();
         let expr = case_when(lit(true), lit(100i32), lit(0i32));
-        let result = expr.evaluate(&test_array).unwrap();
+        let result = evaluate_expr(&expr, &test_array);
 
         if let Some(constant) = result.as_constant() {
             assert_eq!(constant, Scalar::from(100i32));
@@ -594,9 +614,9 @@ mod tests {
             lit(false),
         );
 
-        let result = expr.evaluate(&test_array).unwrap().to_bool();
+        let result = evaluate_expr(&expr, &test_array).to_bool();
         assert_eq!(
-            result.bit_buffer().iter().collect::<Vec<_>>(),
+            result.to_bit_buffer().iter().collect::<Vec<_>>(),
             vec![false, false, true, true, true]
         );
     }
@@ -612,7 +632,7 @@ mod tests {
 
         let expr = case_when(get_item("cond", root()), lit(100i32), lit(0i32));
 
-        let result = expr.evaluate(&test_array).unwrap().to_primitive();
+        let result = evaluate_expr(&expr, &test_array).to_primitive();
         assert_eq!(result.as_slice::<i32>(), &[100, 0, 0, 0, 100]);
     }
 
@@ -635,7 +655,7 @@ mod tests {
             lit(0i32),
         );
 
-        let result = expr.evaluate(&test_array).unwrap();
+        let result = evaluate_expr(&expr, &test_array);
         let prim = result.to_primitive();
         assert_eq!(prim.as_slice::<i32>(), &[0, 0, 30, 40, 50]);
     }
@@ -651,12 +671,11 @@ mod tests {
 
         let expr = case_when(get_item("cond", root()), lit(100i32), lit(0i32));
 
-        let result = expr.evaluate(&test_array).unwrap().to_primitive();
+        let result = evaluate_expr(&expr, &test_array).to_primitive();
         assert_eq!(result.as_slice::<i32>(), &[0, 0, 0]);
     }
 
-    // Note: Direct execute tests are covered through evaluate tests above,
-    // since evaluate() calls execute() internally.
+    // Note: Direct execute tests are covered through apply+execute tests above.
 
     // Note: The binary CASE WHEN implementation using `zip` does NOT provide
     // short-circuit/lazy evaluation. All child expressions are evaluated first,
diff --git a/vortex-datafusion/src/convert/exprs.rs b/vortex-datafusion/src/convert/exprs.rs
@@ -847,4 +847,96 @@ mod tests {
 
         assert!(!can_be_pushed_down_impl(&like_expr, &test_schema));
     }
+
+    /// Test that evaluating a CASE expression on an Arrow `RecordBatch` with DataFusion
+    /// yields the same values as executing the converted Vortex expression on the same data.
+    #[test]
+    fn test_case_when_datafusion_vortex_equivalence() {
+        use datafusion::arrow::array::Int32Array;
+        use datafusion::arrow::array::RecordBatch;
+        use datafusion_physical_expr::expressions::CaseExpr;
+        use vortex::VortexSessionDefault;
+        use vortex::array::ArrayRef;
+        use vortex::array::Canonical;
+        use vortex::array::VortexSessionExecute as _;
+        use vortex::array::arrow::FromArrowArray;
+        use vortex::session::VortexSession;
+
+        // Create test data: a single non-nullable Int32 column named "value"
+        let values = Arc::new(Int32Array::from(vec![1, 5, 10, 15, 20]));
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "value",
+            DataType::Int32,
+            false,
+        )]));
+        let batch = RecordBatch::try_new(schema, vec![values]).unwrap();
+
+        // Build a DataFusion CASE expression:
+        // CASE WHEN value > 10 THEN 100 WHEN value > 5 THEN 50 ELSE 0 END
+        let col_value = Arc::new(df_expr::Column::new("value", 0)) as Arc<dyn PhysicalExpr>;
+        let lit_10 =
+            Arc::new(df_expr::Literal::new(ScalarValue::Int32(Some(10)))) as Arc<dyn PhysicalExpr>;
+        let lit_5 =
+            Arc::new(df_expr::Literal::new(ScalarValue::Int32(Some(5)))) as Arc<dyn PhysicalExpr>;
+        let lit_100 =
+            Arc::new(df_expr::Literal::new(ScalarValue::Int32(Some(100)))) as Arc<dyn PhysicalExpr>;
+        let lit_50 =
+            Arc::new(df_expr::Literal::new(ScalarValue::Int32(Some(50)))) as Arc<dyn PhysicalExpr>;
+        let lit_0 =
+            Arc::new(df_expr::Literal::new(ScalarValue::Int32(Some(0)))) as Arc<dyn PhysicalExpr>;
+
+        // WHEN value > 10 THEN 100
+        let when1 = Arc::new(df_expr::BinaryExpr::new(
+            col_value.clone(),
+            DFOperator::Gt,
+            lit_10,
+        )) as Arc<dyn PhysicalExpr>;
+        // WHEN value > 5 THEN 50
+        let when2 = Arc::new(df_expr::BinaryExpr::new(col_value, DFOperator::Gt, lit_5))
+            as Arc<dyn PhysicalExpr>;
+
+        let case_expr =
+            CaseExpr::try_new(None, vec![(when1, lit_100), (when2, lit_50)], Some(lit_0)).unwrap();
+
+        // Apply DataFusion expression
+        let df_result = case_expr.evaluate(&batch).unwrap();
+        let df_array = df_result.into_array(batch.num_rows()).unwrap();
+
+        // Convert to Vortex expression
+        let expr_convertor = DefaultExpressionConvertor::default();
+        let vortex_expr = expr_convertor.try_convert_case_expr(&case_expr).unwrap();
+
+        // Convert batch to Vortex array
+        let vortex_array: ArrayRef = ArrayRef::from_arrow(&batch, false).unwrap();
+
+        // Apply the Vortex expression and execute it to canonical form
+        let session = VortexSession::default();
+        let mut ctx = session.create_execution_ctx();
+        let vortex_result = vortex_array
+            .apply(&vortex_expr)
+            .unwrap()
+            .execute::<Canonical>(&mut ctx)
+            .unwrap();
+
+        // Extract the Vortex result as a Vec<i32> for comparison (no Arrow conversion happens)
+        let vortex_as_arrow = vortex_result.into_primitive().as_slice::<i32>().to_vec();
+
+        // Convert DataFusion result to Vec for comparison
+        let df_as_arrow: Vec<i32> = df_array
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap()
+            .values()
+            .to_vec();
+
+        // Compare results: pin the DataFusion output first, then check Vortex matches it
+        // Expected: [0, 0, 50, 100, 100] for values [1, 5, 10, 15, 20]
+        // value=1: not > 10, not > 5 -> ELSE 0
+        // value=5: not > 10, not > 5 -> ELSE 0
+        // value=10: not > 10, > 5 -> 50
+        // value=15: > 10 -> 100
+        // value=20: > 10 -> 100
+        assert_eq!(df_as_arrow, vec![0, 0, 50, 100, 100]);
+        assert_eq!(vortex_as_arrow, df_as_arrow);
+    }
 }