@@ -107,12 +107,43 @@ fn evaluate_predicate_recursive(
107107 }
108108 }
109109 Predicate :: Not ( predicate) => {
110- // For NOT: evaluate predicate, then negate
111- let mut temp_result = vec ! [ true ; result. len( ) ] ;
112- evaluate_predicate_recursive ( predicate, row_index, schema, & mut temp_result) ?;
113- // NOT logic: result[i] = !temp_result[i]
114- for ( r, t) in result. iter_mut ( ) . zip ( temp_result. iter ( ) ) {
115- * r = !* t;
110+ match & * * predicate {
111+ Predicate :: Not ( inner) => {
112+ evaluate_predicate_recursive ( inner, row_index, schema, result) ?;
113+ }
114+ Predicate :: IsNull { column } => {
115+ evaluate_is_not_null ( column, row_index, schema, result) ?;
116+ }
117+ Predicate :: IsNotNull { column } => {
118+ evaluate_is_null ( column, row_index, schema, result) ?;
119+ }
120+ Predicate :: Comparison { column, op, value } => {
121+ evaluate_comparison ( column, op. negate ( ) , value, row_index, schema, result) ?;
122+ }
123+ Predicate :: And ( predicates) => {
124+ let not_preds: Vec < Predicate > = predicates
125+ . iter ( )
126+ . map ( |p| Predicate :: Not ( Box :: new ( p. clone ( ) ) ) )
127+ . collect ( ) ;
128+ evaluate_predicate_recursive (
129+ & Predicate :: Or ( not_preds) ,
130+ row_index,
131+ schema,
132+ result,
133+ ) ?;
134+ }
135+ Predicate :: Or ( predicates) => {
136+ let not_preds: Vec < Predicate > = predicates
137+ . iter ( )
138+ . map ( |p| Predicate :: Not ( Box :: new ( p. clone ( ) ) ) )
139+ . collect ( ) ;
140+ evaluate_predicate_recursive (
141+ & Predicate :: And ( not_preds) ,
142+ row_index,
143+ schema,
144+ result,
145+ ) ?;
146+ }
116147 }
117148 }
118149 }
@@ -1015,26 +1046,296 @@ mod tests {
10151046 }
10161047
// Test: NOT(age IS NULL) on a single row group.
//
// The one row group's statistics report has_null = true together with
// number_of_values = 5000 and an integer range of [18, 25]. The Not arm of
// evaluate_predicate_recursive (earlier in this diff) dispatches
// Not(IsNull) to evaluate_is_not_null, so the expected outcome is one
// result flag, set to true (row group kept).
//
// NOTE(review): the `-` lines in this hunk (the header just below, and the
// `RowGroupEntry::new(None, vec![])` fixture and "Keep when statistics
// missing" assertion further down) delete
// test_evaluate_predicate_missing_statistics. Nothing visible in this diff
// re-adds a test for a row group without statistics; confirm that case is
// still covered elsewhere.
10171048 #[ test]
1018- fn test_evaluate_predicate_missing_statistics ( ) {
1049+ fn test_evaluate_predicate_not_is_null ( ) {
1050+ use crate :: predicate:: Predicate ;
1051+ use crate :: row_index:: { RowGroupEntry , RowGroupIndex } ;
1052+ use std:: collections:: HashMap ;
1053+
1054+ // Create row index with mixed nulls and values
1055+ let mut columns = HashMap :: new ( ) ;
1056+ let entries = vec ! [
1057+ RowGroupEntry :: new(
1058+ Some ( {
1059+ let proto_stats = proto:: ColumnStatistics {
1060+ number_of_values: Some ( 5000 ) ,
1061+ has_null: Some ( true ) ,
1062+ int_statistics: Some ( proto:: IntegerStatistics {
1063+ minimum: Some ( 18 ) ,
1064+ maximum: Some ( 25 ) ,
1065+ sum: Some ( 107500 ) ,
1066+ } ) ,
1067+ ..Default :: default ( )
1068+ } ;
1069+ ColumnStatistics :: try_from( & proto_stats) . unwrap( )
1070+ } ) ,
1071+ vec![ ] ,
1072+ ) ,
1073+ ] ;
// Key 1 is presumably the column id of "age" in create_test_schema() — TODO confirm.
1074+ columns. insert ( 1 , RowGroupIndex :: new ( entries, 10000 , 1 ) ) ;
1075+ let row_index = StripeRowIndex :: new ( columns, 10000 , 10000 ) ;
1076+ let schema = create_test_schema ( ) ;
1077+
1078+ // Test: Not(age IS NULL) -> age IS NOT NULL
1079+ let predicate = Predicate :: not ( Predicate :: is_null ( "age" ) ) ;
1080+ let result = super :: evaluate_predicate ( & predicate, & row_index, & schema) . unwrap ( ) ;
1081+
// One flag per row group entry; only one entry was registered above.
1082+ assert_eq ! ( result. len( ) , 1 ) ;
1083+ assert ! ( result[ 0 ] ) ; // Should keep because there are non-null values
1084+ }
1085+
// Test: NOT(age IS NOT NULL) across two row groups.
//
// Row group 0 reports has_null = true, row group 1 reports has_null = false.
// The Not arm of evaluate_predicate_recursive (earlier in this diff)
// dispatches Not(IsNotNull) to evaluate_is_null, so the expected result is
// [true, false]: keep the group that reports nulls, skip the one that does not.
1086+ #[ test]
1087+ fn test_evaluate_predicate_not_is_not_null ( ) {
1088+ use crate :: predicate:: Predicate ;
1089+ use crate :: row_index:: { RowGroupEntry , RowGroupIndex } ;
1090+ use std:: collections:: HashMap ;
1091+
1092+ // Create row index with mixed nulls and values
1093+ let mut columns = HashMap :: new ( ) ;
1094+ let entries = vec ! [
1095+ // Row group 0: Has nulls (and values)
1096+ RowGroupEntry :: new(
1097+ Some ( {
1098+ let proto_stats = proto:: ColumnStatistics {
1099+ number_of_values: Some ( 5000 ) ,
1100+ has_null: Some ( true ) ,
1101+ int_statistics: Some ( proto:: IntegerStatistics {
1102+ minimum: Some ( 18 ) ,
1103+ maximum: Some ( 25 ) ,
1104+ sum: Some ( 107500 ) ,
1105+ } ) ,
1106+ ..Default :: default ( )
1107+ } ;
1108+ ColumnStatistics :: try_from( & proto_stats) . unwrap( )
1109+ } ) ,
1110+ vec![ ] ,
1111+ ) ,
1112+ // Row group 1: No nulls
1113+ RowGroupEntry :: new(
1114+ Some ( {
1115+ let proto_stats = proto:: ColumnStatistics {
1116+ number_of_values: Some ( 5000 ) ,
1117+ has_null: Some ( false ) ,
1118+ int_statistics: Some ( proto:: IntegerStatistics {
1119+ minimum: Some ( 26 ) ,
1120+ maximum: Some ( 65 ) ,
1121+ sum: Some ( 227500 ) ,
1122+ } ) ,
1123+ ..Default :: default ( )
1124+ } ;
1125+ ColumnStatistics :: try_from( & proto_stats) . unwrap( )
1126+ } ) ,
1127+ vec![ ] ,
1128+ ) ,
1129+ ] ;
// Key 1 is presumably the column id of "age" in create_test_schema() — TODO confirm.
1130+ columns. insert ( 1 , RowGroupIndex :: new ( entries, 10000 , 1 ) ) ;
// NOTE(review): 20000 / 10000 look like total rows and rows per group
// (two groups registered) — confirm against StripeRowIndex::new.
1131+ let row_index = StripeRowIndex :: new ( columns, 20000 , 10000 ) ;
1132+ let schema = create_test_schema ( ) ;
1133+
1134+ // Test: Not(age IS NOT NULL) -> age IS NULL
1135+ let predicate = Predicate :: not ( Predicate :: is_not_null ( "age" ) ) ;
1136+ let result = super :: evaluate_predicate ( & predicate, & row_index, & schema) . unwrap ( ) ;
1137+
// One flag per row group entry, in the order the entries were listed.
1138+ assert_eq ! ( result. len( ) , 2 ) ;
1139+ assert ! ( result[ 0 ] ) ; // Row group 0: has_null = true -> Keep
1140+ assert ! ( !result[ 1 ] ) ; // Row group 1: has_null = false -> Skip
1141+ }
1142+
// Test: NOT applied directly to a comparison.
//
// The single row group has integer statistics min = 0, max = 10 and no nulls.
// The Not arm of evaluate_predicate_recursive (earlier in this diff)
// evaluates Not(Comparison) by calling evaluate_comparison with op.negate(),
// so Not(age > 5) is checked as the negated operator against the same value.
// The range [0, 10] contains values that are not greater than 5, so the row
// group is expected to be kept.
1143+ #[ test]
1144+ fn test_evaluate_predicate_not_comparison ( ) {
1145+ use crate :: predicate:: { Predicate , PredicateValue } ;
1146+ use crate :: row_index:: { RowGroupEntry , RowGroupIndex } ;
1147+ use std:: collections:: HashMap ;
1148+
1149+ let mut columns = HashMap :: new ( ) ;
1150+ // Row group: [0, 10]
1151+ let entries = vec ! [ RowGroupEntry :: new(
1152+ Some ( {
1153+ let proto_stats = proto:: ColumnStatistics {
1154+ number_of_values: Some ( 1000 ) ,
1155+ has_null: Some ( false ) ,
1156+ int_statistics: Some ( proto:: IntegerStatistics {
1157+ minimum: Some ( 0 ) ,
1158+ maximum: Some ( 10 ) ,
1159+ sum: Some ( 5000 ) ,
1160+ } ) ,
1161+ ..Default :: default ( )
1162+ } ;
1163+ ColumnStatistics :: try_from( & proto_stats) . unwrap( )
1164+ } ) ,
1165+ vec![ ] ,
1166+ ) ] ;
// Key 1 is presumably the column id of "age" in create_test_schema() — TODO confirm.
1167+ columns. insert ( 1 , RowGroupIndex :: new ( entries, 10000 , 1 ) ) ;
1168+ let row_index = StripeRowIndex :: new ( columns, 10000 , 10000 ) ;
1169+ let schema = create_test_schema ( ) ;
1170+
1171+ // Test: Not(age > 5) -> age <= 5
1172+ let predicate = Predicate :: not ( Predicate :: gt ( "age" , PredicateValue :: Int32 ( Some ( 5 ) ) ) ) ;
1173+ let result = super :: evaluate_predicate ( & predicate, & row_index, & schema) . unwrap ( ) ;
1174+
1175+ assert_eq ! ( result. len( ) , 1 ) ;
// Keep: min = 0 means the group can hold values satisfying age <= 5.
1176+ assert ! ( result[ 0 ] ) ;
1177+ }
1178+
// Test: NOT over an AND of two comparisons on the same column.
//
// Two row groups are registered with integer ranges [0, 10] and [20, 30].
// The Not arm of evaluate_predicate_recursive (earlier in this diff)
// evaluates Not(And(ps)) as Or of Not(p) for each child, so
// Not(age >= 15 AND age <= 25) is checked as (age < 15) OR (age > 25).
// Both groups are expected to be kept.
//
// The comments below number the groups 1 and 2; they correspond to
// result[0] and result[1] in the assertions.
//
// The `-` lines inside this span are leftovers of the deleted
// test_evaluate_predicate_missing_statistics, not part of this test.
1179+ #[ test]
1180+ fn test_evaluate_predicate_not_and ( ) {
10191181 use crate :: predicate:: { Predicate , PredicateValue } ;
10201182 use crate :: row_index:: { RowGroupEntry , RowGroupIndex } ;
10211183 use std:: collections:: HashMap ;
10221184
1023- // Create row index with missing statistics
1185+ // Row group 1: [0, 10]
1186+ // Row group 2: [20, 30]
10241187 let mut columns = HashMap :: new ( ) ;
10251188 let entries = vec ! [
1026- RowGroupEntry :: new( None , vec![ ] ) , // No statistics
1189+ RowGroupEntry :: new(
1190+ Some ( {
1191+ let proto_stats = proto:: ColumnStatistics {
1192+ number_of_values: Some ( 1000 ) ,
1193+ has_null: Some ( false ) ,
1194+ int_statistics: Some ( proto:: IntegerStatistics {
1195+ minimum: Some ( 0 ) ,
1196+ maximum: Some ( 10 ) ,
1197+ sum: Some ( 5000 ) ,
1198+ } ) ,
1199+ ..Default :: default ( )
1200+ } ;
1201+ ColumnStatistics :: try_from( & proto_stats) . unwrap( )
1202+ } ) ,
1203+ vec![ ] ,
1204+ ) ,
1205+ RowGroupEntry :: new(
1206+ Some ( {
1207+ let proto_stats = proto:: ColumnStatistics {
1208+ number_of_values: Some ( 1000 ) ,
1209+ has_null: Some ( false ) ,
1210+ int_statistics: Some ( proto:: IntegerStatistics {
1211+ minimum: Some ( 20 ) ,
1212+ maximum: Some ( 30 ) ,
1213+ sum: Some ( 25000 ) ,
1214+ } ) ,
1215+ ..Default :: default ( )
1216+ } ;
1217+ ColumnStatistics :: try_from( & proto_stats) . unwrap( )
1218+ } ) ,
1219+ vec![ ] ,
1220+ ) ,
10271221 ] ;
// Key 1 is presumably the column id of "age" in create_test_schema() — TODO confirm.
10281222 columns. insert ( 1 , RowGroupIndex :: new ( entries, 10000 , 1 ) ) ;
1223+ let row_index = StripeRowIndex :: new ( columns, 20000 , 10000 ) ;
1224+ let schema = create_test_schema ( ) ;
1225+
1226+ // Test: Not(age >= 15 AND age <= 25)
1227+ // Equivalent to: age < 15 OR age > 25
1228+ // Row Group 1: [0, 10] -> Fits age < 15 -> Keep
1229+ // Row Group 2: [20, 30] -> Fits age > 25 -> Keep
1230+ let predicate = Predicate :: not ( Predicate :: and ( vec ! [
1231+ Predicate :: gte( "age" , PredicateValue :: Int32 ( Some ( 15 ) ) ) ,
1232+ Predicate :: lte( "age" , PredicateValue :: Int32 ( Some ( 25 ) ) ) ,
1233+ ] ) ) ;
1234+
1235+ let result = super :: evaluate_predicate ( & predicate, & row_index, & schema) . unwrap ( ) ;
1236+
// One flag per row group entry, in the order the entries were listed.
1237+ assert_eq ! ( result. len( ) , 2 ) ;
1238+ assert ! ( result[ 0 ] ) ; // [0, 10] is < 15
1239+ assert ! ( result[ 1 ] ) ; // [20, 30] contains values > 25 (26..30)
1240+ }
1241+
// Test: NOT over an OR of two comparisons on the same column.
//
// Two row groups are registered with integer ranges [0, 5] and [5, 15].
// The Not arm of evaluate_predicate_recursive (earlier in this diff)
// evaluates Not(Or(ps)) as And of Not(p) for each child, so
// Not(age < 10 OR age > 30) is checked as (age >= 10) AND (age <= 30).
// Expected result: [false, true].
1242+ #[ test]
1243+ fn test_evaluate_predicate_not_or ( ) {
1244+ use crate :: predicate:: { Predicate , PredicateValue } ;
1245+ use crate :: row_index:: { RowGroupEntry , RowGroupIndex } ;
1246+ use std:: collections:: HashMap ;
1247+
1248+ let mut columns = HashMap :: new ( ) ;
1249+ let entries = vec ! [
1250+ // Row group 0: [0, 5]
1251+ // Fits age < 10 fully. Does not overlap [10, 30].
1252+ RowGroupEntry :: new(
1253+ Some ( {
1254+ let proto_stats = proto:: ColumnStatistics {
1255+ number_of_values: Some ( 1000 ) ,
1256+ has_null: Some ( false ) ,
1257+ int_statistics: Some ( proto:: IntegerStatistics {
1258+ minimum: Some ( 0 ) ,
1259+ maximum: Some ( 5 ) ,
1260+ sum: Some ( 2500 ) ,
1261+ } ) ,
1262+ ..Default :: default ( )
1263+ } ;
1264+ ColumnStatistics :: try_from( & proto_stats) . unwrap( )
1265+ } ) ,
1266+ vec![ ] ,
1267+ ) ,
// Row group 1: [5, 15]
// Straddles the lower bound 10, so part of it lies inside [10, 30].
1268+ RowGroupEntry :: new(
1269+ Some ( {
1270+ let proto_stats = proto:: ColumnStatistics {
1271+ number_of_values: Some ( 1000 ) ,
1272+ has_null: Some ( false ) ,
1273+ int_statistics: Some ( proto:: IntegerStatistics {
1274+ minimum: Some ( 5 ) ,
1275+ maximum: Some ( 15 ) ,
1276+ sum: Some ( 10000 ) ,
1277+ } ) ,
1278+ ..Default :: default ( )
1279+ } ;
1280+ ColumnStatistics :: try_from( & proto_stats) . unwrap( )
1281+ } ) ,
1282+ vec![ ] ,
1283+ ) ,
1284+ ] ;
// Key 1 is presumably the column id of "age" in create_test_schema() — TODO confirm.
1285+ columns. insert ( 1 , RowGroupIndex :: new ( entries, 10000 , 1 ) ) ;
1286+ let row_index = StripeRowIndex :: new ( columns, 20000 , 10000 ) ;
1287+ let schema = create_test_schema ( ) ;
1288+
1289+ // Test: Not(age < 10 OR age > 30)
1290+ // Equivalent to: age >= 10 AND age <= 30
1291+ let predicate = Predicate :: not ( Predicate :: or ( vec ! [
1292+ Predicate :: lt( "age" , PredicateValue :: Int32 ( Some ( 10 ) ) ) ,
1293+ Predicate :: gt( "age" , PredicateValue :: Int32 ( Some ( 30 ) ) ) ,
1294+ ] ) ) ;
1295+ let result = super :: evaluate_predicate ( & predicate, & row_index, & schema) . unwrap ( ) ;
1296+
// One flag per row group entry, in the order the entries were listed.
1297+ assert_eq ! ( result. len( ) , 2 ) ;
1298+ assert ! ( !result[ 0 ] ) ; // [0, 5] is outside [10, 30] -> Skip
1299+ assert ! ( result[ 1 ] ) ; // [5, 15] overlaps [10, 30] -> Keep
1300+ }
1301+
// Test: double negation collapses to the inner predicate.
//
// The single row group has integer statistics min = 0, max = 10 and no nulls.
// The Not arm of evaluate_predicate_recursive (earlier in this diff)
// handles Not(Not(inner)) by evaluating `inner` directly, so
// Not(Not(age > 5)) is checked as age > 5. The range [0, 10] contains values
// above 5, so the row group is expected to be kept.
//
// The `-` lines inside this span are leftovers of the deleted
// test_evaluate_predicate_missing_statistics, not part of this test.
1302+ #[ test]
1303+ fn test_evaluate_predicate_double_negation ( ) {
1304+ use crate :: predicate:: { Predicate , PredicateValue } ;
1305+ use crate :: row_index:: { RowGroupEntry , RowGroupIndex } ;
1306+ use std:: collections:: HashMap ;
1307+
1308+ let mut columns = HashMap :: new ( ) ;
1309+ // Row group: [0, 10]
1310+ let entries = vec ! [ RowGroupEntry :: new(
1311+ Some ( {
1312+ let proto_stats = proto:: ColumnStatistics {
1313+ number_of_values: Some ( 1000 ) ,
1314+ has_null: Some ( false ) ,
1315+ int_statistics: Some ( proto:: IntegerStatistics {
1316+ minimum: Some ( 0 ) ,
1317+ maximum: Some ( 10 ) ,
1318+ sum: Some ( 5000 ) ,
1319+ } ) ,
1320+ ..Default :: default ( )
1321+ } ;
1322+ ColumnStatistics :: try_from( & proto_stats) . unwrap( )
1323+ } ) ,
1324+ vec![ ] ,
1325+ ) ] ;
// Key 1 is presumably the column id of "age" in create_test_schema() — TODO confirm.
1326+ columns. insert ( 1 , RowGroupIndex :: new ( entries, 10000 , 1 ) ) ;
10291327 let row_index = StripeRowIndex :: new ( columns, 10000 , 10000 ) ;
10301328 let schema = create_test_schema ( ) ;
10311329
1032- // Test: age > 10
1033- // Should keep row group when statistics are missing (conservative)
1034- let predicate = Predicate :: gt ( "age" , PredicateValue :: Int32 ( Some ( 10 ) ) ) ;
1330+ // Test: Not(Not(age > 5)) -> age > 5
1331+ // Row group [0, 10] contains values > 5 -> Keep
1332+ let predicate = Predicate :: not ( Predicate :: not ( Predicate :: gt (
1333+ "age" ,
1334+ PredicateValue :: Int32 ( Some ( 5 ) ) ,
1335+ ) ) ) ;
10351336 let result = super :: evaluate_predicate ( & predicate, & row_index, & schema) . unwrap ( ) ;
10361337
10371338 assert_eq ! ( result. len( ) , 1 ) ;
1038- assert ! ( result[ 0 ] ) ; // Keep when statistics missing
1339+ assert ! ( result[ 0 ] ) ;
10391340 }
10401341}
0 commit comments