@@ -9,23 +9,24 @@ use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionSc
 use delta_kernel::actions::{get_log_schema, REMOVE_NAME};
 use delta_kernel::actions::{Metadata, Protocol, SetTransaction};
 use delta_kernel::engine::arrow_data::ArrowEngineData;
+use delta_kernel::engine::arrow_expression::evaluate_expression;
 use delta_kernel::engine::default::executor::tokio::{
     TokioBackgroundExecutor, TokioMultiThreadExecutor,
 };
 use delta_kernel::engine::default::DefaultEngine;
 use delta_kernel::log_segment::LogSegment;
-use delta_kernel::schema::Schema;
+use delta_kernel::schema::{DataType, Schema};
 use delta_kernel::snapshot::Snapshot as SnapshotInner;
 use delta_kernel::table_properties::TableProperties;
-use delta_kernel::{Engine, Expression, ExpressionRef, Table, Version};
+use delta_kernel::{Engine, Expression, ExpressionHandler, ExpressionRef, Table, Version};
 use itertools::Itertools;
 use object_store::path::Path;
 use object_store::ObjectStore;
 use url::Url;

 use super::cache::CommitCacheObjectStore;
 use super::{replay_file_actions, Snapshot};
-use crate::kernel::{Action, CommitInfo};
+use crate::kernel::{Action, CommitInfo, ARROW_HANDLER};
 use crate::{DeltaResult, DeltaTableError};

 // TODO: avoid repetitive parsing of json stats
@@ -94,11 +95,8 @@ impl Snapshot for LazySnapshot {
     }

     fn tombstones(&self) -> DeltaResult<Box<dyn Iterator<Item = DeltaResult<RecordBatch>>>> {
-        static META_PREDICATE: LazyLock<Option<ExpressionRef>> = LazyLock::new(|| {
-            Some(Arc::new(
-                Expression::column([REMOVE_NAME, "path"]).is_not_null(),
-            ))
-        });
+        static META_PREDICATE: LazyLock<ExpressionRef> =
+            LazyLock::new(|| Arc::new(Expression::column([REMOVE_NAME, "path"]).is_not_null()));
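+        // NOTE: replay below takes this predicate as a best-effort pushdown hint,
+        // so the returned batches are filtered against it again after materialization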
         let read_schema = get_log_schema().project(&[REMOVE_NAME])?;
         Ok(Box::new(
             self.inner
@@ -107,9 +105,23 @@ impl Snapshot for LazySnapshot {
                 self.engine.as_ref(),
                 read_schema.clone(),
                 read_schema,
-                META_PREDICATE.clone(),
+                Some(META_PREDICATE.clone()),
             )?
-            .map_ok(|(d, _)| Ok(RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?)))
+            .map_ok(|(d, _)| {
+                let batch = RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?);
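+                // the pushdown above may not drop non-matching rows, so evaluate the
+                // predicate against the decoded batch and filter it explicitly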
+                let selection = evaluate_expression(
+                    META_PREDICATE.as_ref(),
+                    &batch,
+                    Some(&DataType::BOOLEAN),
+                )?;
+                let filter = selection
+                    .as_any()
+                    .downcast_ref::<BooleanArray>()
+                    .ok_or_else(|| {
+                        DeltaTableError::generic("failed to downcast to BooleanArray")
+                    })?;
+                Ok(filter_record_batch(&batch, filter)?)
+            })
             .flatten(),
         ))
     }
@@ -247,37 +259,46 @@ impl LazySnapshot {

 #[cfg(test)]
 mod tests {
-    use deltalake_test::acceptance::{read_dat_case, TestCaseInfo};
+    use delta_kernel::schema::StructType;
+    use deltalake_test::utils::*;
     use deltalake_test::TestResult;

-    use super::super::tests::get_dat_dir;
     use super::*;

     async fn load_snapshot() -> TestResult<()> {
-        // some comment
-        let mut dat_dir = get_dat_dir();
-        dat_dir.push("multi_partitioned");
-
-        let dat_info: TestCaseInfo = read_dat_case(dat_dir)?;
-        let table_info = dat_info.table_summary()?;
-
-        let table = Table::try_from_uri(dat_info.table_root()?)?;
-
-        let snapshot = LazySnapshot::try_new(
-            table,
-            Arc::new(object_store::local::LocalFileSystem::default()),
-            None,
-        )
-        .await?;
-
-        assert_eq!(snapshot.version(), table_info.version);
-        assert_eq!(
-            (
-                snapshot.protocol().min_reader_version(),
-                snapshot.protocol().min_writer_version()
-            ),
-            (table_info.min_reader_version, table_info.min_writer_version)
-        );
+        let ctx = IntegrationContext::new(Box::<LocalStorageIntegration>::default())?;
+        ctx.load_table(TestTables::Simple).await?;
+
+        let store = ctx
+            .table_builder(TestTables::Simple)
+            .build_storage()?
+            .object_store(None);
+        let table = Table::try_from_uri("memory:///")?;
+        let snapshot = LazySnapshot::try_new(table, store, None).await?;
+
+        let schema_string = r#"{"type":"struct","fields":[{"name":"id","type":"long","nullable":true,"metadata":{}}]}"#;
+        let expected: StructType = serde_json::from_str(schema_string)?;
+        assert_eq!(snapshot.schema(), &expected);
+
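+        // the simple test table's log holds five commits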
+        let infos = snapshot.commit_infos(None, None)?.collect_vec();
+        assert_eq!(infos.len(), 5);
+
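+        // remove actions across the log surface as tombstone rows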
+        let tombstones: Vec<_> = snapshot.tombstones()?.try_collect()?;
+        let num_tombstones = tombstones.iter().map(|b| b.num_rows() as i64).sum::<i64>();
+        assert_eq!(num_tombstones, 31);
+
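+        // only these data files remain active in the latest snapshot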
+        let expected = vec![
+            "part-00000-2befed33-c358-4768-a43c-3eda0d2a499d-c000.snappy.parquet",
+            "part-00000-c1777d7d-89d9-4790-b38a-6ee7e24456b1-c000.snappy.parquet",
+            "part-00001-7891c33d-cedc-47c3-88a6-abcfb049d3b4-c000.snappy.parquet",
+            "part-00004-315835fe-fb44-4562-98f6-5e6cfa3ae45d-c000.snappy.parquet",
+            "part-00007-3a0e4727-de0d-41b6-81ef-5223cf40f025-c000.snappy.parquet",
+        ];
+        let file_names: Vec<_> = snapshot
+            .logical_files_view(None)?
+            .map_ok(|f| f.path().to_owned())
+            .try_collect()?;
+        assert_eq!(file_names, expected);

         Ok(())
     }