30
30
import org .apache .iceberg .ChangelogOperation ;
31
31
import org .apache .iceberg .ChangelogScanTask ;
32
32
import org .apache .iceberg .DataFile ;
33
+ import org .apache .iceberg .DeleteFile ;
33
34
import org .apache .iceberg .Files ;
34
35
import org .apache .iceberg .IncrementalChangelogScan ;
35
36
import org .apache .iceberg .PartitionSpec ;
46
47
import org .apache .iceberg .relocated .com .google .common .collect .Lists ;
47
48
import org .apache .iceberg .spark .TestBase ;
48
49
import org .apache .iceberg .types .Types ;
50
+ import org .apache .iceberg .util .CharSequenceSet ;
51
+ import org .apache .iceberg .util .Pair ;
49
52
import org .apache .spark .sql .catalyst .InternalRow ;
50
53
import org .junit .jupiter .api .AfterEach ;
51
54
import org .junit .jupiter .api .BeforeEach ;
@@ -60,6 +63,7 @@ public class TestChangelogReader extends TestBase {
60
63
PartitionSpec .builderFor (SCHEMA ).bucket ("data" , 16 ).build ();
61
64
private final List <Record > records1 = Lists .newArrayList ();
62
65
private final List <Record > records2 = Lists .newArrayList ();
66
+ private final List <Record > records3 = Lists .newArrayList ();
63
67
64
68
private Table table ;
65
69
private DataFile dataFile1 ;
@@ -84,6 +88,11 @@ public void before() throws IOException {
84
88
// write data to files
85
89
dataFile1 = writeDataFile (records1 );
86
90
dataFile2 = writeDataFile (records2 );
91
+
92
+ // records to be deleted
93
+ records3 .add (record .copy ("id" , 29 , "data" , "a" ));
94
+ records3 .add (record .copy ("id" , 89 , "data" , "d" ));
95
+ records3 .add (record .copy ("id" , 122 , "data" , "g" ));
87
96
}
88
97
89
98
@ AfterEach
@@ -191,6 +200,153 @@ public void testMixDeleteAndInsert() throws IOException {
191
200
table .newAppend ().appendFile (dataFile2 ).commit ();
192
201
long snapshotId3 = table .currentSnapshot ().snapshotId ();
193
202
203
+ List <InternalRow > rows = getChangelogRows ();
204
+
205
+ List <Object []> expectedRows = Lists .newArrayList ();
206
+ addExpectedRows (expectedRows , ChangelogOperation .INSERT , snapshotId1 , 0 , records1 );
207
+ addExpectedRows (expectedRows , ChangelogOperation .DELETE , snapshotId2 , 1 , records1 );
208
+ addExpectedRows (expectedRows , ChangelogOperation .INSERT , snapshotId3 , 2 , records2 );
209
+
210
+ assertEquals ("Should have expected rows" , expectedRows , internalRowsToJava (rows ));
211
+ }
212
+
213
+ @ Test
214
+ public void testPositionDeletes () throws IOException {
215
+ table .newAppend ().appendFile (dataFile1 ).commit ();
216
+ long snapshotId1 = table .currentSnapshot ().snapshotId ();
217
+
218
+ table .newAppend ().appendFile (dataFile2 ).commit ();
219
+ long snapshotId2 = table .currentSnapshot ().snapshotId ();
220
+
221
+ List <Pair <CharSequence , Long >> deletes =
222
+ Lists .newArrayList (
223
+ Pair .of (dataFile1 .path (), 0L ), // id = 29
224
+ Pair .of (dataFile1 .path (), 3L ), // id = 89
225
+ Pair .of (dataFile2 .path (), 2L ) // id = 122
226
+ );
227
+
228
+ Pair <DeleteFile , CharSequenceSet > posDeletes =
229
+ FileHelpers .writeDeleteFile (
230
+ table ,
231
+ Files .localOutput (File .createTempFile ("junit" , null , temp .toFile ())),
232
+ TestHelpers .Row .of (0 ),
233
+ deletes );
234
+
235
+ table
236
+ .newRowDelta ()
237
+ .addDeletes (posDeletes .first ())
238
+ .validateDataFilesExist (posDeletes .second ())
239
+ .commit ();
240
+ long snapshotId3 = table .currentSnapshot ().snapshotId ();
241
+
242
+ List <InternalRow > rows = getChangelogRows ();
243
+
244
+ List <Object []> expectedRows = Lists .newArrayList ();
245
+ addExpectedRows (expectedRows , ChangelogOperation .INSERT , snapshotId1 , 0 , records1 );
246
+ addExpectedRows (expectedRows , ChangelogOperation .INSERT , snapshotId2 , 1 , records2 );
247
+ addExpectedRows (expectedRows , ChangelogOperation .DELETE , snapshotId3 , 2 , records3 );
248
+
249
+ assertEquals ("Should have expected rows" , expectedRows , internalRowsToJava (rows ));
250
+ }
251
+
252
+ @ Test
253
+ public void testEqualityDeletes () throws IOException {
254
+ table .newAppend ().appendFile (dataFile1 ).commit ();
255
+ long snapshotId1 = table .currentSnapshot ().snapshotId ();
256
+
257
+ table .newAppend ().appendFile (dataFile2 ).commit ();
258
+ long snapshotId2 = table .currentSnapshot ().snapshotId ();
259
+
260
+ Schema deleteRowSchema = table .schema ().select ("data" );
261
+ Record dataDelete = GenericRecord .create (deleteRowSchema );
262
+ List <Record > dataDeletes =
263
+ Lists .newArrayList (
264
+ dataDelete .copy ("data" , "a" ), // id = 29
265
+ dataDelete .copy ("data" , "d" ), // id = 89
266
+ dataDelete .copy ("data" , "g" ) // id = 122
267
+ );
268
+
269
+ DeleteFile eqDeletes =
270
+ FileHelpers .writeDeleteFile (
271
+ table ,
272
+ Files .localOutput (File .createTempFile ("junit" , null , temp .toFile ())),
273
+ TestHelpers .Row .of (0 ),
274
+ dataDeletes ,
275
+ deleteRowSchema );
276
+
277
+ table .newRowDelta ().addDeletes (eqDeletes ).commit ();
278
+ long snapshotId3 = table .currentSnapshot ().snapshotId ();
279
+
280
+ List <InternalRow > rows = getChangelogRows ();
281
+
282
+ List <Object []> expectedRows = Lists .newArrayList ();
283
+ addExpectedRows (expectedRows , ChangelogOperation .INSERT , snapshotId1 , 0 , records1 );
284
+ addExpectedRows (expectedRows , ChangelogOperation .INSERT , snapshotId2 , 1 , records2 );
285
+ addExpectedRows (expectedRows , ChangelogOperation .DELETE , snapshotId3 , 2 , records3 );
286
+
287
+ assertEquals ("Should have expected rows" , expectedRows , internalRowsToJava (rows ));
288
+ }
289
+
290
+ @ Test
291
+ public void testMixOfPositionAndEqualityDeletes () throws IOException {
292
+ table .newAppend ().appendFile (dataFile1 ).commit ();
293
+ long snapshotId1 = table .currentSnapshot ().snapshotId ();
294
+
295
+ table .newAppend ().appendFile (dataFile2 ).commit ();
296
+ long snapshotId2 = table .currentSnapshot ().snapshotId ();
297
+
298
+ List <Pair <CharSequence , Long >> deletes =
299
+ Lists .newArrayList (
300
+ Pair .of (dataFile1 .path (), 0L ), // id = 29
301
+ Pair .of (dataFile1 .path (), 3L ) // id = 89
302
+ );
303
+
304
+ Pair <DeleteFile , CharSequenceSet > posDeletes =
305
+ FileHelpers .writeDeleteFile (
306
+ table ,
307
+ Files .localOutput (File .createTempFile ("junit" , null , temp .toFile ())),
308
+ TestHelpers .Row .of (0 ),
309
+ deletes );
310
+
311
+ Schema deleteRowSchema = table .schema ().select ("data" );
312
+ Record dataDelete = GenericRecord .create (deleteRowSchema );
313
+ List <Record > dataDeletes =
314
+ Lists .newArrayList (
315
+ dataDelete .copy ("data" , "a" ), // id = 29
316
+ dataDelete .copy ("data" , "g" ) // id = 122
317
+ );
318
+
319
+ DeleteFile eqDeletes =
320
+ FileHelpers .writeDeleteFile (
321
+ table ,
322
+ Files .localOutput (File .createTempFile ("junit" , null , temp .toFile ())),
323
+ TestHelpers .Row .of (0 ),
324
+ dataDeletes ,
325
+ deleteRowSchema );
326
+
327
+ table
328
+ .newRowDelta ()
329
+ .addDeletes (eqDeletes )
330
+ .addDeletes (posDeletes .first ())
331
+ .validateDataFilesExist (posDeletes .second ())
332
+ .commit ();
333
+ long snapshotId3 = table .currentSnapshot ().snapshotId ();
334
+
335
+ List <InternalRow > rows = getChangelogRows ();
336
+
337
+ List <Object []> expectedRows = Lists .newArrayList ();
338
+ addExpectedRows (expectedRows , ChangelogOperation .INSERT , snapshotId1 , 0 , records1 );
339
+ addExpectedRows (expectedRows , ChangelogOperation .INSERT , snapshotId2 , 1 , records2 );
340
+ addExpectedRows (expectedRows , ChangelogOperation .DELETE , snapshotId3 , 2 , records3 );
341
+
342
+ assertEquals ("Should have expected rows" , expectedRows , internalRowsToJava (rows ));
343
+ }
344
+
345
+ private IncrementalChangelogScan newScan () {
346
+ return table .newIncrementalChangelogScan ();
347
+ }
348
+
349
+ private List <InternalRow > getChangelogRows () throws IOException {
194
350
CloseableIterable <ScanTaskGroup <ChangelogScanTask >> taskGroups = newScan ().planTasks ();
195
351
196
352
List <InternalRow > rows = Lists .newArrayList ();
@@ -214,16 +370,7 @@ public void testMixDeleteAndInsert() throws IOException {
214
370
}
215
371
});
216
372
217
- List <Object []> expectedRows = Lists .newArrayList ();
218
- addExpectedRows (expectedRows , ChangelogOperation .INSERT , snapshotId1 , 0 , records1 );
219
- addExpectedRows (expectedRows , ChangelogOperation .DELETE , snapshotId2 , 1 , records1 );
220
- addExpectedRows (expectedRows , ChangelogOperation .INSERT , snapshotId3 , 2 , records2 );
221
-
222
- assertEquals ("Should have expected rows" , expectedRows , internalRowsToJava (rows ));
223
- }
224
-
225
- private IncrementalChangelogScan newScan () {
226
- return table .newIncrementalChangelogScan ();
373
+ return rows ;
227
374
}
228
375
229
376
private List <Object []> addExpectedRows (
0 commit comments