|
29 | 29 | import java.io.File;
|
30 | 30 | import java.util.Arrays;
|
31 | 31 | import java.util.Collections;
|
| 32 | +import java.util.Comparator; |
32 | 33 | import java.util.List;
|
| 34 | +import java.util.stream.Collectors; |
33 | 35 | import org.apache.iceberg.exceptions.CommitFailedException;
|
34 | 36 | import org.apache.iceberg.exceptions.ValidationException;
|
35 | 37 | import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
|
@@ -384,6 +386,116 @@ public void testRewriteDataAndAssignOldSequenceNumber() {
|
384 | 386 | assertThat(listManifestFiles()).hasSize(4);
|
385 | 387 | }
|
386 | 388 |
|
| 389 | + @TestTemplate |
| 390 | + public void testRewriteDataAndAssignOldSequenceNumbersShouldNotDropDeleteFiles() { |
| 391 | + assumeThat(formatVersion) |
| 392 | + .as("Sequence number is only supported in iceberg format v2 or later") |
| 393 | + .isGreaterThan(1); |
| 394 | + assertThat(listManifestFiles()).isEmpty(); |
| 395 | + |
| 396 | + commit(table, table.newRowDelta().addRows(FILE_A).addDeletes(FILE_A2_DELETES), branch); |
| 397 | + |
| 398 | + long firstRewriteSequenceNumber = latestSnapshot(table, branch).sequenceNumber(); |
| 399 | + |
| 400 | + commit( |
| 401 | + table, |
| 402 | + table.newRowDelta().addRows(FILE_B).addRows(FILE_B).addDeletes(FILE_B2_DELETES), |
| 403 | + branch); |
| 404 | + commit( |
| 405 | + table, |
| 406 | + table.newRowDelta().addRows(FILE_B).addRows(FILE_C).addDeletes(FILE_C2_DELETES), |
| 407 | + branch); |
| 408 | + |
| 409 | + long secondRewriteSequenceNumber = latestSnapshot(table, branch).sequenceNumber(); |
| 410 | + |
| 411 | + commit( |
| 412 | + table, |
| 413 | + table |
| 414 | + .newRewrite() |
| 415 | + .addFile(FILE_D) |
| 416 | + .deleteFile(FILE_B) |
| 417 | + .deleteFile(FILE_C) |
| 418 | + .dataSequenceNumber(secondRewriteSequenceNumber), |
| 419 | + branch); |
| 420 | + |
| 421 | + TableMetadata base = readMetadata(); |
| 422 | + Snapshot baseSnap = latestSnapshot(base, branch); |
| 423 | + long baseSnapshotId = baseSnap.snapshotId(); |
| 424 | + |
| 425 | + Comparator<ManifestFile> sequenceNumberOrdering = |
| 426 | + new Comparator<>() { |
| 427 | + @Override |
| 428 | + public int compare(ManifestFile o1, ManifestFile o2) { |
| 429 | + return (int) (o1.sequenceNumber() - o2.sequenceNumber()); |
| 430 | + } |
| 431 | + }; |
| 432 | + |
| 433 | + // FILE_B2_DELETES and FILE_A2_DELETES should not be removed as the rewrite specifies |
| 434 | + // `firstRewriteSequenceNumber` |
| 435 | + // explicitly which is the same as that of A2_DELETES and before B2_DELETES |
| 436 | + |
| 437 | + // Technically A1_DELETES could be removed since it's an equality delete and should apply on |
| 438 | + // data sequences strictly |
| 439 | + // smaller, so it's no longer needed. However, MergingSnapshotProducer calls |
| 440 | + // dropDeleteFilesOlderThan |
| 441 | + // which doesn't consider if the file is an equality delete, if that API is changed the equality |
| 442 | + // delete file could be dropped sooner |
| 443 | + Snapshot pending = |
| 444 | + apply( |
| 445 | + table |
| 446 | + .newRewrite() |
| 447 | + .addFile(FILE_A2) |
| 448 | + .deleteFile(FILE_A) |
| 449 | + .dataSequenceNumber(firstRewriteSequenceNumber), |
| 450 | + branch); |
| 451 | + |
| 452 | + assertThat(pending.allManifests(table.io())).hasSize(6); |
| 453 | + |
| 454 | + long pendingId = pending.snapshotId(); |
| 455 | + List<ManifestFile> manifestFiles = |
| 456 | + pending.allManifests(table.io()).stream() |
| 457 | + .sorted(sequenceNumberOrdering.reversed()) |
| 458 | + .collect(Collectors.toList()); |
| 459 | + ManifestFile newManifest = manifestFiles.get(0); |
| 460 | + validateManifestEntries(newManifest, ids(pendingId), files(FILE_A2), statuses(ADDED)); |
| 461 | + |
| 462 | + assertThat(ManifestFiles.read(newManifest, FILE_IO).entries()) |
| 463 | + .allSatisfy( |
| 464 | + entry -> assertThat(entry.dataSequenceNumber()).isEqualTo(firstRewriteSequenceNumber)); |
| 465 | + assertThat(newManifest.sequenceNumber()).isEqualTo(secondRewriteSequenceNumber + 2); |
| 466 | + |
| 467 | + validateManifestEntries(manifestFiles.get(1), ids(pendingId), files(FILE_A), statuses(DELETED)); |
| 468 | + |
| 469 | + validateManifestEntries( |
| 470 | + manifestFiles.get(2), ids(baseSnapshotId), files(FILE_D), statuses(ADDED)); |
| 471 | + |
| 472 | + validateDeleteManifest( |
| 473 | + manifestFiles.get(3), |
| 474 | + dataSeqs(3L), |
| 475 | + fileSeqs(3L), |
| 476 | + ids(baseSnapshotId - 1), |
| 477 | + files(FILE_C2_DELETES), |
| 478 | + statuses(ADDED)); |
| 479 | + |
| 480 | + validateDeleteManifest( |
| 481 | + manifestFiles.get(4), |
| 482 | + dataSeqs(2L), |
| 483 | + fileSeqs(2L), |
| 484 | + ids(baseSnapshotId - 2), |
| 485 | + files(FILE_B2_DELETES), |
| 486 | + statuses(ADDED)); |
| 487 | + |
| 488 | + validateDeleteManifest( |
| 489 | + manifestFiles.get(5), |
| 490 | + dataSeqs(1L), |
| 491 | + fileSeqs(1L), |
| 492 | + ids(baseSnapshotId - 3), |
| 493 | + files(FILE_A2_DELETES), |
| 494 | + statuses(ADDED)); |
| 495 | + |
| 496 | + assertThat(listManifestFiles()).hasSize(11); |
| 497 | + } |
| 498 | + |
387 | 499 | @TestTemplate
|
388 | 500 | public void testFailure() {
|
389 | 501 | commit(table, table.newAppend().appendFile(FILE_A), branch);
|
|
0 commit comments