|
29 | 29 | import java.io.File;
|
30 | 30 | import java.util.Arrays;
|
31 | 31 | import java.util.Collections;
|
| 32 | +import java.util.Comparator; |
32 | 33 | import java.util.List;
|
| 34 | +import java.util.stream.Collectors; |
33 | 35 | import org.apache.iceberg.exceptions.CommitFailedException;
|
34 | 36 | import org.apache.iceberg.exceptions.ValidationException;
|
35 | 37 | import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
|
@@ -387,6 +389,83 @@ public void testRewriteDataAndAssignOldSequenceNumber() {
|
387 | 389 | assertThat(listManifestFiles()).hasSize(4);
|
388 | 390 | }
|
389 | 391 |
|
| 392 | + @TestTemplate |
| 393 | + public void testRewriteDataAndAssignOldSequenceNumbersShouldNotDropDeleteFiles() { |
| 394 | + assumeThat(formatVersion) |
| 395 | + .as("Sequence number is only supported in iceberg format v2 or later") |
| 396 | + .isGreaterThan(1); |
| 397 | + assertThat(listManifestFiles()).isEmpty(); |
| 398 | + |
| 399 | + commit(table, table.newRowDelta().addRows(FILE_A).addDeletes(FILE_A2_DELETES), branch); |
| 400 | + |
| 401 | + long firstCommitSequenceNumber = latestSnapshot(table, branch).sequenceNumber(); |
| 402 | + |
| 403 | + commit(table, table.newRowDelta().addRows(FILE_B).addDeletes(FILE_B_EQ_DELETES), branch); |
| 404 | + |
| 405 | + long secondCommitSequenceNumber = latestSnapshot(table, branch).sequenceNumber(); |
| 406 | + |
| 407 | + TableMetadata base = readMetadata(); |
| 408 | + Snapshot baseSnap = latestSnapshot(base, branch); |
| 409 | + long baseSnapshotId = baseSnap.snapshotId(); |
| 410 | + |
| 411 | + // FILE_B_EQ_DELETES and FILE_A2_DELETES should not be removed: the rewrite explicitly passes |
| 412 | + // `firstCommitSequenceNumber`, which equals the sequence number of FILE_A2_DELETES and is |
| 413 | + // lower than that of FILE_B_EQ_DELETES. |
| 414 | + |
| 415 | + // Technically FILE_A2_DELETES could be removed: it is an equality delete, which only applies |
| 416 | + // to data sequence numbers strictly smaller than its own, so it is no longer needed. |
| 417 | + // However, MergingSnapshotProducer calls dropDeleteFilesOlderThan, which does not consider |
| 418 | + // whether the file is an equality delete. If that API changes, the equality delete file could |
| 419 | + // be dropped sooner. |
| 420 | + Snapshot pending = |
| 421 | + apply( |
| 422 | + table |
| 423 | + .newRewrite() |
| 424 | + .addFile(FILE_A2) |
| 425 | + .deleteFile(FILE_A) |
| 426 | + .dataSequenceNumber(firstCommitSequenceNumber), |
| 427 | + branch); |
| 428 | + |
| 429 | + assertThat(pending.allManifests(table.io())).hasSize(5); |
| 430 | + |
| 431 | + long pendingId = pending.snapshotId(); |
| 432 | + List<ManifestFile> manifestFiles = |
| 433 | + pending.allManifests(table.io()).stream() |
| 434 | + .sorted(Comparator.comparingLong(ManifestFile::sequenceNumber).reversed()) |
| 435 | + .collect(Collectors.toList()); |
| 436 | + |
| 437 | + validateManifest( |
| 438 | + manifestFiles.get(0), |
| 439 | + dataSeqs(1L), |
| 440 | + fileSeqs(secondCommitSequenceNumber + 1), |
| 441 | + ids(pendingId), |
| 442 | + files(FILE_A2), |
| 443 | + statuses(ADDED)); |
| 444 | + |
| 445 | + validateManifestEntries(manifestFiles.get(1), ids(pendingId), files(FILE_A), statuses(DELETED)); |
| 446 | + |
| 447 | + validateManifestEntries( |
| 448 | + manifestFiles.get(2), ids(baseSnapshotId), files(FILE_B), statuses(ADDED)); |
| 449 | + |
| 450 | + validateDeleteManifest( |
| 451 | + manifestFiles.get(3), |
| 452 | + dataSeqs(2L), |
| 453 | + fileSeqs(2L), |
| 454 | + ids(baseSnapshotId), |
| 455 | + files(FILE_B_EQ_DELETES), |
| 456 | + statuses(ADDED)); |
| 457 | + |
| 458 | + validateDeleteManifest( |
| 459 | + manifestFiles.get(4), |
| 460 | + dataSeqs(1L), |
| 461 | + fileSeqs(1L), |
| 462 | + ids(baseSnapshotId - 1), |
| 463 | + files(FILE_A2_DELETES), |
| 464 | + statuses(ADDED)); |
| 465 | + |
| 466 | + assertThat(listManifestFiles()).hasSize(6); |
| 467 | + } |
| 468 | + |
390 | 469 | @TestTemplate
|
391 | 470 | public void testFailure() {
|
392 | 471 | commit(table, table.newAppend().appendFile(FILE_A), branch);
|
|
0 commit comments