|
30 | 30 | import org.apache.paimon.io.DataFilePathFactory; |
31 | 31 | import org.apache.paimon.manifest.FileEntry; |
32 | 32 | import org.apache.paimon.manifest.FileKind; |
33 | | -import org.apache.paimon.manifest.FileSource; |
34 | 33 | import org.apache.paimon.manifest.IndexManifestEntry; |
35 | 34 | import org.apache.paimon.manifest.IndexManifestFile; |
36 | 35 | import org.apache.paimon.manifest.ManifestCommittable; |
|
50 | 49 | import org.apache.paimon.operation.commit.ConflictDetection.ConflictCheck; |
51 | 50 | import org.apache.paimon.operation.commit.ManifestEntryChanges; |
52 | 51 | import org.apache.paimon.operation.commit.RetryCommitResult; |
| 52 | +import org.apache.paimon.operation.commit.RowTrackingCommitUtils.RowTrackingAssigned; |
53 | 53 | import org.apache.paimon.operation.commit.SuccessCommitResult; |
54 | 54 | import org.apache.paimon.operation.metrics.CommitMetrics; |
55 | 55 | import org.apache.paimon.operation.metrics.CommitStats; |
|
62 | 62 | import org.apache.paimon.stats.Statistics; |
63 | 63 | import org.apache.paimon.stats.StatsFileHandler; |
64 | 64 | import org.apache.paimon.table.BucketMode; |
65 | | -import org.apache.paimon.table.SpecialFields; |
66 | 65 | import org.apache.paimon.table.sink.CommitCallback; |
67 | 66 | import org.apache.paimon.table.sink.CommitMessage; |
68 | 67 | import org.apache.paimon.table.sink.CommitMessageImpl; |
|
95 | 94 |
|
96 | 95 | import static java.util.Collections.emptyList; |
97 | 96 | import static org.apache.paimon.deletionvectors.DeletionVectorsIndexFile.DELETION_VECTORS_INDEX; |
98 | | -import static org.apache.paimon.format.blob.BlobFileFormat.isBlobFile; |
99 | 97 | import static org.apache.paimon.manifest.ManifestEntry.nullableRecordCount; |
100 | 98 | import static org.apache.paimon.manifest.ManifestEntry.recordCountAdd; |
101 | 99 | import static org.apache.paimon.manifest.ManifestEntry.recordCountDelete; |
102 | 100 | import static org.apache.paimon.operation.commit.ConflictDetection.hasConflictChecked; |
103 | 101 | import static org.apache.paimon.operation.commit.ConflictDetection.mustConflictCheck; |
104 | 102 | import static org.apache.paimon.operation.commit.ConflictDetection.noConflictCheck; |
105 | 103 | import static org.apache.paimon.operation.commit.ManifestEntryChanges.changedPartitions; |
| 104 | +import static org.apache.paimon.operation.commit.RowTrackingCommitUtils.assignRowTracking; |
106 | 105 | import static org.apache.paimon.partition.PartitionPredicate.createBinaryPartitions; |
107 | 106 | import static org.apache.paimon.partition.PartitionPredicate.createPartitionPredicate; |
108 | 107 | import static org.apache.paimon.utils.Preconditions.checkArgument; |
@@ -971,14 +970,10 @@ CommitResult tryCommitOnce( |
971 | 970 | baseManifestList = manifestList.write(mergeAfterManifests); |
972 | 971 |
|
973 | 972 | if (rowTrackingEnabled) { |
974 | | - // assigned snapshot id to delta files |
975 | | - List<ManifestEntry> snapshotAssigned = new ArrayList<>(); |
976 | | - assignSnapshotId(newSnapshotId, deltaFiles, snapshotAssigned); |
977 | | - // assign row id for new files |
978 | | - List<ManifestEntry> rowIdAssigned = new ArrayList<>(); |
979 | | - nextRowIdStart = |
980 | | - assignRowTrackingMeta(firstRowIdStart, snapshotAssigned, rowIdAssigned); |
981 | | - deltaFiles = rowIdAssigned; |
| 973 | + RowTrackingAssigned assigned = |
| 974 | + assignRowTracking(newSnapshotId, firstRowIdStart, deltaFiles); |
| 975 | + nextRowIdStart = assigned.nextRowIdStart; |
| 976 | + deltaFiles = assigned.assignedEntries; |
982 | 977 | } |
983 | 978 |
|
984 | 979 | // the added records subtract the deleted records from |
@@ -1132,57 +1127,6 @@ public boolean replaceManifestList( |
1132 | 1127 | return commitSnapshotImpl(newSnapshot, emptyList()); |
1133 | 1128 | } |
1134 | 1129 |
|
1135 | | - private long assignRowTrackingMeta( |
1136 | | - long firstRowIdStart, |
1137 | | - List<ManifestEntry> deltaFiles, |
1138 | | - List<ManifestEntry> rowIdAssigned) { |
1139 | | - if (deltaFiles.isEmpty()) { |
1140 | | - return firstRowIdStart; |
1141 | | - } |
1142 | | - // assign row id for new files |
1143 | | - long start = firstRowIdStart; |
1144 | | - long blobStart = firstRowIdStart; |
1145 | | - for (ManifestEntry entry : deltaFiles) { |
1146 | | - checkArgument( |
1147 | | - entry.file().fileSource().isPresent(), |
1148 | | - "This is a bug, file source field for row-tracking table must present."); |
1149 | | - boolean containsRowId = |
1150 | | - entry.file().writeCols() != null |
1151 | | - && entry.file().writeCols().contains(SpecialFields.ROW_ID.name()); |
1152 | | - if (entry.file().fileSource().get().equals(FileSource.APPEND) |
1153 | | - && entry.file().firstRowId() == null |
1154 | | - && !containsRowId) { |
1155 | | - if (isBlobFile(entry.file().fileName())) { |
1156 | | - if (blobStart >= start) { |
1157 | | - throw new IllegalStateException( |
1158 | | - String.format( |
1159 | | - "This is a bug, blobStart %d should be less than start %d when assigning a blob entry file.", |
1160 | | - blobStart, start)); |
1161 | | - } |
1162 | | - long rowCount = entry.file().rowCount(); |
1163 | | - rowIdAssigned.add(entry.assignFirstRowId(blobStart)); |
1164 | | - blobStart += rowCount; |
1165 | | - } else { |
1166 | | - long rowCount = entry.file().rowCount(); |
1167 | | - rowIdAssigned.add(entry.assignFirstRowId(start)); |
1168 | | - blobStart = start; |
1169 | | - start += rowCount; |
1170 | | - } |
1171 | | - } else { |
1172 | | - // for compact file, do not assign first row id. |
1173 | | - rowIdAssigned.add(entry); |
1174 | | - } |
1175 | | - } |
1176 | | - return start; |
1177 | | - } |
1178 | | - |
1179 | | - private void assignSnapshotId( |
1180 | | - long snapshotId, List<ManifestEntry> deltaFiles, List<ManifestEntry> snapshotAssigned) { |
1181 | | - for (ManifestEntry entry : deltaFiles) { |
1182 | | - snapshotAssigned.add(entry.assignSequenceNumber(snapshotId, snapshotId)); |
1183 | | - } |
1184 | | - } |
1185 | | - |
1186 | 1130 | public void compactManifest() { |
1187 | 1131 | int retryCount = 0; |
1188 | 1132 | long startMillis = System.currentTimeMillis(); |
|
0 commit comments