Skip to content

Commit 26ac4b1

Browse files
authored
[core] Fix that snapshot expire might delete files used by tag mistakenly (apache#5237)
1 parent 2355445 commit 26ac4b1

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

paimon-core/src/main/java/org/apache/paimon/operation/FileDeletionBase.java

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -340,9 +340,11 @@ public Predicate<ExpireFileEntry> createDataFileSkipperForTags(
340340
if (index >= 0) {
341341
Snapshot previousTag = taggedSnapshots.get(index);
342342
if (previousTag.id() != cachedTag) {
343-
cachedTag = previousTag.id();
343+
cachedTag = 0;
344344
cachedTagDataFiles.clear();
345345
addMergedDataFiles(cachedTagDataFiles, previousTag);
346+
// update cachedTag after read tag successfully
347+
cachedTag = previousTag.id();
346348
}
347349
return entry -> containsDataFile(cachedTagDataFiles, entry);
348350
}
@@ -359,7 +361,7 @@ protected List<ManifestFileMeta> tryReadManifestList(String manifestListName) {
359361
try {
360362
return manifestList.read(manifestListName);
361363
} catch (Exception e) {
362-
LOG.warn("Failed to read manifest list file " + manifestListName, e);
364+
LOG.warn("Failed to read manifest list file {}", manifestListName, e);
363365
return Collections.emptyList();
364366
}
365367
}
@@ -371,22 +373,17 @@ protected List<ManifestFileMeta> tryReadManifestList(String manifestListName) {
371373
protected void addMergedDataFiles(
372374
Map<BinaryRow, Map<Integer, Set<String>>> dataFiles, Snapshot snapshot)
373375
throws IOException {
374-
for (ExpireFileEntry entry : readMergedDataFiles(snapshot)) {
376+
for (ExpireFileEntry entry :
377+
readMergedDataFiles(manifestList.readDataManifests(snapshot))) {
375378
dataFiles
376379
.computeIfAbsent(entry.partition(), p -> new HashMap<>())
377380
.computeIfAbsent(entry.bucket(), b -> new HashSet<>())
378381
.add(entry.fileName());
379382
}
380383
}
381384

382-
protected Collection<ExpireFileEntry> readMergedDataFiles(Snapshot snapshot)
385+
protected Collection<ExpireFileEntry> readMergedDataFiles(List<ManifestFileMeta> manifests)
383386
throws IOException {
384-
// read data manifests
385-
386-
List<ManifestFileMeta> manifests = tryReadManifestList(snapshot.baseManifestList());
387-
manifests.addAll(tryReadManifestList(snapshot.deltaManifestList()));
388-
389-
// read and merge manifest entries
390387
Map<Identifier, ExpireFileEntry> map = new HashMap<>();
391388
for (ManifestFileMeta manifest : manifests) {
392389
List<ExpireFileEntry> entries =

paimon-core/src/main/java/org/apache/paimon/operation/TagDeletion.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.apache.paimon.io.DataFilePathFactory;
2727
import org.apache.paimon.manifest.ExpireFileEntry;
2828
import org.apache.paimon.manifest.ManifestFile;
29+
import org.apache.paimon.manifest.ManifestFileMeta;
2930
import org.apache.paimon.manifest.ManifestList;
3031
import org.apache.paimon.stats.StatsFileHandler;
3132
import org.apache.paimon.utils.DataFilePathFactories;
@@ -73,7 +74,10 @@ public TagDeletion(
7374
public void cleanUnusedDataFiles(Snapshot taggedSnapshot, Predicate<ExpireFileEntry> skipper) {
7475
Collection<ExpireFileEntry> manifestEntries;
7576
try {
76-
manifestEntries = readMergedDataFiles(taggedSnapshot);
77+
List<ManifestFileMeta> manifests =
78+
tryReadManifestList(taggedSnapshot.baseManifestList());
79+
manifests.addAll(tryReadManifestList(taggedSnapshot.deltaManifestList()));
80+
manifestEntries = readMergedDataFiles(manifests);
7781
} catch (IOException e) {
7882
LOG.info("Skip data file clean for the tag of id {}.", taggedSnapshot.id(), e);
7983
return;

0 commit comments

Comments
 (0)