Revert "[server] Recover log and index file for unclean shutdown (#1749)" (#2036)

swuferhong · web-flow · commit 53b108bcdc98 · 2025-11-27T13:38:32.000+08:00
This reverts commit d5cb521.
diff --git a/fluss-server/src/main/java/org/apache/fluss/server/log/LogLoader.java b/fluss-server/src/main/java/org/apache/fluss/server/log/LogLoader.java
@@ -19,7 +19,6 @@
 
 import org.apache.fluss.config.ConfigOptions;
 import org.apache.fluss.config.Configuration;
-import org.apache.fluss.exception.InvalidOffsetException;
 import org.apache.fluss.exception.LogSegmentOffsetOverflowException;
 import org.apache.fluss.exception.LogStorageException;
 import org.apache.fluss.metadata.LogFormat;
@@ -32,13 +31,8 @@
 import java.io.File;
 import java.io.IOException;
 import java.nio.file.Files;
-import java.nio.file.NoSuchFileException;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-import java.util.stream.Collectors;
 
 /* This file is based on source code of Apache Kafka Project (https://kafka.apache.org/), licensed by the Apache
  * Software Foundation (ASF) under the Apache License, Version 2.0. See the NOTICE file distributed with this work for
@@ -123,37 +117,6 @@ public LoadedLogOffsets load() throws IOException {
                         nextOffset, activeSegment.getBaseOffset(), activeSegment.getSizeInBytes()));
     }
 
-    /**
-     * Just recovers the given segment, without adding it to the provided segments.
-     *
-     * @param segment Segment to recover
-     * @return The number of bytes truncated from the segment
-     * @throws LogSegmentOffsetOverflowException if the segment contains messages that cause index
-     *     offset overflow
-     */
-    private int recoverSegment(LogSegment segment) throws IOException {
-        WriterStateManager writerStateManager =
-                new WriterStateManager(
-                        logSegments.getTableBucket(),
-                        logTabletDir,
-                        this.writerStateManager.writerExpirationMs());
-        // TODO, Here, we use 0 as the logStartOffset passed into rebuildWriterState. The reason is
-        // that the current implementation of logStartOffset in Fluss is not yet fully refined, and
-        // there may be cases where logStartOffset is not updated. As a result, logStartOffset is
-        // not yet reliable. Once the issue with correctly updating logStartOffset is resolved in
-        // issue https://github.com/apache/fluss/issues/744, we can use logStartOffset here.
-        // Additionally, using 0 versus using logStartOffset does not affect correctness—they both
-        // can restore the complete WriterState. The only difference is that using logStartOffset
-        // can potentially skip over more segments.
-        LogTablet.rebuildWriterState(
-                writerStateManager, logSegments, 0, segment.getBaseOffset(), false);
-        int bytesTruncated = segment.recover();
-        // once we have recovered the segment's data, take a snapshot to ensure that we won't
-        // need to reload the same segment again while recovering another segment.
-        writerStateManager.takeSnapshot();
-        return bytesTruncated;
-    }
-
     /**
      * Recover the log segments (if there was an unclean shutdown). Ensures there is at least one
      * active segment, and returns the updated recovery point and next offset after recovery.
@@ -166,106 +129,14 @@ private int recoverSegment(LogSegment segment) throws IOException {
      *     overflow
      */
     private Tuple2<Long, Long> recoverLog() throws IOException {
-        if (!isCleanShutdown) {
-            List<LogSegment> unflushed =
-                    logSegments.values(recoveryPointCheckpoint, Long.MAX_VALUE);
-            int numUnflushed = unflushed.size();
-            Iterator<LogSegment> unflushedIter = unflushed.iterator();
-            boolean truncated = false;
-            int numFlushed = 1;
-
-            while (unflushedIter.hasNext() && !truncated) {
-                LogSegment segment = unflushedIter.next();
-                LOG.info(
-                        "Recovering unflushed segment {}. {}/{} recovered for bucket {}",
-                        segment.getBaseOffset(),
-                        numFlushed,
-                        numUnflushed,
-                        logSegments.getTableBucket());
-
-                int truncatedBytes = -1;
-                try {
-                    truncatedBytes = recoverSegment(segment);
-                } catch (Exception e) {
-                    if (e instanceof InvalidOffsetException) {
-                        long startOffset = segment.getBaseOffset();
-                        LOG.warn(
-                                "Found invalid offset during recovery for bucket {}. Deleting the corrupt segment "
-                                        + "and creating an empty one with starting offset {}",
-                                logSegments.getTableBucket(),
-                                startOffset);
-                        truncatedBytes = segment.truncateTo(startOffset);
-                    } else {
-                        throw e;
-                    }
-                }
-
-                if (truncatedBytes > 0) {
-                    // we had an invalid message, delete all remaining log
-                    LOG.warn(
-                            "Corruption found in segment {} for bucket {}, truncating to offset {}",
-                            segment.getBaseOffset(),
-                            logSegments.getTableBucket(),
-                            segment.readNextOffset());
-                    removeAndDeleteSegments(unflushedIter);
-                    truncated = true;
-                } else {
-                    numFlushed += 1;
-                }
-            }
-        }
-
+        // TODO: truncate the log to recover segments that may not have been flushed.
         if (logSegments.isEmpty()) {
-            // TODO: use logStartOffset if issue https://github.com/apache/fluss/issues/744 ready
             logSegments.add(LogSegment.open(logTabletDir, 0L, conf, logFormat));
         }
         long logEndOffset = logSegments.lastSegment().get().readNextOffset();
         return Tuple2.of(recoveryPointCheckpoint, logEndOffset);
     }
 
-    /**
-     * This method deletes the given log segments and the associated writer snapshots.
-     *
-     * <p>This method does not need to convert IOException to {@link LogStorageException} because it
-     * is either called before all logs are loaded or the immediate caller will catch and handle
-     * IOException
-     *
-     * @param segmentsToDelete The log segments to schedule for deletion
-     */
-    private void removeAndDeleteSegments(Iterator<LogSegment> segmentsToDelete) {
-        if (segmentsToDelete.hasNext()) {
-            List<LogSegment> toDelete = new ArrayList<>();
-            segmentsToDelete.forEachRemaining(toDelete::add);
-
-            LOG.info(
-                    "Deleting segments for bucket {} as part of log recovery: {}",
-                    logSegments.getTableBucket(),
-                    toDelete.stream().map(LogSegment::toString).collect(Collectors.joining(",")));
-            toDelete.forEach(segment -> logSegments.remove(segment.getBaseOffset()));
-
-            try {
-                LocalLog.deleteSegmentFiles(
-                        toDelete, LocalLog.SegmentDeletionReason.LOG_TRUNCATION);
-            } catch (IOException e) {
-                LOG.error(
-                        "Failed to delete truncated segments {} for bucket {}",
-                        toDelete,
-                        logSegments.getTableBucket(),
-                        e);
-            }
-
-            try {
-                LogTablet.deleteWriterSnapshots(toDelete, writerStateManager);
-            } catch (IOException e) {
-                LOG.error(
-                        "Failed to delete truncated writer snapshots {} for bucket {}",
-                        toDelete,
-                        logSegments.getTableBucket(),
-                        e);
-            }
-        }
-    }
-
     /** Loads segments from disk into the provided segments. */
     private void loadSegmentFiles() throws IOException {
         File[] sortedFiles = logTabletDir.listFiles();
@@ -285,28 +156,8 @@ private void loadSegmentFiles() throws IOException {
                         }
                     } else if (LocalLog.isLogFile(file)) {
                         long baseOffset = FlussPaths.offsetFromFile(file);
-                        boolean timeIndexFileNewlyCreated =
-                                !FlussPaths.timeIndexFile(logTabletDir, baseOffset).exists();
                         LogSegment segment =
                                 LogSegment.open(logTabletDir, baseOffset, conf, true, 0, logFormat);
-
-                        try {
-                            segment.sanityCheck(timeIndexFileNewlyCreated);
-                        } catch (IOException e) {
-                            if (e instanceof NoSuchFileException) {
-                                if (isCleanShutdown
-                                        || segment.getBaseOffset() < recoveryPointCheckpoint) {
-                                    LOG.error(
-                                            "Could not find offset index file corresponding to log file {} "
-                                                    + "for bucket {}, recovering segment and rebuilding index files...",
-                                            logSegments.getTableBucket(),
-                                            segment.getFileLogRecords().file().getAbsoluteFile());
-                                }
-                                recoverSegment(segment);
-                            } else {
-                                throw e;
-                            }
-                        }
                         logSegments.add(segment);
                     }
                 }
diff --git a/fluss-server/src/main/java/org/apache/fluss/server/log/LogSegment.java b/fluss-server/src/main/java/org/apache/fluss/server/log/LogSegment.java
@@ -44,7 +44,6 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.nio.file.NoSuchFileException;
 import java.util.Optional;
 
 import static org.apache.fluss.record.LogRecordBatchFormat.V0_RECORD_BATCH_HEADER_SIZE;
@@ -173,23 +172,6 @@ public void resizeIndexes(int size) throws IOException {
         timeIndex().resize(size);
     }
 
-    public void sanityCheck(boolean timeIndexFileNewlyCreated) throws IOException {
-        if (lazyOffsetIndex.file().exists()) {
-            // Resize the time index file to 0 if it is newly created.
-            if (timeIndexFileNewlyCreated) {
-                timeIndex().resize(0);
-            }
-            // Sanity checks for time index and offset index are skipped because
-            // we will recover the segments above the recovery point in recoverLog()
-            // in any case so sanity checking them here is redundant.
-        } else {
-            throw new NoSuchFileException(
-                    "Offset index file "
-                            + lazyOffsetIndex.file().getAbsolutePath()
-                            + " does not exist.");
-        }
-    }
-
     /**
      * The maximum timestamp we see so far.
      *
@@ -302,7 +284,7 @@ public boolean deleted() {
      * Run recovery on the given segment. This will rebuild the index from the log file and lop off
      * any invalid bytes from the end of the log and index.
      */
-    public int recover() throws IOException {
+    public int recover() throws Exception {
         offsetIndex().reset();
         timeIndex().reset();
         int validBytes = 0;
diff --git a/fluss-server/src/main/java/org/apache/fluss/server/log/LogTablet.java b/fluss-server/src/main/java/org/apache/fluss/server/log/LogTablet.java
@@ -1283,7 +1283,7 @@ private static void loadWritersFromRecords(
         loadedWriters.values().forEach(writerStateManager::update);
     }
 
-    public static void deleteWriterSnapshots(
+    private static void deleteWriterSnapshots(
             List<LogSegment> segments, WriterStateManager writerStateManager) throws IOException {
         for (LogSegment segment : segments) {
             writerStateManager.removeAndDeleteSnapshot(segment.getBaseOffset());
diff --git a/fluss-server/src/main/java/org/apache/fluss/server/log/WriterStateManager.java b/fluss-server/src/main/java/org/apache/fluss/server/log/WriterStateManager.java
@@ -99,10 +99,6 @@ public WriterStateManager(TableBucket tableBucket, File logTabletDir, int writer
         this.snapshots = loadSnapshots();
     }
 
-    public int writerExpirationMs() {
-        return writerExpirationMs;
-    }
-
     public int writerIdCount() {
         return writerIdCount;
     }
diff --git a/fluss-server/src/test/java/org/apache/fluss/server/log/LogLoaderTest.java b/fluss-server/src/test/java/org/apache/fluss/server/log/LogLoaderTest.java

Rendered table view of the `LogTablet.java` hunk (the same change as the unified diff for that file earlier on this page):

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1283,7 +1283,7 @@ private static void loadWritersFromRecords(` |
| 1283 | 1283 | `        loadedWriters.values().forEach(writerStateManager::update);` |
| 1284 | 1284 | `    }` |
| 1285 | 1285 | |
| 1286 | | `-    public static void deleteWriterSnapshots(` |
| | 1286 | `+    private static void deleteWriterSnapshots(` |
| 1287 | 1287 | `            List<LogSegment> segments, WriterStateManager writerStateManager) throws IOException {` |
| 1288 | 1288 | `        for (LogSegment segment : segments) {` |
| 1289 | 1289 | `            writerStateManager.removeAndDeleteSnapshot(segment.getBaseOffset());` |
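
Rendered table view of the `WriterStateManager.java` hunk (the same change as the unified diff for that file earlier on this page):

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -99,10 +99,6 @@ public WriterStateManager(TableBucket tableBucket, File logTabletDir, int writer` |
| 99 | 99 | `        this.snapshots = loadSnapshots();` |
| 100 | 100 | `    }` |
| 101 | 101 | |
| 102 | | `-    public int writerExpirationMs() {` |
| 103 | | `-        return writerExpirationMs;` |
| 104 | | `-    }` |
| 105 | | `-` |
| 106 | 102 | `    public int writerIdCount() {` |
| 107 | 103 | `        return writerIdCount;` |
| 108 | 104 | `    }` |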