Skip to content

Commit 05ab5e8

Browse files
[server] Handle uncaught exceptions in TabletServer during log recovery from residual data corresponding to already dropped tables (#1487)
1 parent 7f3b266 commit 05ab5e8

File tree

3 files changed

+423
-19
lines changed

3 files changed

+423
-19
lines changed

fluss-server/src/main/java/org/apache/fluss/server/TabletManagerBase.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.fluss.config.Configuration;
2121
import org.apache.fluss.exception.KvStorageException;
2222
import org.apache.fluss.exception.LogStorageException;
23+
import org.apache.fluss.exception.SchemaNotExistException;
2324
import org.apache.fluss.metadata.PhysicalTablePath;
2425
import org.apache.fluss.metadata.SchemaInfo;
2526
import org.apache.fluss.metadata.TableBucket;
@@ -203,7 +204,7 @@ public static TableInfo getTableInfo(ZooKeeperClient zkClient, TablePath tablePa
203204
Optional<SchemaInfo> schemaInfoOpt = zkClient.getSchemaById(tablePath, schemaId);
204205
SchemaInfo schemaInfo;
205206
if (!schemaInfoOpt.isPresent()) {
206-
throw new LogStorageException(
207+
throw new SchemaNotExistException(
207208
String.format(
208209
"Failed to load table '%s': Table schema not found in zookeeper metadata.",
209210
tablePath));

fluss-server/src/main/java/org/apache/fluss/server/log/LogManager.java

Lines changed: 67 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.fluss.config.Configuration;
2323
import org.apache.fluss.exception.FlussRuntimeException;
2424
import org.apache.fluss.exception.LogStorageException;
25+
import org.apache.fluss.exception.SchemaNotExistException;
2526
import org.apache.fluss.metadata.LogFormat;
2627
import org.apache.fluss.metadata.PhysicalTablePath;
2728
import org.apache.fluss.metadata.TableBucket;
@@ -189,24 +190,8 @@ private void loadLogs() {
189190
final boolean cleanShutdown = isCleanShutdown;
190191
// set runnable job.
191192
Runnable[] jobsForDir =
192-
tabletsToLoad.stream()
193-
.map(
194-
tabletDir ->
195-
(Runnable)
196-
() -> {
197-
LOG.debug("Loading log {}", tabletDir);
198-
try {
199-
loadLog(
200-
tabletDir,
201-
cleanShutdown,
202-
finalRecoveryPoints,
203-
conf,
204-
clock);
205-
} catch (Exception e) {
206-
throw new FlussRuntimeException(e);
207-
}
208-
})
209-
.toArray(Runnable[]::new);
193+
createLogLoadingJobs(
194+
tabletsToLoad, cleanShutdown, finalRecoveryPoints, conf, clock);
210195

211196
long startTime = System.currentTimeMillis();
212197

@@ -471,6 +456,70 @@ public void shutdown() {
471456
LOG.info("Shut down LogManager complete.");
472457
}
473458

459+
/** Create runnable jobs for loading logs from tablet directories. */
460+
private Runnable[] createLogLoadingJobs(
461+
List<File> tabletsToLoad,
462+
boolean cleanShutdown,
463+
Map<TableBucket, Long> recoveryPoints,
464+
Configuration conf,
465+
Clock clock) {
466+
Runnable[] jobs = new Runnable[tabletsToLoad.size()];
467+
for (int i = 0; i < tabletsToLoad.size(); i++) {
468+
final File tabletDir = tabletsToLoad.get(i);
469+
jobs[i] = createLogLoadingJob(tabletDir, cleanShutdown, recoveryPoints, conf, clock);
470+
}
471+
return jobs;
472+
}
473+
474+
/** Create a runnable job for loading log from a single tablet directory. */
475+
private Runnable createLogLoadingJob(
476+
File tabletDir,
477+
boolean cleanShutdown,
478+
Map<TableBucket, Long> recoveryPoints,
479+
Configuration conf,
480+
Clock clock) {
481+
return new Runnable() {
482+
@Override
483+
public void run() {
484+
LOG.debug("Loading log {}", tabletDir);
485+
try {
486+
loadLog(tabletDir, cleanShutdown, recoveryPoints, conf, clock);
487+
} catch (Exception e) {
488+
LOG.error("Fail to loadLog from {}", tabletDir, e);
489+
if (e instanceof SchemaNotExistException) {
490+
LOG.error(
491+
"schema not exist, table for {} has already been dropped, the residual data will be removed.",
492+
tabletDir,
493+
e);
494+
FileUtils.deleteDirectoryQuietly(tabletDir);
495+
496+
// Also delete corresponding KV tablet directory if it exists
497+
try {
498+
Tuple2<PhysicalTablePath, TableBucket> pathAndBucket =
499+
FlussPaths.parseTabletDir(tabletDir);
500+
File kvTabletDir =
501+
FlussPaths.kvTabletDir(
502+
dataDir, pathAndBucket.f0, pathAndBucket.f1);
503+
if (kvTabletDir.exists()) {
504+
LOG.info(
505+
"Also removing corresponding KV tablet directory: {}",
506+
kvTabletDir);
507+
FileUtils.deleteDirectoryQuietly(kvTabletDir);
508+
}
509+
} catch (Exception kvDeleteException) {
510+
LOG.warn(
511+
"Failed to delete corresponding KV tablet directory for log {}: {}",
512+
tabletDir,
513+
kvDeleteException.getMessage());
514+
}
515+
return;
516+
}
517+
throw new FlussRuntimeException(e);
518+
}
519+
}
520+
};
521+
}
522+
474523
@VisibleForTesting
475524
void checkpointRecoveryOffsets() {
476525
// Assuming TableBucket and LogTablet are actual types used in your application

0 commit comments

Comments
 (0)