Skip to content

Commit de9f394

Browse files
[coordinator] Log the reason for failures in bucket or replica state changes (apache#2145)
1 parent d10252d commit de9f394

File tree

2 files changed: +54 additions, -25 deletions (lines changed)

2 files changed: +54 additions, -25 deletions (lines changed)

fluss-server/src/main/java/org/apache/fluss/server/coordinator/statemachine/ReplicaStateMachine.java

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -221,8 +221,8 @@ private void doHandleStateChanges(
221221
try {
222222
partitionName = getPartitionName(tableBucket);
223223
} catch (PartitionNotExistException e) {
224-
LOG.error(e.getMessage());
225-
logFailedSateChange(replica, currentState, targetState);
224+
logFailedSateChange(
225+
replica, currentState, targetState, e.getMessage());
226226
return;
227227
}
228228

@@ -282,11 +282,11 @@ private void doHandleStateChanges(
282282
try {
283283
partitionName = getPartitionName(tableBucket);
284284
} catch (PartitionNotExistException e) {
285-
LOG.error(e.getMessage());
286285
logFailedSateChange(
287286
tableBucketReplica,
288287
coordinatorContext.getReplicaState(tableBucketReplica),
289-
targetState);
288+
targetState,
289+
e.getMessage());
290290
continue;
291291
}
292292
// send leader request to the replica server
@@ -343,7 +343,11 @@ protected Collection<TableBucketReplica> checkValidReplicaStateChange(
343343
return true;
344344
} else {
345345
logInvalidTransition(replica, curState, targetState);
346-
logFailedSateChange(replica, curState, targetState);
346+
logFailedSateChange(
347+
replica,
348+
curState,
349+
targetState,
350+
"Invalid Replica State Transition.");
347351
return false;
348352
}
349353
})
@@ -376,12 +380,16 @@ private void logInvalidTransition(
376380
}
377381

378382
private void logFailedSateChange(
379-
TableBucketReplica replica, ReplicaState currState, ReplicaState targetState) {
383+
TableBucketReplica replica,
384+
ReplicaState currState,
385+
ReplicaState targetState,
386+
String reason) {
380387
LOG.error(
381-
"Fail to change state for table bucket replica {} from {} to {}.",
388+
"Fail to change state for table bucket replica {} from {} to {}, reason: {}.",
382389
stringifyReplica(replica),
383390
currState,
384-
targetState);
391+
targetState,
392+
reason);
385393
}
386394

387395
private void logSuccessfulStateChange(

fluss-server/src/main/java/org/apache/fluss/server/coordinator/statemachine/TableBucketStateMachine.java

Lines changed: 38 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -209,10 +209,13 @@ private void doHandleStateChange(
209209
partitionName =
210210
coordinatorContext.getPartitionName(tableBucket.getPartitionId());
211211
if (partitionName == null) {
212-
LOG.error(
213-
"Can't find partition name for partition: {}.",
214-
tableBucket.getBucket());
215-
logFailedStateChange(tableBucket, currentState, targetState);
212+
logFailedStateChange(
213+
tableBucket,
214+
currentState,
215+
targetState,
216+
String.format(
217+
"Can't find partition name for partition: %s.",
218+
tableBucket.getBucket()));
216219
return;
217220
}
218221
}
@@ -222,7 +225,8 @@ private void doHandleStateChange(
222225
Optional<ElectionResult> optionalElectionResult =
223226
initLeaderForTableBuckets(tableBucket, assignedServers);
224227
if (!optionalElectionResult.isPresent()) {
225-
logFailedStateChange(tableBucket, currentState, targetState);
228+
logFailedStateChange(
229+
tableBucket, currentState, targetState, "Elect Result is empty.");
226230
} else {
227231
// transmit state
228232
doStateChange(tableBucket, targetState);
@@ -244,7 +248,8 @@ private void doHandleStateChange(
244248
electNewLeaderForTableBuckets(
245249
tableBucket, replicaLeaderElectionStrategy);
246250
if (!optionalElectionResult.isPresent()) {
247-
logFailedStateChange(tableBucket, currentState, targetState);
251+
logFailedStateChange(
252+
tableBucket, currentState, targetState, "Elect result is empty.");
248253
} else {
249254
// transmit state
250255
doStateChange(tableBucket, targetState);
@@ -337,10 +342,13 @@ public void batchHandleOnlineChangeAndInitLeader(Set<TableBucket> tableBuckets)
337342
if (tableBucket.getPartitionId() != null) {
338343
partitionName = coordinatorContext.getPartitionName(tableBucket.getPartitionId());
339344
if (partitionName == null) {
340-
LOG.error(
341-
"Can't find partition name for partition: {}.",
342-
tableBucket.getBucket());
343-
logFailedStateChange(tableBucket, currentState, BucketState.OnlineBucket);
345+
logFailedStateChange(
346+
tableBucket,
347+
currentState,
348+
BucketState.OnlineBucket,
349+
String.format(
350+
"Can't find partition name for partition: %s.",
351+
tableBucket.getBucket()));
344352
continue;
345353
}
346354
}
@@ -350,7 +358,11 @@ public void batchHandleOnlineChangeAndInitLeader(Set<TableBucket> tableBuckets)
350358
Optional<ElectionResult> optionalElectionResult =
351359
doInitElectionForBucket(tableBucket, assignedServers);
352360
if (!optionalElectionResult.isPresent()) {
353-
logFailedStateChange(tableBucket, currentState, BucketState.OnlineBucket);
361+
logFailedStateChange(
362+
tableBucket,
363+
currentState,
364+
BucketState.OnlineBucket,
365+
"Elect result is empty.");
354366
continue;
355367
}
356368
ElectionResult electionResult = optionalElectionResult.get();
@@ -506,7 +518,8 @@ private boolean checkValidTableBucketStateChange(
506518
return true;
507519
} else {
508520
logInvalidTransition(tableBucket, curState, targetState);
509-
logFailedStateChange(tableBucket, curState, targetState);
521+
logFailedStateChange(
522+
tableBucket, curState, targetState, "Invalid TableBucket State Transition.");
510523
return false;
511524
}
512525
}
@@ -537,12 +550,16 @@ private void logInvalidTransition(
537550
}
538551

539552
private void logFailedStateChange(
540-
TableBucket tableBucket, BucketState currState, BucketState targetState) {
553+
TableBucket tableBucket,
554+
BucketState currState,
555+
BucketState targetState,
556+
String reason) {
541557
LOG.error(
542-
"Fail to change state for table bucket {} from {} to {}.",
558+
"Fail to change state for table bucket {} from {} to {}, reason: {}",
543559
stringifyBucket(tableBucket),
544560
currState,
545-
targetState);
561+
targetState,
562+
reason);
546563
}
547564

548565
private void logSuccessfulStateChange(
@@ -610,8 +627,12 @@ private Optional<ElectionResult> electLeader(
610627

611628
if (!resultOpt.isPresent()) {
612629
LOG.error(
613-
"The leader election for table bucket {} is empty.",
614-
stringifyBucket(tableBucket));
630+
"The leader election for table bucket {} is empty, assignment: {}, live replicas: {}, leaderAndIsr: {}, strategy: {}",
631+
stringifyBucket(tableBucket),
632+
assignment,
633+
liveReplicas,
634+
leaderAndIsr,
635+
electionStrategy);
615636
return Optional.empty();
616637
}
617638
return resultOpt;

0 commit comments

Comments (0)