Skip to content

Commit 72e4c43

Browse files
authored
[log] Fix the recovery log failed situation because of OutOfOrderSequenceException cause by writerId expire (#1386)
1 parent 128f657 commit 72e4c43

File tree

5 files changed

+82
-21
lines changed

5 files changed

+82
-21
lines changed

fluss-server/src/main/java/com/alibaba/fluss/server/log/LogTablet.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1115,7 +1115,9 @@ private static void updateWriterAppendInfo(
11151115
// update writers.
11161116
WriterAppendInfo appendInfo =
11171117
writers.computeIfAbsent(writerId, id -> writerStateManager.prepareUpdate(writerId));
1118-
appendInfo.append(batch);
1118+
appendInfo.append(
1119+
batch,
1120+
writerStateManager.isWriterInBatchExpired(System.currentTimeMillis(), batch));
11191121
}
11201122

11211123
static void rebuildWriterState(

fluss-server/src/main/java/com/alibaba/fluss/server/log/WriterAppendInfo.java

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import com.alibaba.fluss.metadata.TableBucket;
2222
import com.alibaba.fluss.record.LogRecordBatch;
2323

24+
import static com.alibaba.fluss.record.LogRecordBatch.NO_BATCH_SEQUENCE;
25+
2426
/**
2527
* This class is used to validate the records appended by a given writer before they are written to
2628
* log. It's initialized with writer's state after the last successful append.
@@ -42,35 +44,38 @@ public long writerId() {
4244
return writerId;
4345
}
4446

45-
public void append(LogRecordBatch batch) {
47+
public void append(LogRecordBatch batch, boolean isWriterInBatchExpired) {
4648
LogOffsetMetadata firstOffsetMetadata = new LogOffsetMetadata(batch.baseLogOffset());
4749
appendDataBatch(
4850
batch.batchSequence(),
4951
firstOffsetMetadata,
5052
batch.lastLogOffset(),
51-
System.currentTimeMillis()); // TODO, add timestamp to record batch.
53+
isWriterInBatchExpired,
54+
batch.commitTimestamp());
5255
}
5356

5457
public void appendDataBatch(
5558
int batchSequence,
5659
LogOffsetMetadata firstOffsetMetadata,
5760
long lastOffset,
58-
long lastTimestamp) {
59-
maybeValidateDataBatch(batchSequence, lastOffset);
61+
boolean isWriterInBatchExpired,
62+
long batchTimestamp) {
63+
maybeValidateDataBatch(batchSequence, isWriterInBatchExpired, lastOffset);
6064
updatedEntry.addBath(
6165
batchSequence,
6266
lastOffset,
6367
(int) (lastOffset - firstOffsetMetadata.getMessageOffset()),
64-
lastTimestamp);
68+
batchTimestamp);
6569
}
6670

67-
private void maybeValidateDataBatch(int appendFirstSeq, long lastOffset) {
71+
private void maybeValidateDataBatch(
72+
int appendFirstSeq, boolean isWriterInBatchExpired, long lastOffset) {
6873
int currentLastSeq =
6974
!updatedEntry.isEmpty()
7075
? updatedEntry.lastBatchSequence()
7176
: currentEntry.lastBatchSequence();
7277
// must be in sequence, even for the first batch should start from 0
73-
if (!inSequence(currentLastSeq, appendFirstSeq)) {
78+
if (!inSequence(currentLastSeq, appendFirstSeq, isWriterInBatchExpired)) {
7479
throw new OutOfOrderSequenceException(
7580
String.format(
7681
"Out of order batch sequence for writer %s at offset %s in "
@@ -83,8 +88,22 @@ public WriterStateEntry toEntry() {
8388
return updatedEntry;
8489
}
8590

86-
private boolean inSequence(int lastBatchSeq, int nextBatchSeq) {
87-
return nextBatchSeq == lastBatchSeq + 1L
91+
/**
92+
* Check if the next batch sequence is in sequence with the last batch sequence. The following
93+
* three scenarios will be judged as in sequence:
94+
*
95+
* <ul>
96+
* <li>If lastBatchSeq equals NO_BATCH_SEQUENCE, we need to check whether the committed
97+
* timestamp of the next batch under the current writerId has expired. If it has expired,
98+
* we consider this a special case caused by writerId expiration, for this case, to ensure
99+
* the correctness of follower sync, we still treat it as in sequence.
100+
* <li>nextBatchSeq == lastBatchSeq + 1L
101+
* <li>lastBatchSeq reaches its maximum value
102+
* </ul>
103+
*/
104+
private boolean inSequence(int lastBatchSeq, int nextBatchSeq, boolean isWriterInBatchExpired) {
105+
return (lastBatchSeq == NO_BATCH_SEQUENCE && isWriterInBatchExpired)
106+
|| nextBatchSeq == lastBatchSeq + 1L
88107
|| (nextBatchSeq == 0 && lastBatchSeq == Integer.MAX_VALUE);
89108
}
90109
}

fluss-server/src/main/java/com/alibaba/fluss/server/log/WriterStateManager.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,10 @@ private boolean isWriterExpired(long currentTimeMs, WriterStateEntry writerState
419419
return currentTimeMs - writerStateEntry.lastBatchTimestamp() > writerExpirationMs;
420420
}
421421

422+
public boolean isWriterInBatchExpired(long currentTimeMs, LogRecordBatch recordBatch) {
423+
return currentTimeMs - recordBatch.commitTimestamp() > writerExpirationMs;
424+
}
425+
422426
private static List<WriterStateEntry> readSnapshot(File file) {
423427
try {
424428
byte[] json = Files.readAllBytes(file.toPath());

fluss-server/src/test/java/com/alibaba/fluss/server/log/LogTabletTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ void testWriterSnapshotAfterSegmentRollOnAppend() throws Exception {
411411

412412
@Test
413413
void testPeriodicWriterIdExpiration() throws Exception {
414-
conf.set(ConfigOptions.WRITER_ID_EXPIRATION_TIME, Duration.ofMillis(3000));
414+
conf.set(ConfigOptions.WRITER_ID_EXPIRATION_TIME, Duration.ofMillis(1000));
415415
conf.set(ConfigOptions.WRITER_ID_EXPIRATION_CHECK_INTERVAL, Duration.ofMillis(1000));
416416
long writerId1 = 23L;
417417
LogTablet log = createLogTablet(conf);

fluss-server/src/test/java/com/alibaba/fluss/server/log/WriterStateManagerTest.java

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import com.alibaba.fluss.config.Configuration;
2222
import com.alibaba.fluss.exception.OutOfOrderSequenceException;
2323
import com.alibaba.fluss.metadata.TableBucket;
24+
import com.alibaba.fluss.utils.clock.ManualClock;
2425
import com.alibaba.fluss.utils.types.Tuple2;
2526

2627
import org.junit.jupiter.api.BeforeEach;
@@ -124,14 +125,15 @@ void testValidationOnFirstEntryWhenLoadingLog() {
124125
@Test
125126
void testPrepareUpdateDoesNotMutate() {
126127
WriterAppendInfo appendInfo = stateManager.prepareUpdate(writerId);
127-
appendInfo.appendDataBatch(0, new LogOffsetMetadata(15L), 20L, System.currentTimeMillis());
128+
appendInfo.appendDataBatch(
129+
0, new LogOffsetMetadata(15L), 20L, false, System.currentTimeMillis());
128130
assertThat(stateManager.lastEntry(writerId)).isNotPresent();
129131
stateManager.update(appendInfo);
130132
assertThat(stateManager.lastEntry(writerId)).isPresent();
131133

132134
WriterAppendInfo nextAppendInfo = stateManager.prepareUpdate(writerId);
133135
nextAppendInfo.appendDataBatch(
134-
1, new LogOffsetMetadata(26L), 30L, System.currentTimeMillis());
136+
1, new LogOffsetMetadata(26L), 30L, false, System.currentTimeMillis());
135137
assertThat(stateManager.lastEntry(writerId)).isPresent();
136138

137139
WriterStateEntry lastEntry = stateManager.lastEntry(writerId).get();
@@ -179,8 +181,8 @@ void testFetchSnapshotEmptySnapshot() {
179181

180182
@Test
181183
void testRemoveExpiredWritersOnReload() throws IOException {
182-
append(stateManager, writerId, 0, 0L, 0);
183-
append(stateManager, writerId, 1, 1L, 1);
184+
append(stateManager, writerId, 0, 0L, false, 0);
185+
append(stateManager, writerId, 1, 1L, false, 1);
184186

185187
stateManager.takeSnapshot();
186188
WriterStateManager recoveredMapping =
@@ -194,21 +196,49 @@ void testRemoveExpiredWritersOnReload() throws IOException {
194196
// the writer mapping. If writing with the same writerId and non-zero batch sequence, the
195197
// OutOfOrderSequenceException will throw. If you want to continue to write, you need to get
196198
// a new writer id.
197-
assertThatThrownBy(() -> append(recoveredMapping, writerId, 2, 2L, 70001))
199+
assertThatThrownBy(() -> append(recoveredMapping, writerId, 2, 2L, false, 3000L))
198200
.isInstanceOf(OutOfOrderSequenceException.class)
199201
.hasMessageContaining(
200202
"Out of order batch sequence for writer 1 at offset 2 in "
201203
+ "table-bucket TableBucket{tableId=1001, bucket=0}"
202204
+ " : 2 (incoming batch seq.), -1 (current batch seq.)");
203205

204-
append(recoveredMapping, 2L, 0, 2L, 70002);
206+
append(recoveredMapping, 2L, 0, 2L, false, 70002);
205207

206208
assertThat(recoveredMapping.activeWriters().size()).isEqualTo(1);
207209
assertThat(recoveredMapping.activeWriters().values().iterator().next().lastBatchSequence())
208210
.isEqualTo(0);
209211
assertThat(recoveredMapping.mapEndOffset()).isEqualTo(3L);
210212
}
211213

214+
@Test
215+
void testAppendAnExpiredBatchWithEmptyWriterStatus() throws Exception {
216+
ManualClock clock = new ManualClock(5000L);
217+
218+
// 2 seconds to expire the writer.
219+
conf.set(ConfigOptions.WRITER_ID_EXPIRATION_TIME, Duration.ofSeconds(2));
220+
WriterStateManager stateManager1 =
221+
new WriterStateManager(
222+
tableBucket,
223+
logDir,
224+
(int) conf.get(ConfigOptions.WRITER_ID_EXPIRATION_TIME).toMillis());
225+
226+
// If we try to append an expired batch with none zero batch sequence, the
227+
// OutOfOrderSequenceException will not been throw.
228+
append(stateManager1, 1L, 10, 10L, true, clock.milliseconds());
229+
assertThat(stateManager1.activeWriters().size()).isEqualTo(1);
230+
assertThat(stateManager1.activeWriters().values().iterator().next().lastBatchSequence())
231+
.isEqualTo(10);
232+
233+
// If we try to append a none-expired batch with none zero batch sequence, the
234+
// OutOfOrderSequenceException will throw.
235+
assertThatThrownBy(() -> append(stateManager1, 2L, 10, 10L, false, 1000L))
236+
.isInstanceOf(OutOfOrderSequenceException.class)
237+
.hasMessageContaining(
238+
"Out of order batch sequence for writer 2 at offset 10 in table-bucket "
239+
+ "TableBucket{tableId=1001, bucket=0} : 10 (incoming batch seq.), -1 (current batch seq.)");
240+
}
241+
212242
@Test
213243
void testDeleteSnapshotsBefore() throws IOException {
214244
append(stateManager, writerId, 0, 0L);
@@ -322,7 +352,7 @@ void testLoadFromSnapshotRetainsNonExpiredWriters() throws IOException {
322352

323353
@Test
324354
void testSkipSnapshotIfOffsetUnchanged() throws IOException {
325-
append(stateManager, writerId, 0, 0L, 0L);
355+
append(stateManager, writerId, 0, 0L, false, 0L);
326356

327357
stateManager.takeSnapshot();
328358
assertThat(Objects.requireNonNull(logDir.listFiles()).length).isEqualTo(1);
@@ -475,17 +505,23 @@ private void testLoadFromCorruptSnapshot(Consumer<FileChannel> makeFileCorrupt)
475505

476506
private void append(
477507
WriterStateManager stateManager, long writerId, int batchSequence, long offset) {
478-
append(stateManager, writerId, batchSequence, offset, System.currentTimeMillis());
508+
append(stateManager, writerId, batchSequence, offset, false, System.currentTimeMillis());
479509
}
480510

481511
private void append(
482512
WriterStateManager stateManager,
483513
long writerId,
484514
int batchSequence,
485515
long offset,
486-
long timestamp) {
516+
boolean isWriterInBatchExpired,
517+
long lastTimestamp) {
487518
WriterAppendInfo appendInfo = stateManager.prepareUpdate(writerId);
488-
appendInfo.appendDataBatch(batchSequence, new LogOffsetMetadata(offset), offset, timestamp);
519+
appendInfo.appendDataBatch(
520+
batchSequence,
521+
new LogOffsetMetadata(offset),
522+
offset,
523+
isWriterInBatchExpired,
524+
lastTimestamp);
489525
stateManager.update(appendInfo);
490526
stateManager.updateMapEndOffset(offset + 1);
491527
}

0 commit comments

Comments
 (0)