@@ -44,13 +44,15 @@ public long writerId() {
4444 return writerId ;
4545 }
4646
47- public void append (LogRecordBatch batch , boolean isWriterInBatchExpired ) {
47+ public void append (
48+ LogRecordBatch batch , boolean isWriterInBatchExpired , boolean isAppendAsLeader ) {
4849 LogOffsetMetadata firstOffsetMetadata = new LogOffsetMetadata (batch .baseLogOffset ());
4950 appendDataBatch (
5051 batch .batchSequence (),
5152 firstOffsetMetadata ,
5253 batch .lastLogOffset (),
5354 isWriterInBatchExpired ,
55+ isAppendAsLeader ,
5456 batch .commitTimestamp ());
5557 }
5658
@@ -59,8 +61,9 @@ public void appendDataBatch(
5961 LogOffsetMetadata firstOffsetMetadata ,
6062 long lastOffset ,
6163 boolean isWriterInBatchExpired ,
64+ boolean isAppendAsLeader ,
6265 long batchTimestamp ) {
63- maybeValidateDataBatch (batchSequence , isWriterInBatchExpired , lastOffset );
66+ maybeValidateDataBatch (batchSequence , isWriterInBatchExpired , lastOffset , isAppendAsLeader );
6467 updatedEntry .addBath (
6568 batchSequence ,
6669 lastOffset ,
@@ -69,13 +72,16 @@ public void appendDataBatch(
6972 }
7073
7174 private void maybeValidateDataBatch (
72- int appendFirstSeq , boolean isWriterInBatchExpired , long lastOffset ) {
75+ int appendFirstSeq ,
76+ boolean isWriterInBatchExpired ,
77+ long lastOffset ,
78+ boolean isAppendAsLeader ) {
7379 int currentLastSeq =
7480 !updatedEntry .isEmpty ()
7581 ? updatedEntry .lastBatchSequence ()
7682 : currentEntry .lastBatchSequence ();
7783 // must be in sequence, even for the first batch should start from 0
78- if (!inSequence (currentLastSeq , appendFirstSeq , isWriterInBatchExpired )) {
84+ if (!inSequence (currentLastSeq , appendFirstSeq , isWriterInBatchExpired , isAppendAsLeader )) {
7985 throw new OutOfOrderSequenceException (
8086 String .format (
8187 "Out of order batch sequence for writer %s at offset %s in "
@@ -93,16 +99,53 @@ public WriterStateEntry toEntry() {
9399 * three scenarios will be judged as in sequence:
94100 *
95101 * <ul>
96- * <li>If lastBatchSeq equals NO_BATCH_SEQUENCE, we need to check whether the committed
97- * timestamp of the next batch under the current writerId has expired. If it has expired,
98- * we consider this a special case caused by writerId expiration, for this case, to ensure
99- * the correctness of follower sync, we still treat it as in sequence.
100- * <li>nextBatchSeq == lastBatchSeq + 1L
101- * <li>lastBatchSeq reaches its maximum value
102+ * <li>1. If lastBatchSeq equals NO_BATCH_SEQUENCE, the following two scenarios will be judged
103+ * as in sequence:
104+ * <ul>
105+ * <li>1.1 If the committed timestamp of the next batch under the current writerId has
106+ * expired, we consider this a special case caused by writerId expiration, for this
107+ * case, to ensure the correctness of follower sync, we still treat it as in
108+ * sequence.
109+ * <li>1.2 If the append request is from the follower, we consider this a special
110+ * case caused by inconsistent expiration of writerId between the leader and
111+ * follower. To prevent continuous fetch failures on the follower side, we still
112+ * treat it as in sequence.
113+ * </ul>
114+ * <li>2. nextBatchSeq == lastBatchSeq + 1L
115+ * <li>3. lastBatchSeq reaches its maximum value
102116 * </ul>
117+ *
118+ * <p>For case 1.2, here is a detailed example: The expiration of a writer is triggered
119+ * asynchronously by the {@code PeriodicWriterIdExpirationCheck} thread at intervals defined by
120+ * {@code server.writer-id.expiration-check-interval}, which can result in slight differences in
121+ * the actual expiration times of the same writer on the leader replica and follower replicas.
122+ * This slight difference leads to a dreadful corner case. Imagine the following scenario (set
123+ * {@code server.writer-id.expiration-check-interval}: 10min, {@code
124+ * server.writer-id.expiration-time}: 12h):
125+ *
126+ * <pre>{@code
127+ * Step Time Action of Leader Action of Follower
128+ * 1 00:03:38 receive batch 0 of writer 101
129+ * 2 00:03:38 fetch batch 0 of writer 101
130+ * 3 12:05:00 remove state of writer 101
131+ * 4 12:10:02 receive batch 1 of writer 101
132+ * 5 12:10:02 fetch batch 1 of writer 101
133+ * 6 12:11:00 remove state of writer 101
134+ * }</pre>
135+ *
136+ * <p>In step 3, the follower removes the state of writer 101 first, since it has been more than
137+ * 12 hours since writer 101's last batch write, making it safe to remove. However, since the
138+ * expiration of writer 101 has not yet occurred on the leader, and a new batch 1 is received at
139+ * this time, it is successfully written on the leader. At this point, the fetcher pulls batch 1
140+ * from the leader, but since the state of writer 101 has already been cleaned up, an {@link
141+ * OutOfOrderSequenceException} will occur during the write if we don't treat it as in sequence.
103142 */
104- private boolean inSequence (int lastBatchSeq , int nextBatchSeq , boolean isWriterInBatchExpired ) {
105- return (lastBatchSeq == NO_BATCH_SEQUENCE && isWriterInBatchExpired )
143+ private boolean inSequence (
144+ int lastBatchSeq ,
145+ int nextBatchSeq ,
146+ boolean isWriterInBatchExpired ,
147+ boolean isAppendAsLeader ) {
148+ return (lastBatchSeq == NO_BATCH_SEQUENCE && (isWriterInBatchExpired || !isAppendAsLeader ))
106149 || nextBatchSeq == lastBatchSeq + 1L
107150 || (nextBatchSeq == 0 && lastBatchSeq == Integer .MAX_VALUE );
108151 }
0 commit comments