Skip to content

Commit 1b53a7c

Browse files
massakam (Masahiro Sakamoto)
authored and committed
[fix][broker] Fix ack hole in cursor for geo-replication (apache#20931)
Co-authored-by: Masahiro Sakamoto <[email protected]>
1 parent 668eb49 commit 1b53a7c

File tree

1 file changed

+22
-3
lines changed

1 file changed

+22
-3
lines changed

pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/PersistentReplicator.java

+22-3
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,7 @@ public abstract class PersistentReplicator extends AbstractReplicator
116116
protected final ReplicatorStatsImpl stats = new ReplicatorStatsImpl();
117117

118118
protected volatile boolean fetchSchemaInProgress = false;
119+
private volatile boolean waitForCursorRewinding = false;
119120

120121
public PersistentReplicator(String localCluster, PersistentTopic localTopic, ManagedCursor cursor,
121122
String remoteCluster, String remoteTopic,
@@ -143,9 +144,15 @@ public PersistentReplicator(String localCluster, PersistentTopic localTopic, Man
143144

144145
@Override
145146
protected void setProducerAndTriggerReadEntries(Producer<byte[]> producer) {
146-
// Rewind the cursor to be sure to read again all non-acked messages sent while restarting.
147-
cursor.rewind();
148-
cursor.cancelPendingReadRequest();
147+
waitForCursorRewinding = true;
148+
149+
// Repeat until there are no read operations in progress
150+
if (STATE_UPDATER.get(this) == State.Starting && HAVE_PENDING_READ_UPDATER.get(this) == TRUE
151+
&& !cursor.cancelPendingReadRequest()) {
152+
brokerService.getPulsar().getExecutor()
153+
.schedule(() -> setProducerAndTriggerReadEntries(producer), 10, TimeUnit.MILLISECONDS);
154+
return;
155+
}
149156

150157
/**
151158
* 1. Try change state to {@link Started}.
@@ -158,6 +165,7 @@ protected void setProducerAndTriggerReadEntries(Producer<byte[]> producer) {
158165
if (!(producer instanceof ProducerImpl)) {
159166
log.error("[{}] The partitions count between two clusters is not the same, the replicator can not be"
160167
+ " created successfully: {}", replicatorId, state);
168+
waitForCursorRewinding = false;
161169
doCloseProducerAsync(producer, () -> {});
162170
throw new ClassCastException(producer.getClass().getName() + " can not be cast to ProducerImpl");
163171
}
@@ -168,6 +176,11 @@ protected void setProducerAndTriggerReadEntries(Producer<byte[]> producer) {
168176
backOff.reset();
169177
// activate cursor: so, entries can be cached.
170178
this.cursor.setActive();
179+
180+
// Rewind the cursor to be sure to read again all non-acked messages sent while restarting
181+
cursor.rewind();
182+
waitForCursorRewinding = false;
183+
171184
// read entries
172185
readMoreEntries();
173186
} else {
@@ -183,6 +196,7 @@ protected void setProducerAndTriggerReadEntries(Producer<byte[]> producer) {
183196
log.error("[{}] Replicator state is not expected, so close the producer. Replicator state: {}",
184197
replicatorId, changeStateRes.getRight());
185198
}
199+
waitForCursorRewinding = false;
186200
// Close the producer if change the state fail.
187201
doCloseProducerAsync(producer, () -> {});
188202
}
@@ -296,6 +310,11 @@ protected void readMoreEntries() {
296310

297311
// Schedule read
298312
if (HAVE_PENDING_READ_UPDATER.compareAndSet(this, FALSE, TRUE)) {
313+
if (waitForCursorRewinding) {
314+
log.info("[{}] Skip the reading because repl producer is starting", replicatorId);
315+
HAVE_PENDING_READ_UPDATER.set(this, FALSE);
316+
return;
317+
}
299318
if (log.isDebugEnabled()) {
300319
log.debug("[{}] Schedule read of {} messages or {} bytes", replicatorId, messagesToRead,
301320
bytesToRead);

0 commit comments

Comments
 (0)