Skip to content

Commit 794ae7d

Browse files
committed
try ledger recovery from previous entries in case of corrupt/missing footer of the chunked data (apache#282)
(cherry picked from commit 6e72ecb)
1 parent bfc84fb commit 794ae7d

File tree

3 files changed

+188
-74
lines changed

3 files changed

+188
-74
lines changed

managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedCursorImpl.java

+109-73
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,15 @@
4040
import io.netty.buffer.CompositeByteBuf;
4141
import io.netty.buffer.Unpooled;
4242
import java.io.ByteArrayInputStream;
43+
import java.io.ByteArrayOutputStream;
4344
import java.io.DataInputStream;
4445
import java.io.IOException;
4546
import java.time.Clock;
4647
import java.util.ArrayDeque;
4748
import java.util.ArrayList;
4849
import java.util.Collections;
4950
import java.util.Comparator;
51+
import java.util.Enumeration;
5052
import java.util.HashMap;
5153
import java.util.HashSet;
5254
import java.util.Iterator;
@@ -75,6 +77,7 @@
7577
import lombok.Getter;
7678
import lombok.NoArgsConstructor;
7779
import lombok.ToString;
80+
import org.apache.bookkeeper.client.AsyncCallback;
7881
import org.apache.bookkeeper.client.AsyncCallback.CloseCallback;
7982
import org.apache.bookkeeper.client.AsyncCallback.OpenCallback;
8083
import org.apache.bookkeeper.client.BKException;
@@ -244,6 +247,8 @@ public class ManagedCursorImpl implements ManagedCursor {
244247
// active state cache in ManagedCursor. It should be in sync with the state in activeCursors in ManagedLedger.
245248
private volatile boolean isActive = false;
246249

250+
protected int maxPositionChunkSize = 1024 * 1024;
251+
247252
static class MarkDeleteEntry {
248253
final PositionImpl newPosition;
249254
final MarkDeleteCallback callback;
@@ -581,71 +586,9 @@ protected void recoverFromLedger(final ManagedCursorInfo info, final VoidCallbac
581586

582587
// Read the last entry in the ledger
583588
long lastEntryInLedger = lh.getLastAddConfirmed();
584-
585-
if (lastEntryInLedger < 0) {
586-
log.warn("[{}] Error reading from metadata ledger {} for cursor {}: No entries in ledger",
587-
ledger.getName(), ledgerId, name);
588-
// Rewind to last cursor snapshot available
589-
initialize(getRollbackPosition(info), Collections.emptyMap(), cursorProperties, callback);
590-
return;
591-
}
592-
593-
lh.asyncReadEntries(lastEntryInLedger, lastEntryInLedger, (rc1, lh1, seq, ctx1) -> {
594-
if (log.isDebugEnabled()) {
595-
log.debug("[{}} readComplete rc={} entryId={}", ledger.getName(), rc1, lh1.getLastAddConfirmed());
596-
}
597-
if (isBkErrorNotRecoverable(rc1)) {
598-
log.error("[{}] Error reading from metadata ledger {} for cursor {}: {}", ledger.getName(),
599-
ledgerId, name, BKException.getMessage(rc1));
600-
// Rewind to oldest entry available
601-
initialize(getRollbackPosition(info), Collections.emptyMap(), cursorProperties, callback);
602-
return;
603-
} else if (rc1 != BKException.Code.OK) {
604-
log.warn("[{}] Error reading from metadata ledger {} for cursor {}: {}", ledger.getName(),
605-
ledgerId, name, BKException.getMessage(rc1));
606-
607-
callback.operationFailed(createManagedLedgerException(rc1));
608-
return;
609-
}
610-
611-
LedgerEntry entry = seq.nextElement();
612-
mbean.addReadCursorLedgerSize(entry.getLength());
613-
PositionInfo positionInfo;
614-
try {
615-
byte[] data = entry.getEntry();
616-
data = decompressDataIfNeeded(data, lh);
617-
positionInfo = PositionInfo.parseFrom(data);
618-
} catch (InvalidProtocolBufferException e) {
619-
callback.operationFailed(new ManagedLedgerException(e));
620-
return;
621-
}
622-
log.info("[{}] Cursor {} recovered to position {}", ledger.getName(), name, positionInfo);
623-
624-
Map<String, Long> recoveredProperties = Collections.emptyMap();
625-
if (positionInfo.getPropertiesCount() > 0) {
626-
// Recover properties map
627-
recoveredProperties = new HashMap<>();
628-
for (int i = 0; i < positionInfo.getPropertiesCount(); i++) {
629-
LongProperty property = positionInfo.getProperties(i);
630-
recoveredProperties.put(property.getName(), property.getValue());
631-
}
632-
}
633-
634-
log.info("[{}] Cursor {} recovered with recoveredProperties {}, individualDeletedMessagesCount {}",
635-
ledger.getName(), name, recoveredProperties, positionInfo.getIndividualDeletedMessagesCount());
636-
637-
PositionImpl position = new PositionImpl(positionInfo);
638-
if (positionInfo.getIndividualDeletedMessagesCount() > 0) {
639-
recoverIndividualDeletedMessages(positionInfo.getIndividualDeletedMessagesList());
640-
}
641-
if (getConfig().isDeletionAtBatchIndexLevelEnabled()
642-
&& positionInfo.getBatchedEntryDeletionIndexInfoCount() > 0) {
643-
recoverBatchDeletedIndexes(positionInfo.getBatchedEntryDeletionIndexInfoList());
644-
}
645-
recoveredCursor(position, recoveredProperties, cursorProperties, lh);
646-
callback.operationComplete();
647-
}, null);
589+
recoverFromLedgerByEntryId(info, callback, lh, lastEntryInLedger);
648590
};
591+
649592
try {
650593
bookkeeper.asyncOpenLedger(ledgerId, digestType, getConfig().getPassword(), openCallback,
651594
null);
@@ -656,6 +599,101 @@ protected void recoverFromLedger(final ManagedCursorInfo info, final VoidCallbac
656599
}
657600
}
658601

602+
private void recoverFromLedgerByEntryId(ManagedCursorInfo info,
603+
VoidCallback callback,
604+
LedgerHandle lh,
605+
long entryId) {
606+
long ledgerId = lh.getId();
607+
608+
if (entryId < 0) {
609+
log.warn("[{}] Error reading from metadata ledger {} for cursor {}: No valid entries in ledger",
610+
ledger.getName(), ledgerId, name);
611+
// Rewind to last cursor snapshot available
612+
initialize(getRollbackPosition(info), Collections.emptyMap(), cursorProperties, callback);
613+
return;
614+
}
615+
616+
lh.asyncReadEntries(entryId, entryId, (rc1, lh1, seq, ctx1) -> {
617+
if (log.isDebugEnabled()) {
618+
log.debug("[{}} readComplete rc={} entryId={}", ledger.getName(), rc1, lh1.getLastAddConfirmed());
619+
}
620+
if (isBkErrorNotRecoverable(rc1)) {
621+
log.error("[{}] Error reading from metadata ledger {} for cursor {}: {}", ledger.getName(),
622+
ledgerId, name, BKException.getMessage(rc1));
623+
// Rewind to oldest entry available
624+
initialize(getRollbackPosition(info), Collections.emptyMap(), cursorProperties, callback);
625+
return;
626+
} else if (rc1 != BKException.Code.OK) {
627+
log.warn("[{}] Error reading from metadata ledger {} for cursor {}: {}", ledger.getName(),
628+
ledgerId, name, BKException.getMessage(rc1));
629+
630+
callback.operationFailed(createManagedLedgerException(rc1));
631+
return;
632+
}
633+
634+
LedgerEntry entry = seq.nextElement();
635+
byte[] data = entry.getEntry();
636+
try {
637+
ChunkSequenceFooter chunkSequenceFooter = parseChunkSequenceFooter(data);
638+
if (chunkSequenceFooter.numParts > 0) {
639+
readChunkSequence(callback, lh, entryId, chunkSequenceFooter);
640+
} else {
641+
Throwable res = tryCompleteCursorRecovery(lh, data);
642+
if (res == null) {
643+
callback.operationComplete();
644+
} else {
645+
log.warn("[{}] Error recovering from metadata ledger {} entry {} for cursor {}. "
646+
+ "Will try recovery from previous entry.",
647+
ledger.getName(), ledgerId, entryId, name, res);
648+
//try recovery from previous entry
649+
recoverFromLedgerByEntryId(info, callback, lh, entryId - 1);
650+
}
651+
}
652+
} catch (IOException error) {
653+
log.error("Cannot parse footer", error);
654+
log.warn("[{}] Error recovering from metadata ledger {} entry {} for cursor {}, cannot parse footer. "
655+
+ "Will try recovery from previous entry.",
656+
ledger.getName(), ledgerId, entryId, name, error);
657+
recoverFromLedgerByEntryId(info, callback, lh, entryId - 1);
658+
}
659+
}, null);
660+
}
661+
662+
private void readChunkSequence(VoidCallback callback, LedgerHandle lh,
663+
long footerPosition, ChunkSequenceFooter chunkSequenceFooter) {
664+
long startPos = footerPosition - chunkSequenceFooter.numParts;
665+
long endPos = footerPosition - 1;
666+
log.info("readChunkSequence from pos {}, num parts {}, startPos {}, endPos {}",
667+
footerPosition, chunkSequenceFooter.numParts, startPos, endPos);
668+
lh.asyncReadEntries(startPos, endPos, new AsyncCallback.ReadCallback() {
669+
@Override
670+
public void readComplete(int rc, LedgerHandle lh, Enumeration<LedgerEntry> entries, Object ctx) {
671+
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
672+
entries.asIterator().forEachRemaining(entry -> {
673+
log.info("pos {} len {} bytes ", entry.getEntryId(), entry.getLength());
674+
try {
675+
buffer.write(entry.getEntry());
676+
} catch (IOException err) {
677+
throw new RuntimeException(err);
678+
}
679+
});
680+
byte[] result = buffer.toByteArray();
681+
log.info("Read {} chunks, total of {} bytes, expected {} bytes", chunkSequenceFooter.numParts,
682+
result.length, chunkSequenceFooter.length);
683+
if (result.length != chunkSequenceFooter.length) {
684+
callback.operationFailed(ManagedLedgerException.getManagedLedgerException(new IOException(
685+
"Expected " + chunkSequenceFooter.length + " bytes but read " + result.length + " bytes")));
686+
}
687+
Throwable res = tryCompleteCursorRecovery(lh, result);
688+
if (res == null) {
689+
callback.operationComplete();
690+
} else {
691+
callback.operationFailed(new ManagedLedgerException(res));
692+
}
693+
}
694+
}, null);
695+
}
696+
659697
@AllArgsConstructor
660698
@NoArgsConstructor
661699
@Getter
@@ -675,16 +713,15 @@ private ChunkSequenceFooter parseChunkSequenceFooter(byte[] data) throws IOExcep
675713
return ObjectMapperFactory.getMapper().getObjectMapper().readValue(data, ChunkSequenceFooter.class);
676714
}
677715

678-
private void completeCursorRecovery(VoidCallback callback, LedgerHandle lh, byte[] data) {
716+
private Throwable tryCompleteCursorRecovery(LedgerHandle lh, byte[] data) {
679717
mbean.addReadCursorLedgerSize(data.length);
680718

681719
try {
682720
data = decompressDataIfNeeded(data, lh);
683721
} catch (Throwable e) {
684722
log.error("[{}] Failed to decompress position info from ledger {} for cursor {}: {}", ledger.getName(),
685723
lh.getId(), name, e);
686-
callback.operationFailed(new ManagedLedgerException(e));
687-
return;
724+
return e;
688725
}
689726

690727
PositionInfo positionInfo;
@@ -693,8 +730,7 @@ private void completeCursorRecovery(VoidCallback callback, LedgerHandle lh, byt
693730
} catch (InvalidProtocolBufferException e) {
694731
log.error("[{}] Failed to parse position info from ledger {} for cursor {}: {}", ledger.getName(),
695732
lh.getId(), name, e);
696-
callback.operationFailed(new ManagedLedgerException(e));
697-
return;
733+
return e;
698734
}
699735

700736
Map<String, Long> recoveredProperties = Collections.emptyMap();
@@ -716,7 +752,7 @@ private void completeCursorRecovery(VoidCallback callback, LedgerHandle lh, byt
716752
recoverBatchDeletedIndexes(positionInfo.getBatchedEntryDeletionIndexInfoList());
717753
}
718754
recoveredCursor(position, recoveredProperties, cursorProperties, lh);
719-
callback.operationComplete();
755+
return null;
720756
}
721757

722758
private void recoverIndividualDeletedMessages(List<MLDataFormats.MessageRange> individualDeletedMessagesList) {
@@ -3282,6 +3318,7 @@ private void buildBatchEntryDeletionIndexInfoList(
32823318
}
32833319

32843320
void persistPositionToLedger(final LedgerHandle lh, MarkDeleteEntry mdEntry, final VoidCallback callback) {
3321+
checkArgument(maxPositionChunkSize > 0, "maxPositionChunkSize mus be greater than zero");
32853322
long now = System.nanoTime();
32863323
PositionImpl position = mdEntry.newPosition;
32873324

@@ -3302,10 +3339,9 @@ void persistPositionToLedger(final LedgerHandle lh, MarkDeleteEntry mdEntry, fin
33023339

33033340
long endCompress = System.nanoTime();
33043341

3305-
int maxSize = 1024 * 1024;
33063342
int offset = 0;
33073343
final int len = data.readableBytes();
3308-
int numParts = 1 + (len / maxSize);
3344+
int numParts = 1 + (len / maxPositionChunkSize);
33093345

33103346
if (log.isDebugEnabled()) {
33113347
log.debug("[{}] Cursor {} Appending to ledger={} position={} data size {} bytes, numParts {}",
@@ -3328,7 +3364,7 @@ void persistPositionToLedger(final LedgerHandle lh, MarkDeleteEntry mdEntry, fin
33283364
int part = 0;
33293365
while (part != numParts) {
33303366
int remaining = len - offset;
3331-
int currentLen = Math.min(maxSize, remaining);
3367+
int currentLen = Math.min(maxPositionChunkSize, remaining);
33323368
boolean isLast = part == numParts - 1;
33333369

33343370
if (log.isDebugEnabled()) {

managed-ledger/src/test/java/org/apache/bookkeeper/mledger/impl/ManagedCursorTest.java

+78
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
import org.apache.bookkeeper.client.BookKeeper;
7676
import org.apache.bookkeeper.client.BookKeeper.DigestType;
7777
import org.apache.bookkeeper.client.LedgerEntry;
78+
import org.apache.bookkeeper.client.LedgerHandle;
7879
import org.apache.bookkeeper.client.api.ReadHandle;
7980
import org.apache.bookkeeper.mledger.AsyncCallbacks;
8081
import org.apache.bookkeeper.mledger.AsyncCallbacks.AddEntryCallback;
@@ -98,6 +99,7 @@
9899
import org.apache.bookkeeper.mledger.proto.MLDataFormats.ManagedCursorInfo;
99100
import org.apache.bookkeeper.mledger.proto.MLDataFormats.PositionInfo;
100101
import org.apache.bookkeeper.test.MockedBookKeeperTestCase;
102+
import org.apache.commons.lang3.reflect.FieldUtils;
101103
import org.apache.pulsar.common.api.proto.CommandSubscribe;
102104
import org.apache.pulsar.common.api.proto.IntRange;
103105
import org.apache.pulsar.common.util.FutureUtil;
@@ -3618,6 +3620,82 @@ public void operationFailed(ManagedLedgerException exception) {
36183620
assertEquals(c.getReadPosition(), readPositionBeforeRecover);
36193621
assertEquals(c.getNumberOfEntries(), 2L);
36203622
}
3623+
3624+
@Test(timeOut = 20000)
3625+
public void testRecoverCursorCorruptLastEntry() throws Exception {
3626+
ManagedLedger ml = factory.open("testRecoverCursorCorruptLastEntry");
3627+
ManagedCursorImpl c = (ManagedCursorImpl) ml.openCursor("sub", CommandSubscribe.InitialPosition.Latest);
3628+
// force chunking
3629+
c.maxPositionChunkSize = 2;
3630+
3631+
// A new cursor starts out with these values. The rest of the test assumes this, so we assert it here.
3632+
assertEquals(c.getMarkDeletedPosition().getEntryId(), -1);
3633+
assertEquals(c.getReadPosition().getEntryId(), 0);
3634+
assertEquals(ml.getLastConfirmedEntry().getEntryId(), -1);
3635+
3636+
c.resetCursor(PositionImpl.LATEST);
3637+
3638+
// A reset cursor starts out with these values. The rest of the test assumes this, so we assert it here.
3639+
assertEquals(c.getMarkDeletedPosition().getEntryId(), -1);
3640+
assertEquals(c.getReadPosition().getEntryId(), 0);
3641+
assertEquals(ml.getLastConfirmedEntry().getEntryId(), -1);
3642+
3643+
// Trigger the lastConfirmedEntry to move forward
3644+
ml.addEntry(new byte[1]);
3645+
ml.addEntry(new byte[1]);
3646+
ml.addEntry(new byte[1]);
3647+
ml.addEntry(new byte[1]);
3648+
3649+
c.resetCursor(PositionImpl.LATEST);
3650+
//corrupt last entry
3651+
LedgerHandle cursorLedger = (LedgerHandle)FieldUtils.readDeclaredField(c, "cursorLedger", true);
3652+
// can't parse json
3653+
cursorLedger.addEntry("{{".getBytes());
3654+
// can't parse PositionInfo protobuf
3655+
cursorLedger.addEntry("aa".getBytes());
3656+
3657+
assertEquals(c.getMarkDeletedPosition().getEntryId(), 3);
3658+
assertEquals(c.getReadPosition().getEntryId(), 4);
3659+
assertEquals(ml.getLastConfirmedEntry().getEntryId(), 3);
3660+
3661+
// Publish messages to move the lastConfirmedEntry field forward
3662+
ml.addEntry(new byte[1]);
3663+
ml.addEntry(new byte[1]);
3664+
3665+
final Position markDeleteBeforeRecover = c.getMarkDeletedPosition();
3666+
final Position readPositionBeforeRecover = c.getReadPosition();
3667+
3668+
ManagedCursorInfo info = ManagedCursorInfo.newBuilder()
3669+
.setCursorsLedgerId(c.getCursorLedger())
3670+
.setMarkDeleteLedgerId(markDeleteBeforeRecover.getLedgerId())
3671+
.setMarkDeleteEntryId(markDeleteBeforeRecover.getEntryId())
3672+
.setLastActive(0L)
3673+
.build();
3674+
3675+
CountDownLatch latch = new CountDownLatch(1);
3676+
AtomicBoolean failed = new AtomicBoolean(false);
3677+
c.recoverFromLedger(info, new VoidCallback() {
3678+
@Override
3679+
public void operationComplete() {
3680+
latch.countDown();
3681+
}
3682+
3683+
@Override
3684+
public void operationFailed(ManagedLedgerException exception) {
3685+
failed.set(true);
3686+
latch.countDown();
3687+
}
3688+
});
3689+
3690+
latch.await();
3691+
if (failed.get()) {
3692+
fail("Cursor recovery should not fail");
3693+
}
3694+
assertEquals(c.getMarkDeletedPosition(), markDeleteBeforeRecover);
3695+
assertEquals(c.getReadPosition(), readPositionBeforeRecover);
3696+
assertEquals(c.getNumberOfEntries(), 2L);
3697+
}
3698+
36213699
@Test
36223700
void testAlwaysInactive() throws Exception {
36233701
ManagedLedger ml = factory.open("testAlwaysInactive");

managed-ledger/src/test/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerTest.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -3256,7 +3256,7 @@ public void testManagedLedgerWithAddEntryTimeOut() throws Exception {
32563256
class MockLedgerHandle extends PulsarMockLedgerHandle {
32573257
public MockLedgerHandle(PulsarMockBookKeeper bk, long id, DigestType digest, byte[] passwd)
32583258
throws GeneralSecurityException {
3259-
super(bk, id, digest, passwd);
3259+
super(bk, id, digest, passwd, null);
32603260
}
32613261

32623262
@Override

0 commit comments

Comments
 (0)