Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions common/mock/forkDetectorMock.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ type ForkDetectorMock struct {
GetHighestFinalBlockNonceCalled func() uint64
GetHighestFinalBlockHashCalled func() []byte
ProbableHighestNonceCalled func() uint64
HighestNonceReceivedCalled func() uint64
ResetForkCalled func()
SoftResetForkCalled func(nonce uint64)
GetNotarizedHeaderHashCalled func(nonce uint64) []byte
Expand Down Expand Up @@ -60,6 +61,14 @@ func (fdm *ForkDetectorMock) ProbableHighestNonce() uint64 {
return fdm.ProbableHighestNonceCalled()
}

// HighestNonceReceived forwards to the HighestNonceReceivedCalled stub when
// one is configured and returns 0 otherwise.
func (fdm *ForkDetectorMock) HighestNonceReceived() uint64 {
	handler := fdm.HighestNonceReceivedCalled
	if handler == nil {
		// No stub installed: report the zero nonce.
		return 0
	}
	return handler()
}

// SetRollBackNonce -
func (fdm *ForkDetectorMock) SetRollBackNonce(nonce uint64) {
if fdm.SetRollBackNonceCalled != nil {
Expand Down
1 change: 1 addition & 0 deletions core/process/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ type ForkDetector interface {
GetHighestFinalBlockNonce() uint64
GetHighestFinalBlockHash() []byte
ProbableHighestNonce() uint64
HighestNonceReceived() uint64
ResetFork()
SoftResetFork(nonce uint64)
SetRollBackNonce(nonce uint64)
Expand Down
9 changes: 9 additions & 0 deletions core/process/sync/baseForkDetector.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,15 @@ func (bfd *baseForkDetector) ProbableHighestNonce() uint64 {
return bfd.probableHighestNonce()
}

// HighestNonceReceived exposes the value held by the detector's internal
// highestNonceReceived accessor: the highest nonce observed in any received
// header, BHProposed gossip included. Callers can compare it with
// ProbableHighestNonce or the current block nonce to detect a node that has
// fallen behind while only BHProposed headers arrive (the KLC-1920 /
// KLC-2389 failure mode).
func (bfd *baseForkDetector) HighestNonceReceived() uint64 {
	highest := bfd.highestNonceReceived()
	return highest
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// ResetFork resets the forced fork
func (bfd *baseForkDetector) ResetFork() {
bfd.ResetProbableHighestNonce()
Expand Down
19 changes: 16 additions & 3 deletions core/process/sync/baseSync.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,10 +301,23 @@ func (boot *baseBootstrap) computeNodeState() {
} else {
lastNonce = currentHeader.GetNonce()
lastSlot = currentHeader.GetSlot()
boot.hasLastBlock = boot.forkDetector.ProbableHighestNonce() <= boot.chainHandler.GetCurrentBlockHeader().GetNonce()
currentBlockNonce := boot.chainHandler.GetCurrentBlockHeader().GetNonce()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This re-fetches the same value that is already held in lastNonce (assigned at L#302).

probableHighestNonce := boot.forkDetector.ProbableHighestNonce()
highestNonceReceived := boot.forkDetector.HighestNonceReceived()
boot.hasLastBlock = probableHighestNonce <= currentBlockNonce
// KLC-1920: gossip-derived ceiling is the source of truth that
// probableHighestNonce can lag behind when the BHReceived path is
// disrupted (peer churn after an election, fallback observer not
// receiving fetched headers). If gossip reports the network ahead
// by more than the normal proposal/commit window, the node is not
// really synced even if probableHighestNonce equals currentBlockNonce.
if highestNonceReceived > currentBlockNonce+process.BlockFinality {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BlockFinality is hardcoded to 1, so this guard trips whenever the gossiped nonce ceiling (highestNonceReceived) runs ≥ 2 blocks ahead of the committed tip. That gap is reached during normal propagation/commit latency or a single missed round — i.e. when the node is briefly one block behind while the next proposal is already gossiping in — which would cause transient false not-synced flapping. Suggest widening the tolerance (e.g. tie it to the existing "max rounds without a new block" value, or BlockFinality + k) so benign one-block lag doesn't flip the state.

boot.hasLastBlock = false
}
log.Debug("computeNodeState",
"probableHighestNonce", boot.forkDetector.ProbableHighestNonce(),
"currentBlockNonce", boot.chainHandler.GetCurrentBlockHeader().GetNonce(),
"probableHighestNonce", probableHighestNonce,
"highestNonceReceived", highestNonceReceived,
"currentBlockNonce", currentBlockNonce,
"boot.hasLastBlock", boot.hasLastBlock)
}

Expand Down
95 changes: 95 additions & 0 deletions core/process/sync/klc1920_repro_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package sync_test

import (
"fmt"
"testing"
"time"

"github.com/klever-io/klever-go/common/mock"
consensusMock "github.com/klever-io/klever-go/core/consensus/mock"
"github.com/klever-io/klever-go/core/process"
"github.com/klever-io/klever-go/core/process/sync"
"github.com/klever-io/klever-go/data/block"
"github.com/stretchr/testify/assert"
)

// klc1920_repro_test.go pins down the invariant the KLC-1920 fix relies on:
// under the production failure mode (only BHProposed deliveries arrive, the
// BHReceived path is broken by peer churn after an election), the fork
// detector's HighestNonceReceived must advance with gossip while
// ProbableHighestNonce stays at the last processed nonce. The gap between
// them is what baseBootstrap.computeNodeState uses to force hasLastBlock=false
// and prevent the false isNodeSynchronized=true reported in the Slack-thread
// log at sprint-97/KLC-1920/slack-thread/log.txt.

// newSlotManagerForRepro builds a SlotManagerMock pinned to the given slot
// index whose TimeDurationCalled hook always reports a zero duration.
func newSlotManagerForRepro(slot int64) *consensusMock.SlotManagerMock {
	zeroDuration := func() time.Duration { return 0 }

	slotManager := &consensusMock.SlotManagerMock{}
	slotManager.SlotIndex = slot
	slotManager.TimeDurationCalled = zeroDuration
	return slotManager
}

// TestKLC1920_HighestNonceReceivedAdvancesUnderBHProposedOnly is the
// regression guard for the gossip-ceiling invariant. Production logs showed
// `setHighestNonceReceived` firing constantly while `forkDetector.AddHeader
// state=0` (BHReceived) never appeared. This test reproduces exactly that
// shape and asserts both sides of the gap are observable.
func TestKLC1920_HighestNonceReceivedAdvancesUnderBHProposedOnly(t *testing.T) {
t.Parallel()

bfd, err := sync.NewMetaForkDetector(newSlotManagerForRepro(100), &mock.BlackListHandlerStub{}, 0)
assert.Nil(t, err)
assert.NotNil(t, bfd)

processedHdr := &block.BlockHeader{Nonce: 10, Slot: 10}
err = bfd.AddHeader(&block.Block{Header: processedHdr}, []byte("processed-10"), process.BHProcessed, nil, nil)
assert.Nil(t, err)
assert.Equal(t, uint64(10), bfd.ProbableHighestNonce(),
"baseline: probable highest after BHProcessed at 10")
assert.Equal(t, uint64(10), bfd.HighestNonceReceived(),
"baseline: highest received tracks the same processed nonce")

for nonce := uint64(11); nonce <= uint64(15); nonce++ {
hdr := &block.BlockHeader{Nonce: nonce, Slot: nonce}
hash := []byte(fmt.Sprintf("proposed-%d", nonce))
err = bfd.AddHeader(&block.Block{Header: hdr}, hash, process.BHProposed, nil, nil)
assert.Nil(t, err)
}

assert.Equal(t, uint64(15), bfd.HighestNonceReceived(),
"gossip ceiling must reflect every BHProposed delivery")
assert.Equal(t, uint64(10), bfd.ProbableHighestNonce(),
"probableHighestNonce intentionally stays at last processed — BHProposed must not advance it (would break consensus during proposal rounds)")

gap := bfd.HighestNonceReceived() - bfd.ProbableHighestNonce()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test pins HighestNonceReceived − ProbableHighestNonce, but the fix compares against currentBlockNonce. The two coincide here only because no BHReceived headers were added.
Reframe the assertion around HighestNonceReceived − currentBlockNonce so that the guard tracks what the fix actually evaluates.

assert.Equal(t, uint64(5), gap,
"the gap between gossip ceiling and probable is the signal computeNodeState uses to force hasLastBlock=false when it exceeds BlockFinality")
}

// TestKLC1920_GapExceedsBlockFinality demonstrates that the gap threshold
// (HighestNonceReceived - currentBlockNonce > BlockFinality) is the
// condition the fix watches for. BlockFinality is 1, so any gap >= 2
// indicates the node is not really synced.
func TestKLC1920_GapExceedsBlockFinality(t *testing.T) {
t.Parallel()

bfd, err := sync.NewMetaForkDetector(newSlotManagerForRepro(100), &mock.BlackListHandlerStub{}, 0)
assert.Nil(t, err)

processedHdr := &block.BlockHeader{Nonce: 50, Slot: 50}
err = bfd.AddHeader(&block.Block{Header: processedHdr}, []byte("p-50"), process.BHProcessed, nil, nil)
assert.Nil(t, err)

for nonce := uint64(51); nonce <= uint64(70); nonce++ {
hdr := &block.BlockHeader{Nonce: nonce, Slot: nonce}
hash := []byte(fmt.Sprintf("g-%d", nonce))
_ = bfd.AddHeader(&block.Block{Header: hdr}, hash, process.BHProposed, nil, nil)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
}

currentBlockNonce := uint64(50)
gossipGap := bfd.HighestNonceReceived() - currentBlockNonce
assert.Equal(t, uint64(20), gossipGap,
"matches Slack-log production amplitude (~70-block gap) at scale")
assert.True(t, gossipGap > uint64(process.BlockFinality),
"gap exceeds BlockFinality — computeNodeState must declare not-synced")
}
Loading