Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions common/mock/forkDetectorMock.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ type ForkDetectorMock struct {
GetHighestFinalBlockNonceCalled func() uint64
GetHighestFinalBlockHashCalled func() []byte
ProbableHighestNonceCalled func() uint64
HighestNonceReceivedCalled func() uint64
ResetForkCalled func()
SoftResetForkCalled func(nonce uint64)
GetNotarizedHeaderHashCalled func(nonce uint64) []byte
Expand Down Expand Up @@ -60,6 +61,14 @@ func (fdm *ForkDetectorMock) ProbableHighestNonce() uint64 {
return fdm.ProbableHighestNonceCalled()
}

// HighestNonceReceived delegates to the HighestNonceReceivedCalled hook when
// one is configured; with no hook set it reports 0.
func (fdm *ForkDetectorMock) HighestNonceReceived() uint64 {
	hook := fdm.HighestNonceReceivedCalled
	if hook == nil {
		return 0
	}

	return hook()
}

// SetRollBackNonce -
func (fdm *ForkDetectorMock) SetRollBackNonce(nonce uint64) {
if fdm.SetRollBackNonceCalled != nil {
Expand Down
1 change: 1 addition & 0 deletions core/process/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ type ForkDetector interface {
GetHighestFinalBlockNonce() uint64
GetHighestFinalBlockHash() []byte
ProbableHighestNonce() uint64
HighestNonceReceived() uint64
ResetFork()
SoftResetFork(nonce uint64)
SetRollBackNonce(nonce uint64)
Expand Down
15 changes: 12 additions & 3 deletions core/process/sync/baseForkDetector.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,15 @@ func (bfd *baseForkDetector) ProbableHighestNonce() uint64 {
return bfd.probableHighestNonce()
}

// HighestNonceReceived is the exported accessor for the value returned by the
// unexported highestNonceReceived: the highest nonce observed in any received
// header (including BHProposed gossip). Comparing it against
// ProbableHighestNonce / currentBlockNonce lets callers detect a node that has
// fallen behind via the BHProposed-only path, which is the KLC-1920 / KLC-2389
// failure mode.
func (bfd *baseForkDetector) HighestNonceReceived() uint64 {
	highest := bfd.highestNonceReceived()

	return highest
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// ResetFork resets the forced fork
func (bfd *baseForkDetector) ResetFork() {
bfd.ResetProbableHighestNonce()
Expand Down Expand Up @@ -402,11 +411,11 @@ func (bfd *baseForkDetector) probableHighestNonce() uint64 {
}

func (bfd *baseForkDetector) setHighestNonceReceived(nonce uint64) {
if nonce <= bfd.highestNonceReceived() {
bfd.mutFork.Lock()
if nonce <= bfd.fork.highestNonceReceived {
bfd.mutFork.Unlock()
return
}

bfd.mutFork.Lock()
bfd.fork.highestNonceReceived = nonce
bfd.mutFork.Unlock()

Expand Down
19 changes: 16 additions & 3 deletions core/process/sync/baseSync.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,10 +301,23 @@ func (boot *baseBootstrap) computeNodeState() {
} else {
lastNonce = currentHeader.GetNonce()
lastSlot = currentHeader.GetSlot()
boot.hasLastBlock = boot.forkDetector.ProbableHighestNonce() <= boot.chainHandler.GetCurrentBlockHeader().GetNonce()
currentBlockNonce := boot.chainHandler.GetCurrentBlockHeader().GetNonce()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a re-fetch of the value already assigned to lastNonce at L#302.

probableHighestNonce := boot.forkDetector.ProbableHighestNonce()
highestNonceReceived := boot.forkDetector.HighestNonceReceived()
boot.hasLastBlock = probableHighestNonce <= currentBlockNonce
// KLC-1920: gossip-derived ceiling is the source of truth that
// probableHighestNonce can lag behind when the BHReceived path is
// disrupted (peer churn after an election, fallback observer not
// receiving fetched headers). If gossip reports the network ahead
// by more than the normal proposal/commit window, the node is not
// really synced even if probableHighestNonce equals currentBlockNonce.
if highestNonceReceived > currentBlockNonce+process.BlockFinality {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BlockFinality is hardcoded to 1, so this guard trips whenever the gossiped nonce ceiling (highestNonceReceived) runs ≥ 2 blocks ahead of the committed tip. That gap is reached during normal propagation/commit latency or a single missed round — i.e. when the node is briefly one block behind while the next proposal is already gossiping in — which would cause transient false not-synced flapping. Suggest widening the tolerance (e.g. tie it to the existing "max rounds without a new block" value, or BlockFinality + k) so benign one-block lag doesn't flip the state.

boot.hasLastBlock = false
}
log.Debug("computeNodeState",
"probableHighestNonce", boot.forkDetector.ProbableHighestNonce(),
"currentBlockNonce", boot.chainHandler.GetCurrentBlockHeader().GetNonce(),
"probableHighestNonce", probableHighestNonce,
"highestNonceReceived", highestNonceReceived,
"currentBlockNonce", currentBlockNonce,
"boot.hasLastBlock", boot.hasLastBlock)
}

Expand Down
38 changes: 38 additions & 0 deletions core/process/sync/export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,49 @@ package sync

import (
"github.com/klever-io/klever-go/core"
"github.com/klever-io/klever-go/core/consensus"
"github.com/klever-io/klever-go/core/process"
"github.com/klever-io/klever-go/data"
"github.com/klever-io/klever-go/data/block"
)

// BaseBootstrap is an alias of the unexported baseBootstrap type. It is
// declared in this test-only export file so that tests living in the
// sync_test package can refer to baseBootstrap by name.
type BaseBootstrap = baseBootstrap

// NewBaseBootstrapForKLC1920Test returns a baseBootstrap populated with only
// the collaborators that computeNodeState needs in order to exercise the
// KLC-1920 gossip-ahead-of-probable branch. Besides the five supplied
// dependencies it sets an empty (non-nil) syncStateListeners slice and marks
// the bootstrapper as started; every other field keeps its zero value.
// Test-only helper, not for production use.
func NewBaseBootstrapForKLC1920Test(
	forkDetector process.ForkDetector,
	chainHandler data.ChainHandler,
	slotManager consensus.SlotManager,
	networkWatcher process.NetworkConnectionWatcher,
	statusHandler core.AppStatusHandler,
) *BaseBootstrap {
	boot := new(baseBootstrap)

	// Collaborators injected by the test.
	boot.forkDetector = forkDetector
	boot.chainHandler = chainHandler
	boot.slotManager = slotManager
	boot.networkWatcher = networkWatcher
	boot.statusHandler = statusHandler

	// Fixed state: no listeners registered, bootstrapper already started.
	boot.syncStateListeners = []func(bool){}
	boot.hasStarted = true

	return boot
}

// IsNodeSynchronized returns the current isNodeSynchronized flag, read while
// holding the mutNodeState read lock.
func (boot *baseBootstrap) IsNodeSynchronized() bool {
	boot.mutNodeState.RLock()
	synced := boot.isNodeSynchronized
	boot.mutNodeState.RUnlock()

	return synced
}

// HasLastBlock returns the current hasLastBlock flag, read while holding the
// mutNodeState read lock.
func (boot *baseBootstrap) HasLastBlock() bool {
	boot.mutNodeState.RLock()
	hasLast := boot.hasLastBlock
	boot.mutNodeState.RUnlock()

	return hasLast
}

// ReceivedHeaders forwards the given header and key to the unexported
// processReceivedHeader method so that tests can invoke it directly.
func (boot *MetaBootstrap) ReceivedHeaders(header data.HeaderHandler, key []byte) {
boot.processReceivedHeader(header, key)
}
Expand Down
166 changes: 166 additions & 0 deletions core/process/sync/klc1920_node_state_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
package sync_test

import (
"sync"
"testing"
"time"

commonMock "github.com/klever-io/klever-go/common/mock"
"github.com/klever-io/klever-go/core"
consensusMock "github.com/klever-io/klever-go/core/consensus/mock"
"github.com/klever-io/klever-go/core/process"
syncpkg "github.com/klever-io/klever-go/core/process/sync"
"github.com/klever-io/klever-go/data"
"github.com/klever-io/klever-go/data/block"
"github.com/stretchr/testify/assert"
)

// klc1920_node_state_test.go covers the new branch in
// baseBootstrap.computeNodeState: when HighestNonceReceived is more than
// BlockFinality blocks ahead of currentBlockNonce, hasLastBlock is forced
// to false so isNodeSynchronized correctly reports the node is behind.
//
// Without this branch, a fallback whose BHReceived path is broken (peer
// churn after election) would have probableHighestNonce == currentBlockNonce
// and falsely declare itself synced — the production failure mode KLC-1920
// and KLC-2389 describe.

// observableStatusHandler is a status handler stub for tests. Every method is
// a no-op except SetUInt64Value, which remembers the most recent value written
// under the core.MetricIsSyncing key so a test can read it back via IsSyncing.
type observableStatusHandler struct {
	// mu guards isSyncing.
	mu sync.Mutex
	// isSyncing holds the last value set for core.MetricIsSyncing.
	isSyncing uint64
}

// Increment does nothing.
func (o *observableStatusHandler) Increment(_ string) {}

// AddUint64 does nothing.
func (o *observableStatusHandler) AddUint64(_ string, _ uint64) {}

// Decrement does nothing.
func (o *observableStatusHandler) Decrement(_ string) {}

// SetInt64Value does nothing.
func (o *observableStatusHandler) SetInt64Value(_ string, _ int64) {}

// SetUInt64Value stores value when key is core.MetricIsSyncing and ignores
// every other key.
func (o *observableStatusHandler) SetUInt64Value(key string, value uint64) {
	if key == core.MetricIsSyncing {
		o.mu.Lock()
		defer o.mu.Unlock()

		o.isSyncing = value
	}
}

// SetStringValue does nothing.
func (o *observableStatusHandler) SetStringValue(_ string, _ string) {}

// Close does nothing.
func (o *observableStatusHandler) Close() {}

// IsInterfaceNil reports whether the receiver is a nil pointer.
func (o *observableStatusHandler) IsInterfaceNil() bool {
	return o == nil
}

// IsSyncing returns the last value recorded for core.MetricIsSyncing, or 0 if
// none has been set.
func (o *observableStatusHandler) IsSyncing() uint64 {
	o.mu.Lock()
	recorded := o.isSyncing
	o.mu.Unlock()

	return recorded
}

// buildKLC1920Bootstrap assembles a BaseBootstrap backed entirely by mocks for
// the KLC-1920 node-state tests.
//
// probable and highest are the values the fork detector mock reports from
// ProbableHighestNonce and HighestNonceReceived; currentBlockNonce is used as
// both nonce and slot of the chain's current header. The returned status
// handler lets the caller inspect the value written for core.MetricIsSyncing.
func buildKLC1920Bootstrap(probable, highest, currentBlockNonce uint64) (*syncpkg.BaseBootstrap, *observableStatusHandler) {
	// Chain with a zero genesis header and a tip at currentBlockNonce.
	genesis := &block.Block{Header: &block.BlockHeader{Nonce: 0, Slot: 0}}
	tip := &block.Block{Header: &block.BlockHeader{Nonce: currentBlockNonce, Slot: currentBlockNonce}}
	chain := &commonMock.BlockChainMock{
		GetGenesisHeaderCalled:      func() data.HeaderHandler { return genesis },
		GetCurrentBlockHeaderCalled: func() data.HeaderHandler { return tip },
	}

	// Fork detector reporting the nonces under test and an empty fork info.
	detector := &commonMock.ForkDetectorMock{
		CheckForkCalled:                 func() *process.ForkInfo { return &process.ForkInfo{} },
		ProbableHighestNonceCalled:      func() uint64 { return probable },
		HighestNonceReceivedCalled:      func() uint64 { return highest },
		GetHighestFinalBlockNonceCalled: func() uint64 { return 0 },
	}

	slots := &consensusMock.SlotManagerMock{
		SlotIndex:           int64(currentBlockNonce + 5),
		TimeDurationCalled:  func() time.Duration { return 0 },
		BeforeGenesisCalled: func() bool { return true }, // suppress requestHeadersIfSyncIsStuck path
	}

	watcher := &commonMock.MessengerStub{
		IsConnectedToTheNetworkCalled: func() bool { return true },
	}

	status := new(observableStatusHandler)
	boot := syncpkg.NewBaseBootstrapForKLC1920Test(detector, chain, slots, watcher, status)

	return boot, status
}

// TestKLC1920_ComputeNodeState_GossipAheadForcesNotSynced guards the
// synced-state gate against regression. Before the fix, a node whose probable
// nonce equalled its current nonce reported itself synced even with
// HighestNonceReceived far ahead. After the fix, a gossip-vs-current gap
// larger than BlockFinality must yield hasLastBlock=false and
// isNodeSynchronized=false.
func TestKLC1920_ComputeNodeState_GossipAheadForcesNotSynced(t *testing.T) {
	t.Parallel()

	// Shape of the production failure: the fork detector believes it has
	// caught up (probable == current) while gossip has already announced
	// headers many blocks ahead. The gap is a generous multiple of
	// BlockFinality so the scenario sits well beyond the threshold.
	const tipNonce = uint64(50)
	gossipNonce := tipNonce + 20*uint64(process.BlockFinality)

	boot, status := buildKLC1920Bootstrap(tipNonce, gossipNonce, tipNonce)
	boot.ComputeNodeState()

	hasLastBlock := boot.HasLastBlock()
	synced := boot.IsNodeSynchronized()
	syncingMetric := status.IsSyncing()

	assert.False(t, hasLastBlock,
		"KLC-1920 fix: gossip-ahead gap must force hasLastBlock=false")
	assert.False(t, synced,
		"KLC-1920 fix: node must not declare synced when gossip is ahead")
	assert.Equal(t, uint64(1), syncingMetric,
		"KLC-1920 fix: klv_is_syncing must report 1 — the production-bug metric was 0 (false-synced)")
}

// TestKLC1920_ComputeNodeState_GossipAtBoundaryStaysSynced verifies that the
// gate stays quiet when gossip is exactly BlockFinality ahead of the last
// committed block, which is the ordinary proposal-vs-commit window during
// normal consensus operation.
func TestKLC1920_ComputeNodeState_GossipAtBoundaryStaysSynced(t *testing.T) {
	t.Parallel()

	// A gap equal to BlockFinality means a BHProposed for nonce
	// N+BlockFinality was seen but is not committed yet. That is expected
	// and must leave the node reported as synced.
	const tipNonce = uint64(50)
	gossipNonce := tipNonce + uint64(process.BlockFinality)

	boot, status := buildKLC1920Bootstrap(tipNonce, gossipNonce, tipNonce)
	boot.ComputeNodeState()

	hasLastBlock := boot.HasLastBlock()
	synced := boot.IsNodeSynchronized()
	syncingMetric := status.IsSyncing()

	assert.True(t, hasLastBlock,
		"normal proposal cycle: gap == BlockFinality must NOT force not-synced")
	assert.True(t, synced,
		"normal proposal cycle: node remains synced; consensus must not be gated")
	assert.Equal(t, uint64(0), syncingMetric,
		"normal proposal cycle: klv_is_syncing stays 0")
}

// TestKLC1920_ComputeNodeState_GossipOneOverBoundaryNotSynced fixes the exact
// position of the strict `>` comparison: a gossip nonce one block beyond
// BlockFinality must trip the gate. It protects against the comparison being
// turned into `>=` by a later refactor.
func TestKLC1920_ComputeNodeState_GossipOneOverBoundaryNotSynced(t *testing.T) {
	t.Parallel()

	// First nonce past the tolerated window.
	const tipNonce = uint64(50)
	gossipNonce := tipNonce + uint64(process.BlockFinality) + 1

	boot, status := buildKLC1920Bootstrap(tipNonce, gossipNonce, tipNonce)
	boot.ComputeNodeState()

	hasLastBlock := boot.HasLastBlock()
	synced := boot.IsNodeSynchronized()
	syncingMetric := status.IsSyncing()

	assert.False(t, hasLastBlock,
		"boundary: gap == BlockFinality+1 must force not-synced")
	assert.False(t, synced,
		"boundary: gap == BlockFinality+1 must flip isNodeSynchronized to false")
	assert.Equal(t, uint64(1), syncingMetric,
		"boundary: klv_is_syncing == 1 at the first nonce past BlockFinality")
}
Loading
Loading