Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions common/mock/forkDetectorMock.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ type ForkDetectorMock struct {
GetHighestFinalBlockNonceCalled func() uint64
GetHighestFinalBlockHashCalled func() []byte
ProbableHighestNonceCalled func() uint64
HighestNonceReceivedCalled func() uint64
ResetForkCalled func()
SoftResetForkCalled func(nonce uint64)
GetNotarizedHeaderHashCalled func(nonce uint64) []byte
Expand Down Expand Up @@ -60,6 +61,14 @@ func (fdm *ForkDetectorMock) ProbableHighestNonce() uint64 {
return fdm.ProbableHighestNonceCalled()
}

// HighestNonceReceived returns 0 when no HighestNonceReceivedCalled stub is
// configured; otherwise it returns whatever the stub returns.
func (fdm *ForkDetectorMock) HighestNonceReceived() uint64 {
	if fdm.HighestNonceReceivedCalled == nil {
		return 0
	}

	return fdm.HighestNonceReceivedCalled()
}

// SetRollBackNonce -
func (fdm *ForkDetectorMock) SetRollBackNonce(nonce uint64) {
if fdm.SetRollBackNonceCalled != nil {
Expand Down
1 change: 1 addition & 0 deletions core/process/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ type ForkDetector interface {
GetHighestFinalBlockNonce() uint64
GetHighestFinalBlockHash() []byte
ProbableHighestNonce() uint64
HighestNonceReceived() uint64
ResetFork()
SoftResetFork(nonce uint64)
SetRollBackNonce(nonce uint64)
Expand Down
15 changes: 12 additions & 3 deletions core/process/sync/baseForkDetector.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,15 @@ func (bfd *baseForkDetector) ProbableHighestNonce() uint64 {
return bfd.probableHighestNonce()
}

// HighestNonceReceived exposes the fork detector's highest-received nonce,
// i.e. the largest nonce seen in any header handed to it, BHProposed gossip
// included. Comparing it with ProbableHighestNonce or the current block nonce
// lets a caller notice that the node has fallen behind while only BHProposed
// headers are arriving (the KLC-1920 / KLC-2389 failure mode).
func (bfd *baseForkDetector) HighestNonceReceived() uint64 {
	highest := bfd.highestNonceReceived()
	return highest
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// ResetFork resets the forced fork
func (bfd *baseForkDetector) ResetFork() {
bfd.ResetProbableHighestNonce()
Expand Down Expand Up @@ -402,11 +411,11 @@ func (bfd *baseForkDetector) probableHighestNonce() uint64 {
}

func (bfd *baseForkDetector) setHighestNonceReceived(nonce uint64) {
if nonce <= bfd.highestNonceReceived() {
bfd.mutFork.Lock()
if nonce <= bfd.fork.highestNonceReceived {
bfd.mutFork.Unlock()
return
}

bfd.mutFork.Lock()
bfd.fork.highestNonceReceived = nonce
bfd.mutFork.Unlock()

Expand Down
19 changes: 16 additions & 3 deletions core/process/sync/baseSync.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,10 +301,23 @@ func (boot *baseBootstrap) computeNodeState() {
} else {
lastNonce = currentHeader.GetNonce()
lastSlot = currentHeader.GetSlot()
boot.hasLastBlock = boot.forkDetector.ProbableHighestNonce() <= boot.chainHandler.GetCurrentBlockHeader().GetNonce()
currentBlockNonce := boot.chainHandler.GetCurrentBlockHeader().GetNonce()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This re-fetches the same nonce that is already stored in lastNonce (L#302).

probableHighestNonce := boot.forkDetector.ProbableHighestNonce()
highestNonceReceived := boot.forkDetector.HighestNonceReceived()
boot.hasLastBlock = probableHighestNonce <= currentBlockNonce
// KLC-1920: gossip-derived ceiling is the source of truth that
// probableHighestNonce can lag behind when the BHReceived path is
// disrupted (peer churn after an election, fallback observer not
// receiving fetched headers). If gossip reports the network ahead
// by more than the normal proposal/commit window, the node is not
// really synced even if probableHighestNonce equals currentBlockNonce.
if highestNonceReceived > currentBlockNonce+process.BlockFinality {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BlockFinality is hardcoded to 1, so this guard trips whenever the gossiped nonce ceiling (highestNonceReceived) runs ≥ 2 blocks ahead of the committed tip. That gap is reached during normal propagation/commit latency or a single missed round — i.e. when the node is briefly one block behind while the next proposal is already gossiping in — which would cause transient false not-synced flapping. Suggest widening the tolerance (e.g. tie it to the existing "max rounds without a new block" value, or BlockFinality + k) so benign one-block lag doesn't flip the state.

boot.hasLastBlock = false
}
log.Debug("computeNodeState",
"probableHighestNonce", boot.forkDetector.ProbableHighestNonce(),
"currentBlockNonce", boot.chainHandler.GetCurrentBlockHeader().GetNonce(),
"probableHighestNonce", probableHighestNonce,
"highestNonceReceived", highestNonceReceived,
"currentBlockNonce", currentBlockNonce,
"boot.hasLastBlock", boot.hasLastBlock)
}

Expand Down
38 changes: 38 additions & 0 deletions core/process/sync/export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,49 @@ package sync

import (
"github.com/klever-io/klever-go/core"
"github.com/klever-io/klever-go/core/consensus"
"github.com/klever-io/klever-go/core/process"
"github.com/klever-io/klever-go/data"
"github.com/klever-io/klever-go/data/block"
)

// BaseBootstrap is a type alias (not a distinct type) for the unexported
// baseBootstrap, so tests in the sync_test package can refer to that type by
// name.
type BaseBootstrap = baseBootstrap

// NewBaseBootstrapForKLC1920Test returns a baseBootstrap that carries only the
// collaborators passed in, an empty (non-nil) sync-state listener slice and
// hasStarted set to true. It exists so tests can drive computeNodeState through
// the KLC-1920 gossip-ahead-of-probable branch; it is a test-only helper and
// must not be used in production code.
func NewBaseBootstrapForKLC1920Test(
	forkDetector process.ForkDetector,
	chainHandler data.ChainHandler,
	slotManager consensus.SlotManager,
	networkWatcher process.NetworkConnectionWatcher,
	statusHandler core.AppStatusHandler,
) *BaseBootstrap {
	boot := &baseBootstrap{
		hasStarted:         true,
		syncStateListeners: []func(bool){},
	}

	// Wire the injected dependencies; every other field keeps its zero value.
	boot.forkDetector = forkDetector
	boot.chainHandler = chainHandler
	boot.slotManager = slotManager
	boot.networkWatcher = networkWatcher
	boot.statusHandler = statusHandler

	return boot
}

// IsNodeSynchronized returns the isNodeSynchronized flag, read while holding
// the mutNodeState read lock.
func (boot *baseBootstrap) IsNodeSynchronized() bool {
	boot.mutNodeState.RLock()
	synced := boot.isNodeSynchronized
	boot.mutNodeState.RUnlock()

	return synced
}

// HasLastBlock returns the hasLastBlock flag, read while holding the
// mutNodeState read lock.
func (boot *baseBootstrap) HasLastBlock() bool {
	boot.mutNodeState.RLock()
	hasLast := boot.hasLastBlock
	boot.mutNodeState.RUnlock()

	return hasLast
}

// ReceivedHeaders exposes the unexported processReceivedHeader to tests by
// forwarding header and key to it unchanged.
func (boot *MetaBootstrap) ReceivedHeaders(header data.HeaderHandler, key []byte) {
boot.processReceivedHeader(header, key)
}
Expand Down
141 changes: 141 additions & 0 deletions core/process/sync/klc1920_node_state_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
package sync_test

import (
"sync"
"testing"
"time"

commonMock "github.com/klever-io/klever-go/common/mock"
"github.com/klever-io/klever-go/core"
consensusMock "github.com/klever-io/klever-go/core/consensus/mock"
"github.com/klever-io/klever-go/core/process"
syncpkg "github.com/klever-io/klever-go/core/process/sync"
"github.com/klever-io/klever-go/data"
"github.com/klever-io/klever-go/data/block"
"github.com/stretchr/testify/assert"
)

// klc1920_node_state_test.go covers the new branch in
// baseBootstrap.computeNodeState: when HighestNonceReceived is more than
// BlockFinality blocks ahead of currentBlockNonce, hasLastBlock is forced
// to false so isNodeSynchronized correctly reports the node is behind.
//
// Without this branch, a fallback whose BHReceived path is broken (peer
// churn after election) would have probableHighestNonce == currentBlockNonce
// and falsely declare itself synced — the production failure mode KLC-1920
// and KLC-2389 describe.

// observableStatusHandler is a status-handler test double that remembers only
// the last value written under core.MetricIsSyncing; every other metric call
// is a no-op.
type observableStatusHandler struct {
	mu        sync.Mutex // guards isSyncing
	isSyncing uint64
}

// Increment is a no-op.
func (o *observableStatusHandler) Increment(_ string) {}

// AddUint64 is a no-op.
func (o *observableStatusHandler) AddUint64(_ string, _ uint64) {}

// Decrement is a no-op.
func (o *observableStatusHandler) Decrement(_ string) {}

// SetInt64Value is a no-op.
func (o *observableStatusHandler) SetInt64Value(_ string, _ int64) {}

// SetUInt64Value records value when key is core.MetricIsSyncing and ignores
// every other key.
func (o *observableStatusHandler) SetUInt64Value(key string, value uint64) {
	if key == core.MetricIsSyncing {
		o.mu.Lock()
		defer o.mu.Unlock()
		o.isSyncing = value
	}
}

// SetStringValue is a no-op.
func (o *observableStatusHandler) SetStringValue(_ string, _ string) {}

// Close is a no-op.
func (o *observableStatusHandler) Close() {}

// IsInterfaceNil reports whether the receiver is a nil pointer.
func (o *observableStatusHandler) IsInterfaceNil() bool { return o == nil }

// IsSyncing returns the last value recorded for core.MetricIsSyncing (zero if
// none was recorded yet).
func (o *observableStatusHandler) IsSyncing() uint64 {
	o.mu.Lock()
	recorded := o.isSyncing
	o.mu.Unlock()

	return recorded
}

// buildKLC1920Bootstrap assembles a BaseBootstrap from mocks for the
// computeNodeState tests and returns it together with the status handler that
// records the is-syncing metric.
//
// probable and highest are the values the fork-detector mock reports from
// ProbableHighestNonce and HighestNonceReceived; currentBlockNonce is used as
// both nonce and slot of the chain's current header.
func buildKLC1920Bootstrap(probable, highest, currentBlockNonce uint64) (*syncpkg.BaseBootstrap, *observableStatusHandler) {
	genesis := &block.Block{Header: &block.BlockHeader{Nonce: 0, Slot: 0}}
	current := &block.Block{Header: &block.BlockHeader{Nonce: currentBlockNonce, Slot: currentBlockNonce}}
	chain := &commonMock.BlockChainMock{
		GetGenesisHeaderCalled:      func() data.HeaderHandler { return genesis },
		GetCurrentBlockHeaderCalled: func() data.HeaderHandler { return current },
	}

	detector := &commonMock.ForkDetectorMock{
		CheckForkCalled:                 func() *process.ForkInfo { return &process.ForkInfo{} },
		ProbableHighestNonceCalled:      func() uint64 { return probable },
		HighestNonceReceivedCalled:      func() uint64 { return highest },
		GetHighestFinalBlockNonceCalled: func() uint64 { return 0 },
	}

	slots := &consensusMock.SlotManagerMock{
		// The slot index sits five slots past the current header's slot.
		SlotIndex:          int64(currentBlockNonce + 5),
		TimeDurationCalled: func() time.Duration { return 0 },
		// Per the original author's note: suppresses the
		// requestHeadersIfSyncIsStuck path.
		BeforeGenesisCalled: func() bool { return true },
	}

	watcher := &commonMock.MessengerStub{
		IsConnectedToTheNetworkCalled: func() bool { return true },
	}

	status := &observableStatusHandler{}

	return syncpkg.NewBaseBootstrapForKLC1920Test(detector, chain, slots, watcher, status), status
}

// TestKLC1920_ComputeNodeState_GossipAheadForcesNotSynced guards the
// synced-state gate against regression. The scenario mirrors the production
// failure: the fork detector reports probable == current (so the node looks
// caught up) while HighestNonceReceived is 20 blocks ahead. With the fix, a
// gossip-vs-current gap larger than BlockFinality must leave hasLastBlock and
// isNodeSynchronized false and the is-syncing metric at 1.
func TestKLC1920_ComputeNodeState_GossipAheadForcesNotSynced(t *testing.T) {
	t.Parallel()

	const (
		probableNonce = uint64(50) // what the fork detector believes is the tip
		gossipNonce   = uint64(70) // highest nonce received from gossip
		currentNonce  = uint64(50) // current block nonce
	)

	boot, status := buildKLC1920Bootstrap(probableNonce, gossipNonce, currentNonce)
	boot.ComputeNodeState()

	assert.False(t, boot.HasLastBlock(),
		"KLC-1920 fix: gossip-ahead gap must force hasLastBlock=false")
	assert.False(t, boot.IsNodeSynchronized(),
		"KLC-1920 fix: node must not declare synced when gossip is ahead")
	assert.Equal(t, uint64(1), status.IsSyncing(),
		"KLC-1920 fix: klv_is_syncing must report 1 — the production-bug metric was 0 (false-synced)")
}

// TestKLC1920_ComputeNodeState_GossipWithinFinalityStaysSynced checks that the
// gate stays quiet in the ordinary proposal cycle: a BHProposed for nonce N+1
// has been gossiped but not committed yet, so the gap is exactly 1
// (== BlockFinality) and the node must keep reporting itself as synced.
func TestKLC1920_ComputeNodeState_GossipWithinFinalityStaysSynced(t *testing.T) {
	t.Parallel()

	const (
		probableNonce = uint64(50) // what the fork detector believes is the tip
		gossipNonce   = uint64(51) // one proposal ahead, which is normal
		currentNonce  = uint64(50) // current block nonce
	)

	boot, status := buildKLC1920Bootstrap(probableNonce, gossipNonce, currentNonce)
	boot.ComputeNodeState()

	assert.True(t, boot.HasLastBlock(),
		"normal proposal cycle: gap == BlockFinality must NOT force not-synced")
	assert.True(t, boot.IsNodeSynchronized(),
		"normal proposal cycle: node remains synced; consensus must not be gated")
	assert.Equal(t, uint64(0), status.IsSyncing(),
		"normal proposal cycle: klv_is_syncing stays 0")
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
96 changes: 96 additions & 0 deletions core/process/sync/klc1920_repro_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package sync_test

import (
"fmt"
"testing"
"time"

"github.com/klever-io/klever-go/common/mock"
consensusMock "github.com/klever-io/klever-go/core/consensus/mock"
"github.com/klever-io/klever-go/core/process"
"github.com/klever-io/klever-go/core/process/sync"
"github.com/klever-io/klever-go/data/block"
"github.com/stretchr/testify/assert"
)

// klc1920_repro_test.go pins down the invariant the KLC-1920 fix relies on:
// under the production failure mode (only BHProposed deliveries arrive, the
// BHReceived path is broken by peer churn after an election), the fork
// detector's HighestNonceReceived must advance with gossip while
// ProbableHighestNonce stays at the last processed nonce. The gap between
// them is what baseBootstrap.computeNodeState uses to force hasLastBlock=false
// and prevent the false isNodeSynchronized=true reported in the Slack-thread
// log at sprint-97/KLC-1920/slack-thread/log.txt.

// newSlotManagerForRepro returns a SlotManagerMock whose SlotIndex is slot and
// whose TimeDurationCalled stub always returns a zero duration.
func newSlotManagerForRepro(slot int64) *consensusMock.SlotManagerMock {
	manager := &consensusMock.SlotManagerMock{SlotIndex: slot}
	manager.TimeDurationCalled = func() time.Duration { return 0 }

	return manager
}

// TestKLC1920_HighestNonceReceivedAdvancesUnderBHProposedOnly is the
// regression guard for the gossip-ceiling invariant. Production logs showed
// `setHighestNonceReceived` firing constantly while `forkDetector.AddHeader
// state=0` (BHReceived) never appeared. This test reproduces exactly that
// shape and asserts both sides of the gap are observable.
//
// Steps: add one BHProcessed header at nonce 10, then BHProposed headers for
// nonces 11..15, and check that HighestNonceReceived reaches 15 while
// ProbableHighestNonce stays at 10.
func TestKLC1920_HighestNonceReceivedAdvancesUnderBHProposedOnly(t *testing.T) {
t.Parallel()

bfd, err := sync.NewMetaForkDetector(newSlotManagerForRepro(100), &mock.BlackListHandlerStub{}, 0)
assert.Nil(t, err)
assert.NotNil(t, bfd)

// Baseline: a single processed header at nonce 10 sets both values to 10.
processedHdr := &block.BlockHeader{Nonce: 10, Slot: 10}
err = bfd.AddHeader(&block.Block{Header: processedHdr}, []byte("processed-10"), process.BHProcessed, nil, nil)
assert.Nil(t, err)
assert.Equal(t, uint64(10), bfd.ProbableHighestNonce(),
"baseline: probable highest after BHProcessed at 10")
assert.Equal(t, uint64(10), bfd.HighestNonceReceived(),
"baseline: highest received tracks the same processed nonce")

// Deliver nonces 11..15 only as BHProposed; no other header state is added.
for nonce := uint64(11); nonce <= uint64(15); nonce++ {
hdr := &block.BlockHeader{Nonce: nonce, Slot: nonce}
hash := []byte(fmt.Sprintf("proposed-%d", nonce))
err = bfd.AddHeader(&block.Block{Header: hdr}, hash, process.BHProposed, nil, nil)
assert.Nil(t, err)
}

assert.Equal(t, uint64(15), bfd.HighestNonceReceived(),
"gossip ceiling must reflect every BHProposed delivery")
assert.Equal(t, uint64(10), bfd.ProbableHighestNonce(),
"probableHighestNonce intentionally stays at last processed — BHProposed must not advance it (would break consensus during proposal rounds)")

// The gap is measured against ProbableHighestNonce here (15 - 10 = 5), not
// against a chain's current block nonce.
gap := bfd.HighestNonceReceived() - bfd.ProbableHighestNonce()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test pins HighestNonceReceived − ProbableHighestNonce, but fix compares against currentBlockNonce. Coincide here only because no BHReceived headers were added.
Reframe the assertion around HighestNonceReceived − currentBlockNonce so the guard tracks what the fix actually evaluates.

assert.Equal(t, uint64(5), gap,
"the gap between gossip ceiling and probable is the signal computeNodeState uses to force hasLastBlock=false when it exceeds BlockFinality")
}

// TestKLC1920_GapExceedsBlockFinality demonstrates, against a real meta fork
// detector, the condition the fix watches for:
// HighestNonceReceived - currentBlockNonce > BlockFinality.
//
// One header is added as BHProcessed at nonce 50 (standing in for the current
// block), then nonces 51..70 are added as BHProposed only. The resulting
// 20-block gap must exceed BlockFinality.
//
// Setup failures abort the test immediately: continuing with a detector that
// failed to build would dereference nil and panic instead of reporting a clean
// failure.
func TestKLC1920_GapExceedsBlockFinality(t *testing.T) {
	t.Parallel()

	bfd, err := sync.NewMetaForkDetector(newSlotManagerForRepro(100), &mock.BlackListHandlerStub{}, 0)
	if !assert.NoError(t, err) {
		return
	}

	processedHdr := &block.BlockHeader{Nonce: 50, Slot: 50}
	err = bfd.AddHeader(&block.Block{Header: processedHdr}, []byte("p-50"), process.BHProcessed, nil, nil)
	if !assert.NoError(t, err) {
		return
	}

	for nonce := uint64(51); nonce <= uint64(70); nonce++ {
		hdr := &block.BlockHeader{Nonce: nonce, Slot: nonce}
		hash := []byte(fmt.Sprintf("g-%d", nonce))
		err = bfd.AddHeader(&block.Block{Header: hdr}, hash, process.BHProposed, nil, nil)
		assert.NoError(t, err)
	}

	// Take the current nonce from the processed header rather than repeating
	// the literal, so the two cannot drift apart.
	currentBlockNonce := processedHdr.Nonce
	gossipGap := bfd.HighestNonceReceived() - currentBlockNonce
	assert.Equal(t, uint64(20), gossipGap,
		"matches Slack-log production amplitude (~70-block gap) at scale")
	// assert.Greater prints both operands on failure, unlike assert.True(a > b).
	assert.Greater(t, gossipGap, uint64(process.BlockFinality),
		"gap exceeds BlockFinality — computeNodeState must declare not-synced")
}
Loading