-
Notifications
You must be signed in to change notification settings - Fork 4
[KLC-1920] fix: force not-synced when gossip is ahead of probable highest nonce #69
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
240a6e5
875fbfc
b860d55
e97e103
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -301,10 +301,23 @@ func (boot *baseBootstrap) computeNodeState() { | |
| } else { | ||
| lastNonce = currentHeader.GetNonce() | ||
| lastSlot = currentHeader.GetSlot() | ||
| boot.hasLastBlock = boot.forkDetector.ProbableHighestNonce() <= boot.chainHandler.GetCurrentBlockHeader().GetNonce() | ||
| currentBlockNonce := boot.chainHandler.GetCurrentBlockHeader().GetNonce() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is the refetch of |
||
| probableHighestNonce := boot.forkDetector.ProbableHighestNonce() | ||
| highestNonceReceived := boot.forkDetector.HighestNonceReceived() | ||
| boot.hasLastBlock = probableHighestNonce <= currentBlockNonce | ||
| // KLC-1920: gossip-derived ceiling is the source of truth that | ||
| // probableHighestNonce can lag behind when the BHReceived path is | ||
| // disrupted (peer churn after an election, fallback observer not | ||
| // receiving fetched headers). If gossip reports the network ahead | ||
| // by more than the normal proposal/commit window, the node is not | ||
| // really synced even if probableHighestNonce equals currentBlockNonce. | ||
| if highestNonceReceived > currentBlockNonce+process.BlockFinality { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| boot.hasLastBlock = false | ||
| } | ||
| log.Debug("computeNodeState", | ||
| "probableHighestNonce", boot.forkDetector.ProbableHighestNonce(), | ||
| "currentBlockNonce", boot.chainHandler.GetCurrentBlockHeader().GetNonce(), | ||
| "probableHighestNonce", probableHighestNonce, | ||
| "highestNonceReceived", highestNonceReceived, | ||
| "currentBlockNonce", currentBlockNonce, | ||
| "boot.hasLastBlock", boot.hasLastBlock) | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,166 @@ | ||
| package sync_test | ||
|
|
||
| import ( | ||
| "sync" | ||
| "testing" | ||
| "time" | ||
|
|
||
| commonMock "github.com/klever-io/klever-go/common/mock" | ||
| "github.com/klever-io/klever-go/core" | ||
| consensusMock "github.com/klever-io/klever-go/core/consensus/mock" | ||
| "github.com/klever-io/klever-go/core/process" | ||
| syncpkg "github.com/klever-io/klever-go/core/process/sync" | ||
| "github.com/klever-io/klever-go/data" | ||
| "github.com/klever-io/klever-go/data/block" | ||
| "github.com/stretchr/testify/assert" | ||
| ) | ||
|
|
||
| // klc1920_node_state_test.go covers the new branch in | ||
| // baseBootstrap.computeNodeState: when HighestNonceReceived is more than | ||
| // BlockFinality blocks ahead of currentBlockNonce, hasLastBlock is forced | ||
| // to false so isNodeSynchronized correctly reports the node is behind. | ||
| // | ||
| // Without this branch, a fallback whose BHReceived path is broken (peer | ||
| // churn after election) would have probableHighestNonce == currentBlockNonce | ||
| // and falsely declare itself synced — the production failure mode KLC-1920 | ||
| // and KLC-2389 describe. | ||
|
|
||
| type observableStatusHandler struct { | ||
| mu sync.Mutex | ||
| isSyncing uint64 | ||
| } | ||
|
|
||
| func (o *observableStatusHandler) Increment(_ string) {} | ||
| func (o *observableStatusHandler) AddUint64(_ string, _ uint64) {} | ||
| func (o *observableStatusHandler) Decrement(_ string) {} | ||
| func (o *observableStatusHandler) SetInt64Value(_ string, _ int64) {} | ||
| func (o *observableStatusHandler) SetUInt64Value(key string, value uint64) { | ||
| if key != core.MetricIsSyncing { | ||
| return | ||
| } | ||
| o.mu.Lock() | ||
| o.isSyncing = value | ||
| o.mu.Unlock() | ||
| } | ||
| func (o *observableStatusHandler) SetStringValue(_ string, _ string) {} | ||
| func (o *observableStatusHandler) Close() {} | ||
| func (o *observableStatusHandler) IsInterfaceNil() bool { return o == nil } | ||
|
|
||
| func (o *observableStatusHandler) IsSyncing() uint64 { | ||
| o.mu.Lock() | ||
| defer o.mu.Unlock() | ||
| return o.isSyncing | ||
| } | ||
|
|
||
| func buildKLC1920Bootstrap(probable, highest, currentBlockNonce uint64) (*syncpkg.BaseBootstrap, *observableStatusHandler) { | ||
| forkDetector := &commonMock.ForkDetectorMock{ | ||
| CheckForkCalled: func() *process.ForkInfo { return &process.ForkInfo{} }, | ||
| ProbableHighestNonceCalled: func() uint64 { return probable }, | ||
| HighestNonceReceivedCalled: func() uint64 { return highest }, | ||
| GetHighestFinalBlockNonceCalled: func() uint64 { return 0 }, | ||
| } | ||
|
|
||
| genesisHeader := &block.Block{Header: &block.BlockHeader{Nonce: 0, Slot: 0}} | ||
| currentHeader := &block.Block{Header: &block.BlockHeader{Nonce: currentBlockNonce, Slot: currentBlockNonce}} | ||
|
|
||
| chainHandler := &commonMock.BlockChainMock{ | ||
| GetGenesisHeaderCalled: func() data.HeaderHandler { return genesisHeader }, | ||
| GetCurrentBlockHeaderCalled: func() data.HeaderHandler { return currentHeader }, | ||
| } | ||
|
|
||
| slotManager := &consensusMock.SlotManagerMock{ | ||
| SlotIndex: int64(currentBlockNonce + 5), | ||
| TimeDurationCalled: func() time.Duration { return 0 }, | ||
| BeforeGenesisCalled: func() bool { return true }, // suppress requestHeadersIfSyncIsStuck path | ||
| } | ||
|
|
||
| networkWatcher := &commonMock.MessengerStub{ | ||
| IsConnectedToTheNetworkCalled: func() bool { return true }, | ||
| } | ||
| statusHandler := &observableStatusHandler{} | ||
|
|
||
| boot := syncpkg.NewBaseBootstrapForKLC1920Test( | ||
| forkDetector, | ||
| chainHandler, | ||
| slotManager, | ||
| networkWatcher, | ||
| statusHandler, | ||
| ) | ||
|
|
||
| return boot, statusHandler | ||
| } | ||
|
|
||
| // TestKLC1920_ComputeNodeState_GossipAheadForcesNotSynced is the regression | ||
| // guard for the synced-state gate. Pre-fix: with probable == current the | ||
| // node declared itself synced even when HighestNonceReceived was far ahead. | ||
| // Post-fix: any gossip-vs-current gap > BlockFinality forces hasLastBlock= | ||
| // false and isNodeSynchronized=false. | ||
| func TestKLC1920_ComputeNodeState_GossipAheadForcesNotSynced(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| // Production failure shape: probable matches current (fork detector | ||
| // thinks it's caught up) but gossip has reported headers many blocks | ||
| // ahead. Use a generous multiple of BlockFinality so we're well past | ||
| // the threshold regardless of how it gets tuned later. | ||
| const probable = uint64(50) | ||
| current := probable | ||
| highest := current + uint64(process.BlockFinality)*20 | ||
|
|
||
| boot, statusHandler := buildKLC1920Bootstrap(probable, highest, current) | ||
|
|
||
| boot.ComputeNodeState() | ||
|
|
||
| assert.False(t, boot.HasLastBlock(), | ||
| "KLC-1920 fix: gossip-ahead gap must force hasLastBlock=false") | ||
| assert.False(t, boot.IsNodeSynchronized(), | ||
| "KLC-1920 fix: node must not declare synced when gossip is ahead") | ||
| assert.Equal(t, uint64(1), statusHandler.IsSyncing(), | ||
| "KLC-1920 fix: klv_is_syncing must report 1 — the production-bug metric was 0 (false-synced)") | ||
| } | ||
|
|
||
| // TestKLC1920_ComputeNodeState_GossipAtBoundaryStaysSynced confirms the gate | ||
| // does NOT spuriously fire when gossip is exactly BlockFinality ahead of the | ||
| // last committed block — the natural proposal-vs-commit window during normal | ||
| // consensus operation. | ||
| func TestKLC1920_ComputeNodeState_GossipAtBoundaryStaysSynced(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| // gap == BlockFinality: a BHProposed for nonce N+BlockFinality has been | ||
| // seen but not yet committed. This is normal — must NOT trip the gate. | ||
| const probable = uint64(50) | ||
| current := probable | ||
| highest := current + uint64(process.BlockFinality) | ||
|
|
||
| boot, statusHandler := buildKLC1920Bootstrap(probable, highest, current) | ||
|
|
||
| boot.ComputeNodeState() | ||
|
|
||
| assert.True(t, boot.HasLastBlock(), | ||
| "normal proposal cycle: gap == BlockFinality must NOT force not-synced") | ||
| assert.True(t, boot.IsNodeSynchronized(), | ||
| "normal proposal cycle: node remains synced; consensus must not be gated") | ||
| assert.Equal(t, uint64(0), statusHandler.IsSyncing(), | ||
| "normal proposal cycle: klv_is_syncing stays 0") | ||
| } | ||
|
|
||
| // TestKLC1920_ComputeNodeState_GossipOneOverBoundaryNotSynced pins down the | ||
| // exact `>` boundary: one block past BlockFinality must trip the gate. This | ||
| // guards against the check accidentally becoming `>=` in a future refactor. | ||
| func TestKLC1920_ComputeNodeState_GossipOneOverBoundaryNotSynced(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| const probable = uint64(50) | ||
| current := probable | ||
| highest := current + uint64(process.BlockFinality) + 1 | ||
|
|
||
| boot, statusHandler := buildKLC1920Bootstrap(probable, highest, current) | ||
|
|
||
| boot.ComputeNodeState() | ||
|
|
||
| assert.False(t, boot.HasLastBlock(), | ||
| "boundary: gap == BlockFinality+1 must force not-synced") | ||
| assert.False(t, boot.IsNodeSynchronized(), | ||
| "boundary: gap == BlockFinality+1 must flip isNodeSynchronized to false") | ||
| assert.Equal(t, uint64(1), statusHandler.IsSyncing(), | ||
| "boundary: klv_is_syncing == 1 at the first nonce past BlockFinality") | ||
| } |
Uh oh!
There was an error while loading. Please reload this page.