From d7f491186937001f7816cf3e622456c269d9099e Mon Sep 17 00:00:00 2001 From: sweexordious Date: Tue, 4 Nov 2025 12:18:04 +0400 Subject: [PATCH 1/7] chore: introduce a precommit delay tolerance --- consensus/state.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/consensus/state.go b/consensus/state.go index 43f4063cfd..7100934695 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -2102,8 +2102,12 @@ func (cs *State) recordMetrics(height int64, block *types.Block) { cs.metrics.CommittedHeight.Set(float64(block.Height)) } -// KMSSigningDelay is a constant representing a delay used primarily to adjust for KMS signing latencies. -const KMSSigningDelay = 200 * time.Millisecond +const ( + // KMSSigningDelay is a constant representing a delay used primarily to adjust for KMS signing latencies. + KMSSigningDelay = 200 * time.Millisecond + // precommit delay rescheduling tolerance + precommitDelayTolerance = 5 * time.Millisecond +) // isReadyToPrecommit calculates if the process has waited at least a certain number of seconds // from their start time before they can vote @@ -2118,7 +2122,7 @@ func (cs *State) isReadyToPrecommit() (bool, time.Duration) { if _, ok := cs.privValidator.(*privval.SignerClient); ok { waitTime = waitTime - KMSSigningDelay } - return waitTime <= 0, waitTime + return waitTime <= precommitDelayTolerance, waitTime } //----------------------------------------------------------------------------- From 615f634f431d5bab4998289bdb4e633564c70aa0 Mon Sep 17 00:00:00 2001 From: sweexordious Date: Tue, 4 Nov 2025 13:25:31 +0400 Subject: [PATCH 2/7] chore: replace rescheduling with a sleep --- consensus/state.go | 29 ++++++++++++++++------------- consensus/types/round_state.go | 10 ++++++---- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/consensus/state.go b/consensus/state.go index 7100934695..9cae6cd0d1 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1622,9 +1622,16 @@ func (cs *State) enterPrecommit(height int64, round int32) { return } - if ready, waitTime := cs.isReadyToPrecommit(); !ready { - logger.Debug("rescheduling precommit", "delay(ms)", waitTime.Milliseconds()) - cs.scheduleTimeout(waitTime, height, round, cstypes.RoundStepPrevoteWait) + if !cs.rs.StartedPrecommitSleep.Load() { + cs.rs.StartedPrecommitSleep.Store(true) + waitTime := cs.precommitDelay() + logger.Debug("delaying precommit", "delay", waitTime) + cs.unlockAll() + time.Sleep(waitTime) + cs.lockAll() + cs.rs.StartedPrecommitSleep.Store(false) + } else { + // if any other routine tries to enter precommit, we just return return } @@ -2102,27 +2109,23 @@ func (cs *State) recordMetrics(height int64, block *types.Block) { cs.metrics.CommittedHeight.Set(float64(block.Height)) } -const ( - // KMSSigningDelay is a constant representing a delay used primarily to adjust for KMS signing latencies. - KMSSigningDelay = 200 * time.Millisecond - // precommit delay rescheduling tolerance - precommitDelayTolerance = 5 * time.Millisecond -) +// KMSSigningDelay is a constant representing a delay used primarily to adjust for KMS signing latencies. +const KMSSigningDelay = 200 * time.Millisecond -// isReadyToPrecommit calculates if the process has waited at least a certain number of seconds +// precommitDelay calculates if the process has waited at least a certain number of seconds // from their start time before they can vote // If the application's DelayedPrecommitTimeout is set to 0, no precommit wait is done. -func (cs *State) isReadyToPrecommit() (bool, time.Duration) { +func (cs *State) precommitDelay() time.Duration { if cs.state.Timeouts.DelayedPrecommitTimeout == 0 { // setting 0 as a special case not to reschedule the pre-commit - return true, 0 + return 0 } precommitVoteTime := cs.rs.StartTime.Add(cs.state.Timeouts.DelayedPrecommitTimeout) waitTime := time.Until(precommitVoteTime) if _, ok := cs.privValidator.(*privval.SignerClient); ok { waitTime = waitTime - KMSSigningDelay } - return waitTime <= precommitDelayTolerance, waitTime + return waitTime } //----------------------------------------------------------------------------- diff --git a/consensus/types/round_state.go b/consensus/types/round_state.go index 6749d4265a..bff2bd6e54 100644 --- a/consensus/types/round_state.go +++ b/consensus/types/round_state.go @@ -3,6 +3,7 @@ package types import ( "encoding/json" "fmt" + "sync/atomic" "time" "github.com/cometbft/cometbft/libs/bytes" @@ -65,10 +66,11 @@ func (rs RoundStepType) String() string { // NOTE: Not thread safe. Should only be manipulated by functions downstream // of the cs.receiveRoutine type RoundState struct { - Height int64 `json:"height"` // Height we are working on - Round int32 `json:"round"` - Step RoundStepType `json:"step"` - StartTime time.Time `json:"start_time"` + Height int64 `json:"height"` // Height we are working on + Round int32 `json:"round"` + Step RoundStepType `json:"step"` + StartTime time.Time `json:"start_time"` + StartedPrecommitSleep atomic.Bool // Subjective time when +2/3 precommits for Block at Round were found CommitTime time.Time `json:"commit_time"` From 067cb0e027522e7bbde2d7d604bce2e514d471b1 Mon Sep 17 00:00:00 2001 From: sweexordious Date: Tue, 4 Nov 2025 13:28:52 +0400 Subject: [PATCH 3/7] chore: add precommit delay log --- consensus/state.go | 1 + 1 file changed, 1 insertion(+) diff --git a/consensus/state.go b/consensus/state.go index 9cae6cd0d1..2ddfd494be 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1631,6 +1631,7 @@ func (cs *State) enterPrecommit(height int64, round int32) { cs.lockAll() cs.rs.StartedPrecommitSleep.Store(false) } else { + logger.Debug("already entered precommit sleep") // if any other routine tries to enter precommit, we just return return } From b1c73a7dd89333d69213357871d87d5e85910ca1 Mon Sep 17 00:00:00 2001 From: sweexordious Date: Tue, 4 Nov 2025 13:30:40 +0400 Subject: [PATCH 4/7] chore: use select --- consensus/state.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/consensus/state.go b/consensus/state.go index 2ddfd494be..cc61f776c0 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1627,7 +1627,12 @@ func (cs *State) enterPrecommit(height int64, round int32) { waitTime := cs.precommitDelay() logger.Debug("delaying precommit", "delay", waitTime) cs.unlockAll() - time.Sleep(waitTime) + t := time.NewTimer(waitTime) + select { + case <-cs.Quit(): + return + case <-t.C: + } cs.lockAll() cs.rs.StartedPrecommitSleep.Store(false) } else { From 78ffcdb77b07183f0a9731d1472d5dfa7fd73ecf Mon Sep 17 00:00:00 2001 From: sweexordious Date: Tue, 4 Nov 2025 13:33:17 +0400 Subject: [PATCH 5/7] chore: fix lock --- consensus/state.go | 1 + 1 file changed, 1 insertion(+) diff --git a/consensus/state.go b/consensus/state.go index cc61f776c0..a1c0e3487b 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1630,6 +1630,7 @@ func (cs *State) enterPrecommit(height int64, round int32) { t := time.NewTimer(waitTime) select { case <-cs.Quit(): + cs.lockAll() return case <-t.C: } From 6ab7433e119af57734933a0e2afeadb77c6e4c15 Mon Sep 17 00:00:00 2001 From: sweexordious Date: Tue, 4 Nov 2025 19:23:03 +0400 Subject: [PATCH 6/7] chore: comapre and swap --- consensus/state.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/consensus/state.go b/consensus/state.go index a1c0e3487b..8b9a6d34d1 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1622,8 +1622,7 @@ func (cs *State) enterPrecommit(height int64, round int32) { return } - if !cs.rs.StartedPrecommitSleep.Load() { - cs.rs.StartedPrecommitSleep.Store(true) + if cs.rs.StartedPrecommitSleep.CompareAndSwap(false, true) { waitTime := cs.precommitDelay() logger.Debug("delaying precommit", "delay", waitTime) cs.unlockAll() From 945a6f2f2288c902a2b613f4610605509d860e86 Mon Sep 17 00:00:00 2001 From: sweexordious Date: Tue, 4 Nov 2025 22:13:08 +0400 Subject: [PATCH 7/7] chore: move the StartedPrecommitSleep to the state --- consensus/state.go | 12 +++++++----- consensus/types/round_state.go | 10 ++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/consensus/state.go b/consensus/state.go index 16c401b8cf..51ad346fdc 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -10,6 +10,7 @@ import ( "runtime/debug" "sort" "sync" + "sync/atomic" "time" "github.com/cometbft/cometbft/privval" @@ -145,9 +146,10 @@ type State struct { nSteps int // some functions can be overwritten for testing - decideProposal func(height int64, round int32) - doPrevote func(height int64, round int32) - setProposal func(proposal *types.Proposal) error + decideProposal func(height int64, round int32) + doPrevote func(height int64, round int32) + setProposal func(proposal *types.Proposal) error + StartedPrecommitSleep atomic.Bool // closed when we finish shutting down done chan struct{} @@ -1627,7 +1629,7 @@ func (cs *State) enterPrecommit(height int64, round int32) { return } - if cs.rs.StartedPrecommitSleep.CompareAndSwap(false, true) { + if cs.StartedPrecommitSleep.CompareAndSwap(false, true) { waitTime := cs.precommitDelay() logger.Debug("delaying precommit", "delay", waitTime) cs.unlockAll() @@ -1639,7 +1641,7 @@ func (cs *State) enterPrecommit(height int64, round int32) { case <-t.C: } cs.lockAll() - cs.rs.StartedPrecommitSleep.Store(false) + cs.StartedPrecommitSleep.Store(false) } else { logger.Debug("already entered precommit delay") // if any other routine tries to enter precommit, we just return diff --git a/consensus/types/round_state.go b/consensus/types/round_state.go index bff2bd6e54..6749d4265a 100644 --- a/consensus/types/round_state.go +++ b/consensus/types/round_state.go @@ -3,7 +3,6 @@ package types import ( "encoding/json" "fmt" - "sync/atomic" "time" "github.com/cometbft/cometbft/libs/bytes" @@ -66,11 +65,10 @@ func (rs RoundStepType) String() string { // NOTE: Not thread safe. Should only be manipulated by functions downstream // of the cs.receiveRoutine type RoundState struct { - Height int64 `json:"height"` // Height we are working on - Round int32 `json:"round"` - Step RoundStepType `json:"step"` - StartTime time.Time `json:"start_time"` - StartedPrecommitSleep atomic.Bool + Height int64 `json:"height"` // Height we are working on + Round int32 `json:"round"` + Step RoundStepType `json:"step"` + StartTime time.Time `json:"start_time"` // Subjective time when +2/3 precommits for Block at Round were found CommitTime time.Time `json:"commit_time"`