From f697f7c590d96755162ccda56b51e3b86c07850b Mon Sep 17 00:00:00 2001 From: WCat Date: Thu, 12 Feb 2026 08:45:28 -0500 Subject: [PATCH 1/2] fix(leaderlog): verify nonce from chain before serving schedules --- commands.go | 34 ++++++++++++++++++++++----- main.go | 32 ++++++++++++------------- nonce.go | 67 ++++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 97 insertions(+), 36 deletions(-) diff --git a/commands.go b/commands.go index 582cfab..e08dd7a 100644 --- a/commands.go +++ b/commands.go @@ -427,7 +427,7 @@ func (i *Indexer) cmdLeaderlog(m *telebot.Message) { // Check DB first stored, storeErr := i.store.GetLeaderSchedule(ctx, targetEpoch) - if storeErr == nil && stored != nil { + if storeErr == nil && stored != nil && i.scheduleNonceMatches(ctx, stored) { msg := FormatScheduleForTelegram(stored, i.poolName, i.leaderlogTZ, i.leaderlogTimeFormat) i.bot.Send(m.Chat, msg) return @@ -443,7 +443,7 @@ func (i *Indexer) cmdLeaderlog(m *telebot.Message) { } } - epochNonce, nonceErr := i.nonceTracker.GetNonceForEpoch(targetEpoch - 1) + epochNonce, nonceErr := i.nonceTracker.GetVerifiedNonceForEpoch(targetEpoch - 1) if nonceErr != nil { replyEpoch(fmt.Sprintf("Failed to get nonce for epoch %d (using epoch %d nonce): %v", targetEpoch, targetEpoch-1, nonceErr)) return @@ -533,7 +533,7 @@ func (i *Indexer) cmdLeaderlog(m *telebot.Message) { // Check for stored schedule first stored, err := i.store.GetLeaderSchedule(ctx, targetEpoch) - if err == nil && stored != nil { + if err == nil && stored != nil && i.scheduleNonceMatches(ctx, stored) { msg := FormatScheduleForTelegram(stored, i.poolName, i.leaderlogTZ, i.leaderlogTimeFormat) i.bot.Send(m.Chat, msg) return @@ -553,7 +553,7 @@ func (i *Indexer) cmdLeaderlog(m *telebot.Message) { } } - epochNonce, err := i.nonceTracker.GetNonceForEpoch(targetEpoch - 1) + epochNonce, err := i.nonceTracker.GetVerifiedNonceForEpoch(targetEpoch - 1) if err != nil { reply(fmt.Sprintf("Failed to get nonce for epoch %d (using epoch %d nonce): %v", targetEpoch, targetEpoch-1, err)) return @@ -634,7 +634,7 @@ func (i *Indexer) cmdNonce(m *telebot.Message) { label = "next" } - nonce, err := i.nonceTracker.GetNonceForEpoch(epoch) + nonce, err := i.nonceTracker.GetVerifiedNonceForEpoch(epoch) if err != nil { i.bot.Send(m.Chat, fmt.Sprintf("Error getting %s epoch nonce: %v", label, err)) return @@ -922,6 +922,9 @@ func (i *Indexer) cmdNextBlock(m *telebot.Message) { ctxShort, cancelShort := context.WithTimeout(context.Background(), 10*time.Second) schedule, err := i.store.GetLeaderSchedule(ctxShort, currentEpoch) cancelShort() + if err == nil && schedule != nil && !i.scheduleNonceMatches(ctxShort, schedule) { + schedule = nil + } // If no schedule, auto-compute it if err != nil || schedule == nil { @@ -937,7 +940,7 @@ func (i *Indexer) cmdNextBlock(m *telebot.Message) { ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute) defer cancel() - epochNonce, nonceErr := i.nonceTracker.GetNonceForEpoch(currentEpoch - 1) + epochNonce, nonceErr := i.nonceTracker.GetVerifiedNonceForEpoch(currentEpoch - 1) if nonceErr != nil { reply(fmt.Sprintf("Failed to get nonce for epoch %d (using epoch %d nonce): %v", currentEpoch, currentEpoch-1, nonceErr)) return @@ -1064,6 +1067,25 @@ func (i *Indexer) cmdNextBlock(m *telebot.Message) { i.bot.Send(m.Chat, msg) } +// scheduleNonceMatches validates that a cached schedule was built with the +// currently expected nonce for that epoch (epoch-1 final nonce). +func (i *Indexer) scheduleNonceMatches(ctx context.Context, schedule *LeaderSchedule) bool { + if schedule == nil { + return false + } + expected, err := i.nonceTracker.GetVerifiedNonceForEpoch(schedule.Epoch - 1) + if err != nil { + log.Printf("Failed to load expected nonce for epoch %d: %v", schedule.Epoch, err) + return false + } + if schedule.EpochNonce == hex.EncodeToString(expected) { + return true + } + log.Printf("Ignoring stale schedule for epoch %d: cached nonce %s != expected %s", + schedule.Epoch, schedule.EpochNonce, hex.EncodeToString(expected)) + return false +} + func (i *Indexer) cmdVersion(m *telebot.Message) { if !i.isGroupAllowed(m) { return diff --git a/main.go b/main.go index 05873ef..f9661e6 100644 --- a/main.go +++ b/main.go @@ -60,8 +60,8 @@ const ( // Channel to broadcast block events to connected clients var clients = make(map[*websocket.Conn]bool) // connected clients -var clientsMutex sync.RWMutex // protects clients map -var broadcast = make(chan interface{}, 100) // broadcast channel (buffered to prevent deadlock) +var clientsMutex sync.RWMutex // protects clients map +var broadcast = make(chan interface{}, 100) // broadcast channel (buffered to prevent deadlock) var upgrader = websocket.Upgrader{ CheckOrigin: func(r *http.Request) bool { return true @@ -114,14 +114,14 @@ type Indexer struct { leaderlogEnabled bool leaderlogTZ string leaderlogTimeFormat string - store Store - nonceTracker *NonceTracker - leaderlogMu sync.Mutex - leaderlogCalcing map[int]bool // epochs currently being calculated - leaderlogFailed map[int]time.Time // cooldown: epoch -> last failure time - scheduleExists map[int]bool // cached: epoch -> schedule already in DB - syncer *ChainSyncer // nil in lite mode - lastBlockTime int64 // atomic: unix timestamp of last block received + store Store + nonceTracker *NonceTracker + leaderlogMu sync.Mutex + leaderlogCalcing map[int]bool // epochs currently being calculated + leaderlogFailed map[int]time.Time // cooldown: epoch -> last failure time + scheduleExists map[int]bool // cached: epoch -> schedule already in DB + syncer *ChainSyncer // nil in lite mode + lastBlockTime int64 // atomic: unix timestamp of last block received } type BlockEvent struct { @@ -1256,7 +1256,7 @@ func (i *Indexer) checkLeaderlogTrigger(slot uint64) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) existing, err := i.store.GetLeaderSchedule(ctx, nextEpoch) cancel() - if err == nil && existing != nil { + if err == nil && existing != nil && i.scheduleNonceMatches(ctx, existing) { i.scheduleExists[nextEpoch] = true i.leaderlogMu.Unlock() return @@ -1306,7 +1306,7 @@ func (i *Indexer) calculateAndPostLeaderlog(epoch int) bool { // Get epoch nonce (local first, Koios fallback) // For epoch N leaderlog, use the nonce computed during epoch N-1 - epochNonce, err := i.nonceTracker.GetNonceForEpoch(epoch - 1) + epochNonce, err := i.nonceTracker.GetVerifiedNonceForEpoch(epoch - 1) if err != nil { log.Printf("Failed to get nonce for epoch %d (tried epoch %d nonce): %v", epoch, epoch-1, err) return false @@ -1445,15 +1445,15 @@ func (i *Indexer) backfillSchedules(ctx context.Context) error { failed := 0 for epoch := shelleyStart + 1; epoch <= i.epoch; epoch++ { - // Check if schedule already exists + // Check if schedule already exists and matches expected nonce existing, _ := i.store.GetLeaderSchedule(ctx, epoch) - if existing != nil { + if existing != nil && i.scheduleNonceMatches(ctx, existing) { skipped++ continue } - // Get nonce from DB - nonce, err := i.store.GetFinalNonce(ctx, epoch) + // Get nonce from DB (epoch N schedule uses nonce from epoch N-1) + nonce, err := i.store.GetFinalNonce(ctx, epoch-1) if err != nil || nonce == nil { continue // no nonce available for this epoch } diff --git a/nonce.go b/nonce.go index 59d0f7d..b05f9aa 100644 --- a/nonce.go +++ b/nonce.go @@ -316,6 +316,45 @@ func (nt *NonceTracker) GetNonceForEpoch(epoch int) ([]byte, error) { return nonce, nil } +// GetVerifiedNonceForEpoch returns a nonce that is verified against canonical +// data for that epoch, repairing stale DB cache entries if needed. +// +// Full mode: always recompute from local chain data and upsert DB cache. +// Lite mode: use existing lookup priority (DB -> Koios). +func (nt *NonceTracker) GetVerifiedNonceForEpoch(epoch int) ([]byte, error) { + if !nt.fullMode { + return nt.GetNonceForEpoch(epoch) + } + + computeCtx, computeCancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer computeCancel() + computed, err := nt.ComputeEpochNonce(computeCtx, epoch) + if err != nil { + return nil, fmt.Errorf("failed to verify nonce for epoch %d: %w", epoch, err) + } + + checkCtx, checkCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer checkCancel() + cached, cacheErr := nt.store.GetFinalNonce(checkCtx, epoch) + if cacheErr == nil && cached != nil && bytes.Equal(cached, computed) { + return cached, nil + } + + source := "computed-verified" + if cacheErr == nil && cached != nil && !bytes.Equal(cached, computed) { + log.Printf("Correcting stale cached nonce for epoch %d: cached %x != computed %x", epoch, cached, computed) + source = "computed-correction" + } + + storeCtx, storeCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer storeCancel() + if err := nt.store.SetFinalNonce(storeCtx, epoch, computed, source); err != nil { + log.Printf("Failed to persist verified nonce for epoch %d: %v", epoch, err) + } + + return computed, nil +} + // ComputeEpochNonce computes the epoch nonce for targetEpoch entirely from local chain data. // Streams all blocks from Shelley genesis, evolving the nonce and freezing at the // stability window of each epoch, then computing: @@ -600,26 +639,26 @@ func (nt *NonceTracker) BackfillNonces(ctx context.Context) error { // IntegrityResult holds the verification result for a single epoch. type IntegrityResult struct { - Epoch int - Computed string // hex - DBStored string // hex, or "n/a" - Koios string // hex, or "n/a" - DBMatch bool + Epoch int + Computed string // hex + DBStored string // hex, or "n/a" + Koios string // hex, or "n/a" + DBMatch bool KoiosMatch bool } // IntegrityReport is the summary of a full nonce integrity check. type IntegrityReport struct { - TotalEpochs int - KoiosMatched int + TotalEpochs int + KoiosMatched int KoiosMismatched int - KoiosUnavail int - DBMatched int - DBMismatched int - VrfErrors int - TotalBlocks int - Duration time.Duration - FirstMismatch int // epoch of first Koios mismatch, 0 if none + KoiosUnavail int + DBMatched int + DBMismatched int + VrfErrors int + TotalBlocks int + Duration time.Duration + FirstMismatch int // epoch of first Koios mismatch, 0 if none } // NonceIntegrityCheck recomputes all epoch nonces from raw VRF outputs and From 915a4b563ef84e9b6348d85977d32e0cf1d55d67 Mon Sep 17 00:00:00 2001 From: wcatz Date: Thu, 12 Feb 2026 09:03:13 -0500 Subject: [PATCH 2/2] fix(pr71): address CodeRabbit performance and context issues 1. Fix canceled context in /nextblock command: - Move cancelShort() after scheduleNonceMatches() call - Prevents passing canceled context to function 2. Add in-memory verified nonce cache: - Add verifiedNonces map[int][]byte to NonceTracker - Check cache before recomputing from genesis (10min operation) - Prevents blocking adder pipeline on hot paths - Cache guards with existing mutex Resolves CodeRabbit findings from PR #71. Co-Authored-By: Claude Opus 4.6 --- commands.go | 2 +- nonce.go | 36 +++++++++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/commands.go b/commands.go index e08dd7a..8d5c7f6 100644 --- a/commands.go +++ b/commands.go @@ -921,10 +921,10 @@ func (i *Indexer) cmdNextBlock(m *telebot.Message) { // Try DB first with short timeout ctxShort, cancelShort := context.WithTimeout(context.Background(), 10*time.Second) schedule, err := i.store.GetLeaderSchedule(ctxShort, currentEpoch) - cancelShort() if err == nil && schedule != nil && !i.scheduleNonceMatches(ctxShort, schedule) { schedule = nil } + cancelShort() // If no schedule, auto-compute it if err != nil || schedule == nil { diff --git a/nonce.go b/nonce.go index b05f9aa..640a08a 100644 --- a/nonce.go +++ b/nonce.go @@ -35,7 +35,8 @@ type NonceTracker struct { blockCount int candidateFroze bool // whether candidate nonce was frozen this epoch networkMagic int - fullMode bool // true = genesis-seeded rolling nonce, false = lite (zero-seeded) + fullMode bool // true = genesis-seeded rolling nonce, false = lite (zero-seeded) + verifiedNonces map[int][]byte // in-memory cache of verified epoch nonces (full mode only) } // NewNonceTracker creates a NonceTracker and attempts to restore state from DB. @@ -43,11 +44,12 @@ type NonceTracker struct { // In lite mode, the initial nonce is zero (current behavior). func NewNonceTracker(store Store, koiosClient *koios.Client, epoch, networkMagic int, fullMode bool) *NonceTracker { nt := &NonceTracker{ - store: store, - koiosClient: koiosClient, - currentEpoch: epoch, - networkMagic: networkMagic, - fullMode: fullMode, + store: store, + koiosClient: koiosClient, + currentEpoch: epoch, + networkMagic: networkMagic, + fullMode: fullMode, + verifiedNonces: make(map[int][]byte), } // Try to restore evolving nonce from DB for current epoch @@ -319,13 +321,23 @@ func (nt *NonceTracker) GetNonceForEpoch(epoch int) ([]byte, error) { // GetVerifiedNonceForEpoch returns a nonce that is verified against canonical // data for that epoch, repairing stale DB cache entries if needed. // -// Full mode: always recompute from local chain data and upsert DB cache. +// Full mode: check in-memory cache first, then recompute from local chain data +// if not cached. Repairs stale DB entries on mismatch. // Lite mode: use existing lookup priority (DB -> Koios). func (nt *NonceTracker) GetVerifiedNonceForEpoch(epoch int) ([]byte, error) { if !nt.fullMode { return nt.GetNonceForEpoch(epoch) } + // Check in-memory cache first (prevents repeated genesis recomputation) + nt.mu.Lock() + if cached, ok := nt.verifiedNonces[epoch]; ok { + nt.mu.Unlock() + return cached, nil + } + nt.mu.Unlock() + + // Not in cache — recompute from local chain data computeCtx, computeCancel := context.WithTimeout(context.Background(), 10*time.Minute) defer computeCancel() computed, err := nt.ComputeEpochNonce(computeCtx, epoch) @@ -333,10 +345,15 @@ func (nt *NonceTracker) GetVerifiedNonceForEpoch(epoch int) ([]byte, error) { return nil, fmt.Errorf("failed to verify nonce for epoch %d: %w", epoch, err) } + // Check if DB cache needs repair checkCtx, checkCancel := context.WithTimeout(context.Background(), 5*time.Second) defer checkCancel() cached, cacheErr := nt.store.GetFinalNonce(checkCtx, epoch) if cacheErr == nil && cached != nil && bytes.Equal(cached, computed) { + // DB matches — store in memory cache and return + nt.mu.Lock() + nt.verifiedNonces[epoch] = computed + nt.mu.Unlock() return cached, nil } @@ -352,6 +369,11 @@ func (nt *NonceTracker) GetVerifiedNonceForEpoch(epoch int) ([]byte, error) { log.Printf("Failed to persist verified nonce for epoch %d: %v", epoch, err) } + // Store in memory cache + nt.mu.Lock() + nt.verifiedNonces[epoch] = computed + nt.mu.Unlock() + return computed, nil }