Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions comprehensive_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package main

import (
"bytes"
"context"
"encoding/hex"
"encoding/json"
Expand Down Expand Up @@ -1602,3 +1603,65 @@ func TestGetCandidateNonce(t *testing.T) {
}
}
}

// TestResyncFromDB_BlockCountMismatch verifies that ResyncFromDB detects and
// recomputes the evolving nonce when epoch_nonces.block_count is stale — the
// scenario where InsertBlockBatch (CopyFrom) succeeds but the process is
// OOMKilled before ProcessBatch can update epoch_nonces.
func TestResyncFromDB_BlockCountMismatch(t *testing.T) {
store := newTestStore(t)
ctx := context.Background()

// Use preview network magic for simpler slot math
networkMagic := PreprodNetworkMagic
epoch := 10

// Insert 5 blocks for the epoch (simulating CopyFrom that succeeded)
epochStart := GetEpochStartSlot(epoch, networkMagic)
for i := 0; i < 5; i++ {
slot := epochStart + uint64(i*20)
vrf := []byte{byte(i + 1), 0x02, 0x03, 0x04}
hash := fmt.Sprintf("hash%d", i)
nonce := vrfNonceValueForEpoch(vrf, epoch, networkMagic)
_, err := store.InsertBlock(ctx, slot, epoch, hash, vrf, nonce)
if err != nil {
t.Fatalf("InsertBlock %d: %v", i, err)
}
}

// Set a stale evolving nonce with block_count=2 (simulating OOM after only 2 blocks processed)
staleNonce := []byte{0xDE, 0xAD, 0xBE, 0xEF, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
if err := store.UpsertEvolvingNonce(ctx, epoch, staleNonce, 2); err != nil {
t.Fatalf("UpsertEvolvingNonce: %v", err)
}

// Also need a previous epoch nonce for the recompute to start from
prevNonce := initialNonce(true)
if err := store.UpsertEvolvingNonce(ctx, epoch-1, prevNonce, 100); err != nil {
t.Fatalf("UpsertEvolvingNonce prev: %v", err)
}

// Create NonceTracker and call ResyncFromDB
nt := NewNonceTracker(store, nil, epoch, networkMagic, true)
nt.ResyncFromDB()

// Verify block count was corrected
_, repairedCount, err := store.GetEvolvingNonce(ctx, epoch)
if err != nil {
t.Fatalf("GetEvolvingNonce after resync: %v", err)
}
if repairedCount != 5 {
t.Fatalf("expected block_count=5 after resync repair, got %d", repairedCount)
}

// Verify the nonce is no longer the stale value
repairedNonce, _, _ := store.GetEvolvingNonce(ctx, epoch)
if bytes.Equal(repairedNonce, staleNonce) {
t.Fatal("nonce was not recomputed — still has stale value")
}

t.Logf("ResyncFromDB correctly recomputed nonce from 5 blocks (was stale at 2)")
}
43 changes: 43 additions & 0 deletions nonce.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,49 @@ func (nt *NonceTracker) ResyncFromDB() {
nt.blockCount = blockCount
nt.candidateFroze = false
log.Printf("ResyncFromDB: restored epoch %d, block count %d", epoch, blockCount)

// Cross-check: if blocks were bulk-inserted (CopyFrom) but the process
// was OOMKilled before ProcessBatch could update epoch_nonces.block_count,
// the nonce state is stale. Detect and recompute from raw VRF outputs.
actualCount, countErr := nt.store.GetBlockCountForEpoch(ctx, epoch)
if countErr != nil || actualCount == blockCount {
return
}

log.Printf("ResyncFromDB: block count mismatch for epoch %d (nonce tracker: %d, blocks table: %d) — recomputing",
epoch, blockCount, actualCount)

// Start from previous epoch's evolving nonce (nonce rolls across boundaries)
var etaV []byte
if epoch > 0 {
prevNonce, _, prevErr := nt.store.GetEvolvingNonce(ctx, epoch-1)
if prevErr == nil && prevNonce != nil {
etaV = prevNonce
}
}
if etaV == nil {
etaV = initialNonce(nt.fullMode)
}

vrfBlocks, vrfErr := nt.store.GetVrfOutputsForEpoch(ctx, epoch)
if vrfErr != nil {
log.Printf("ResyncFromDB: recompute failed for epoch %d: %v", epoch, vrfErr)
return
}

for _, b := range vrfBlocks {
nonceValue := vrfNonceValueForEpoch(b.VrfOutput, b.Epoch, nt.networkMagic)
etaV = evolveNonce(etaV, nonceValue)
}

if upsertErr := nt.store.UpsertEvolvingNonce(ctx, epoch, etaV, len(vrfBlocks)); upsertErr != nil {
log.Printf("ResyncFromDB: failed to persist recomputed nonce for epoch %d: %v", epoch, upsertErr)
return
}

nt.evolvingNonce = etaV
nt.blockCount = len(vrfBlocks)
log.Printf("ResyncFromDB: epoch %d recomputed from %d blocks", epoch, len(vrfBlocks))
}

// RecomputeCurrentEpochNonce recomputes the evolving nonce for a specific epoch
Expand Down
Loading