|
| 1 | +// Copyright 2026 The Erigon Authors |
| 2 | +// This file is part of Erigon. |
| 3 | +// |
| 4 | +// Erigon is free software: you can redistribute it and/or modify |
| 5 | +// it under the terms of the GNU Lesser General Public License as published by |
| 6 | +// the Free Software Foundation, either version 3 of the License, or |
| 7 | +// (at your option) any later version. |
| 8 | +// |
| 9 | +// Erigon is distributed in the hope that it will be useful, |
| 10 | +// but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | +// GNU Lesser General Public License for more details. |
| 13 | +// |
| 14 | +// You should have received a copy of the GNU Lesser General Public License |
| 15 | +// along with Erigon. If not, see <http://www.gnu.org/licenses/>. |
| 16 | + |
| 17 | +package stagedsync |
| 18 | + |
| 19 | +import ( |
| 20 | + "context" |
| 21 | + "testing" |
| 22 | + |
| 23 | + "github.com/stretchr/testify/require" |
| 24 | + |
| 25 | + "github.com/erigontech/erigon/common" |
| 26 | + "github.com/erigontech/erigon/common/log/v3" |
| 27 | + "github.com/erigontech/erigon/db/datadir" |
| 28 | + "github.com/erigontech/erigon/db/kv" |
| 29 | + "github.com/erigontech/erigon/db/kv/dbcfg" |
| 30 | + "github.com/erigontech/erigon/db/kv/mdbx" |
| 31 | + "github.com/erigontech/erigon/db/kv/rawdbv3" |
| 32 | + "github.com/erigontech/erigon/db/kv/temporal" |
| 33 | + "github.com/erigontech/erigon/db/snapshotsync/freezeblocks" |
| 34 | + dbstate "github.com/erigontech/erigon/db/state" |
| 35 | + "github.com/erigontech/erigon/db/state/changeset" |
| 36 | + "github.com/erigontech/erigon/db/state/execctx" |
| 37 | + "github.com/erigontech/erigon/execution/chain/networkname" |
| 38 | + "github.com/erigontech/erigon/execution/stagedsync/stages" |
| 39 | + "github.com/erigontech/erigon/node/ethconfig" |
| 40 | +) |
| 41 | + |
| 42 | +// TestUnwindExecutionStage_PrunesUncommittedOverlayWrite is a regression test |
| 43 | +// for the Hoodi block-3004265 gas-used mismatch (originally found on |
| 44 | +// release/3.4, same gap on main). |
| 45 | +// |
| 46 | +// Repro of the original chain of events: |
| 47 | +// |
| 48 | +// 1. In serial batch execution, block 3004265 tx19 does a first-time SSTORE to |
| 49 | +// ca5daf64 slot0. That write lands in the in-RAM SharedDomains / |
| 50 | +// TemporalMemBatch overlay (stamped with tx19's txNum) but the block then |
| 51 | +// fails its post-execution gas check, so its step is never committed. |
| 52 | +// 2. The executor schedules UnwindTo(3004264). Because 3004265 was never |
| 53 | +// committed, the committed execution-stage progress (s.BlockNumber) sits at |
| 54 | +// or below the unwind point, so UnwindExecutionStage hit the |
| 55 | +// `u.UnwindPoint >= s.BlockNumber` early return and skipped unwindExec3 — |
| 56 | +// i.e. it never called sd.Unwind, so the overlay prune added by #20625 |
| 57 | +// (which only runs from unwindExec3) never executed. |
| 58 | +// 3. The same overlay is reused across the unwind→retry loop inside one |
| 59 | +// sync.Run, so on retry the storage read returned tx19's own stale |
| 60 | +// first-write (…a3a34) instead of the committed 0. The contract took the |
| 61 | +// "already initialised" branch, skipped an SSTORE_SET (20000 gas), the block |
| 62 | +// came up exactly 21045 gas short, and the node spun in an unwind/retry loop. |
| 63 | +// |
| 64 | +// Before the fix, step 2's early return leaves the overlay untouched, so the |
| 65 | +// failed block's write is still visible after the unwind — this test asserts it |
| 66 | +// is gone, while a write at/below the unwind point survives (no over-pruning). |
| 67 | +func TestUnwindExecutionStage_PrunesUncommittedOverlayWrite(t *testing.T) { |
| 68 | + t.Parallel() |
| 69 | + |
| 70 | + logger := log.New() |
| 71 | + dirs := datadir.New(t.TempDir()) |
| 72 | + rawDb := mdbx.New(dbcfg.ChainDB, logger).InMem(t, dirs.Chaindata).MustOpen() |
| 73 | + t.Cleanup(rawDb.Close) |
| 74 | + |
| 75 | + // stepSize far above every txNum used here keeps everything in step 0 — the |
| 76 | + // overlay prune compares raw txNums, so the commitment/step machinery is moot. |
| 77 | + agg, err := dbstate.NewTest(dirs).StepSize(10_000).Logger(logger).Open(context.Background(), rawDb) |
| 78 | + require.NoError(t, err) |
| 79 | + t.Cleanup(agg.Close) |
| 80 | + |
| 81 | + db, err := temporal.New(rawDb, agg) |
| 82 | + require.NoError(t, err) |
| 83 | + |
| 84 | + // Block reader backed only by MDBX — the unwind range is at the tip, above |
| 85 | + // any frozen snapshot boundary, so no snapshots are needed. |
| 86 | + snaps := freezeblocks.NewRoSnapshots(ethconfig.BlocksFreezing{ChainName: networkname.Mainnet}, dirs.Snap, logger) |
| 87 | + t.Cleanup(snaps.Close) |
| 88 | + br := freezeblocks.NewBlockReader(snaps, nil) |
| 89 | + |
| 90 | + ctx := context.Background() |
| 91 | + tx, err := db.BeginTemporalRw(ctx) |
| 92 | + require.NoError(t, err) |
| 93 | + defer tx.Rollback() |
| 94 | + |
| 95 | + // txNum layout: block b owns txNums [perBlock*b, perBlock*b+perBlock-1], so |
| 96 | + // TxnumReader().Min(b) == perBlock*b (the block's first/system txNum). |
| 97 | + const perBlock = uint64(10) |
| 98 | + for b := uint64(0); b <= 8; b++ { |
| 99 | + require.NoError(t, rawdbv3.TxNums.Append(tx, b, b*perBlock+perBlock-1)) |
| 100 | + } |
| 101 | + |
| 102 | + doms, err := execctx.NewSharedDomains(ctx, tx, logger) |
| 103 | + require.NoError(t, err) |
| 104 | + defer doms.Close() |
| 105 | + doms.SetChangesetAccumulator(&changeset.StateChangeSet{}) |
| 106 | + |
| 107 | + const ( |
| 108 | + committedBlock = uint64(5) // execution-stage progress (last flushed block) |
| 109 | + unwindPoint = uint64(7) // = failedBlock-1; >= committedBlock => disk no-op |
| 110 | + failedBlock = uint64(8) // block whose post-exec gas check failed mid-batch |
| 111 | + ) |
| 112 | + boundaryTxNum, err := br.TxnumReader().Min(ctx, tx, unwindPoint+1) |
| 113 | + require.NoError(t, err) |
| 114 | + require.Equal(t, failedBlock*perBlock, boundaryTxNum, "sanity: Min(failedBlock) == first txNum of failedBlock") |
| 115 | + |
| 116 | + // staleKey mirrors ca5daf64 slot0: first-written by failedBlock's tx19 at a |
| 117 | + // txNum strictly above the unwind boundary — must be pruned on unwind. |
| 118 | + staleAddr := common.HexToAddress("0xca5daf6473971693b760cc65d726f72c6849d615") |
| 119 | + staleSlot := common.Hash{} // slot 0 |
| 120 | + staleKey := append(append([]byte{}, staleAddr[:]...), staleSlot[:]...) |
| 121 | + staleValHash := common.HexToHash("0xd7549f2a387fa81a1d5a77adc7bd3f782ac0780c460689d88e22aee6916a3a34") |
| 122 | + staleVal := staleValHash[:] |
| 123 | + const staleTxNum = failedBlock*perBlock + 5 // inside failedBlock, above the boundary |
| 124 | + |
| 125 | + // keepKey is written by a block at/below the unwind point — must survive. |
| 126 | + keepAddr := common.HexToAddress("0x00000000000000000000000000000000000000aa") |
| 127 | + keepSlot := common.Hash{31: 0x01} |
| 128 | + keepKey := append(append([]byte{}, keepAddr[:]...), keepSlot[:]...) |
| 129 | + keepVal := []byte{0xbe, 0xef} |
| 130 | + const keepTxNum = unwindPoint * perBlock // inside the unwind-target block |
| 131 | + |
| 132 | + doms.SetTxNum(keepTxNum) |
| 133 | + require.NoError(t, doms.DomainPut(kv.StorageDomain, tx, keepKey, keepVal, keepTxNum, nil)) |
| 134 | + doms.SetTxNum(staleTxNum) |
| 135 | + require.NoError(t, doms.DomainPut(kv.StorageDomain, tx, staleKey, staleVal, staleTxNum, nil)) |
| 136 | + |
| 137 | + // Sanity: both visible through the overlay before the unwind. |
| 138 | + got, _, err := doms.GetLatest(kv.StorageDomain, tx, staleKey) |
| 139 | + require.NoError(t, err) |
| 140 | + require.Equal(t, staleVal, got, "precondition: stale write must be visible pre-unwind") |
| 141 | + got, _, err = doms.GetLatest(kv.StorageDomain, tx, keepKey) |
| 142 | + require.NoError(t, err) |
| 143 | + require.Equal(t, keepVal, got, "precondition: keep write must be visible pre-unwind") |
| 144 | + |
| 145 | + cfg := ExecuteBlockCfg{blockReader: br} |
| 146 | + s := &StageState{ID: stages.Execution, BlockNumber: committedBlock} |
| 147 | + u := &UnwindState{ID: stages.Execution, UnwindPoint: unwindPoint, CurrentBlockNumber: failedBlock} |
| 148 | + require.GreaterOrEqual(t, u.UnwindPoint, s.BlockNumber, "test must exercise the no-op-disk-unwind branch") |
| 149 | + |
| 150 | + require.NoError(t, UnwindExecutionStage(u, s, doms, tx, ctx, cfg, logger)) |
| 151 | + |
| 152 | + // The failed block's first-time write must be gone, so a re-execution reads |
| 153 | + // the committed value (absent here -> empty) and charges SSTORE_SET again. |
| 154 | + got, _, err = doms.GetLatest(kv.StorageDomain, tx, staleKey) |
| 155 | + require.NoError(t, err) |
| 156 | + require.Empty(t, got, |
| 157 | + "after Unwind to block %d, the overlay must not return failedBlock(%d) tx write %x (got %x): "+ |
| 158 | + "UnwindExecutionStage skipped the overlay prune on the no-op-disk-unwind path", |
| 159 | + unwindPoint, failedBlock, staleVal, got) |
| 160 | + |
| 161 | + // A write at/below the unwind point must be untouched (no over-pruning). |
| 162 | + got, _, err = doms.GetLatest(kv.StorageDomain, tx, keepKey) |
| 163 | + require.NoError(t, err) |
| 164 | + require.Equal(t, keepVal, got, "write at/below the unwind point must survive the prune") |
| 165 | +} |
0 commit comments