Skip to content

Commit 9c55a75

Browse files
Revert "db/snapshotsync/freezeblocks: run blocks snapshot merge off the shared build semaphore" (erigontech#22071)
Reverts erigontech#21526 because found races: erigontech#22045 (comment) and erigontech#21960
1 parent d879b3b commit 9c55a75

5 files changed

Lines changed: 11 additions & 199 deletions

File tree

cmd/utils/app/snapshots_cmd.go

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3511,16 +3511,6 @@ func doRetireCommand(cliCtx *cli.Context, dirs datadir.Dirs) error {
35113511
return err
35123512
}
35133513

3514-
for {
3515-
merged, err := br.MergeBlocks(ctx, log.LvlInfo, downloader.NoopSeederClient{})
3516-
if err != nil {
3517-
return err
3518-
}
3519-
if !merged {
3520-
break
3521-
}
3522-
}
3523-
35243514
if err := br.RemoveOverlaps(nil); err != nil {
35253515
return err
35263516
}

db/services/interfaces.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ type BlockRetire interface {
127127
BuildMissedIndicesIfNeed(ctx context.Context, logPrefix string, notifier DBEventNotifier) error
128128
SetWorkers(workers int)
129129
GetWorkers() int
130-
WaitForMerges(ctx context.Context)
131130
}
132131

133132
type DBEventNotifier interface {

db/snapshotsync/freezeblocks/block_retire_merge_test.go

Lines changed: 0 additions & 120 deletions
This file was deleted.

db/snapshotsync/freezeblocks/block_snapshots.go

Lines changed: 11 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,6 @@ func chooseSegmentEnd(from, to uint64, snapType snaptype.Enum, snCfg *snapcfg.Cf
140140
type BlockRetire struct {
141141
maxScheduledBlock atomic.Uint64
142142
working atomic.Bool
143-
merging atomic.Bool
144-
mergeWg sync.WaitGroup
145-
mergeMu sync.Mutex
146-
mergeClosing bool
147143
lastRetireGapStart atomic.Uint64
148144

149145
// shared semaphore with AggregatorV3 to allow only one type of snapshot building at a time
@@ -334,7 +330,8 @@ func (br *BlockRetire) retireBlocks(
334330
}
335331
}
336332

337-
return ok, nil
333+
merged, err := br.MergeBlocks(ctx, lvl, seeder)
334+
return ok || merged, err
338335
}
339336

340337
func (br *BlockRetire) MergeBlocks(
@@ -439,18 +436,16 @@ func (br *BlockRetire) RetireBlocksInBackground(
439436
defer onDone()
440437
defer br.working.Store(false)
441438

442-
// Dump under the shared semaphore: the build phase is fast and is
443-
// serialized against state-snapshot building to bound concurrent I/O.
444-
err := func() error {
445-
if br.snBuildAllowed != nil {
446-
//we are inside own goroutine - it's fine to block here
447-
if err := br.snBuildAllowed.Acquire(ctx, 1); err != nil {
448-
return err
449-
}
450-
defer br.snBuildAllowed.Release(1)
439+
if br.snBuildAllowed != nil {
440+
//we are inside own goroutine - it's fine to block here
441+
if err := br.snBuildAllowed.Acquire(ctx, 1); err != nil {
442+
br.logger.Warn("[snapshots] retire blocks", "err", err)
443+
return
451444
}
452-
return br.RetireBlocks(ctx, minBlockNum, maxBlockNum, lvl, seeder, onFinishRetire)
453-
}()
445+
defer br.snBuildAllowed.Release(1)
446+
}
447+
448+
err := br.RetireBlocks(ctx, minBlockNum, maxBlockNum, lvl, seeder, onFinishRetire)
454449
if errors.Is(err, heimdall.ErrHeimdallDataIsNotReady) {
455450
br.borDataNotReadyBefore = time.Now().Add(BorDataNotReadyTimeout)
456451
br.logger.Debug("[snapshots] bor data is not ready to be retired", "nextAttemptAt", br.borDataNotReadyBefore)
@@ -464,54 +459,11 @@ func (br *BlockRetire) RetireBlocksInBackground(
464459
br.logger.Error("[snapshots] retire blocks", "err", err)
465460
return
466461
}
467-
468-
// Merge runs without the semaphore: it is slow and expensive, and
469-
// holding the semaphore across it stalls state-snapshot collation/prune
470-
// (which bounds chaindata size). Mirrors the aggregator's MergeLoop.
471-
br.mergeBlocksInBackground(ctx, lvl, seeder)
472462
}()
473463

474464
return true
475465
}
476466

477-
// mergeBlocksInBackground runs block-snapshot merges off the shared build
478-
// semaphore. At most one merge runs at a time; a request arriving while one is
479-
// in flight is dropped and picked up by a later retire cycle.
480-
func (br *BlockRetire) mergeBlocksInBackground(ctx context.Context, lvl log.Lvl, seeder downloader.SeederClient) {
481-
br.mergeMu.Lock()
482-
defer br.mergeMu.Unlock()
483-
// Once WaitForMerges has begun draining we must not start (and Add) a new merge.
484-
if br.mergeClosing {
485-
return
486-
}
487-
if !br.merging.CompareAndSwap(false, true) {
488-
return
489-
}
490-
br.mergeWg.Add(1)
491-
go func() {
492-
defer br.mergeWg.Done()
493-
defer br.merging.Store(false)
494-
if _, err := br.MergeBlocks(ctx, lvl, seeder); err != nil {
495-
br.logger.Error("[snapshots] merge blocks", "err", err)
496-
}
497-
}()
498-
}
499-
500-
// WaitForMerges prevents new background merges from starting and waits for any
501-
// in-flight one to finish, or until ctx is done. Abandoning the wait is safe:
502-
// the merge touches neither chainDB nor the open snapshots.
503-
func (br *BlockRetire) WaitForMerges(ctx context.Context) {
504-
br.mergeMu.Lock()
505-
br.mergeClosing = true
506-
br.mergeMu.Unlock()
507-
done := make(chan struct{})
508-
go func() { br.mergeWg.Wait(); close(done) }()
509-
select {
510-
case <-done:
511-
case <-ctx.Done():
512-
}
513-
}
514-
515467
func (br *BlockRetire) RetireBlocks(
516468
ctx context.Context,
517469
requestedMinBlockNum uint64,

node/eth/backend.go

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1545,15 +1545,6 @@ func (s *Ethereum) Stop() error {
15451545
}
15461546
}
15471547

1548-
// Drain the fire-and-forget background block-snapshot merge goroutine for
1549-
// shutdown hygiene; the merge itself touches neither chainDB nor snapshots,
1550-
// so the wait is bounded.
1551-
if s.components != nil && s.components.Storage != nil && s.components.Storage.BlockRetire != nil {
1552-
mergeCtx, mergeCancel := context.WithTimeout(context.Background(), 30*time.Second)
1553-
s.components.Storage.BlockRetire.WaitForMerges(mergeCtx)
1554-
mergeCancel()
1555-
}
1556-
15571548
s.chainDB.Close()
15581549

15591550
if s.config.Downloader != nil {

0 commit comments

Comments
 (0)