Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 52 additions & 55 deletions execution/stagedsync/stage_execute.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,80 +462,77 @@ func PruneExecutionStage(ctx context.Context, s *PruneState, tx kv.RwTx, cfg Exe
// - stop prune when `tx.SpaceDirty()` is big
// - and set ~500ms timeout
// because on slow disks - prune is slower. but for now - let's tune for nvme first, and add `tx.SpaceDirty()` check later https://github.com/erigontech/erigon/issues/11635
quickPruneTimeout := time.Duration(cfg.chainConfig.SecondsPerSlot()*1000/3) * time.Millisecond / 2
stagePruneTimeout := time.Duration(cfg.chainConfig.SecondsPerSlot()*1000/3) * time.Millisecond / 2
if timeout > 0 && timeout > stagePruneTimeout {
stagePruneTimeout = timeout
}

if timeout > 0 && timeout > quickPruneTimeout {
quickPruneTimeout = timeout
pruneDiffsLimit := 1_000
pruneBalLimit := 10_000
if s.CurrentSyncCycle.IsInitialCycle {
pruneDiffsLimit = math.MaxInt
pruneBalLimit = math.MaxInt
stagePruneTimeout = 12 * time.Hour
}

stagePruneStartTime := time.Now()
remainingPruneTimeout := func() time.Duration {
remaining := stagePruneTimeout - time.Since(stagePruneStartTime)
if remaining <= 0 {
return 0
}
return remaining
}

if s.ForwardProgress > cfg.syncCfg.MaxReorgDepth && !cfg.syncCfg.AlwaysGenerateChangesets {
// (chunkLen is 8Kb) * (1_000 chunks) = 8mb
// Some blocks on bor-mainnet have 400 chunks of diff = 3mb
var pruneDiffsLimitOnChainTip = 1_000
pruneTimeout := quickPruneTimeout
if s.CurrentSyncCycle.IsInitialCycle {
pruneDiffsLimitOnChainTip = math.MaxInt
pruneTimeout = time.Hour
}
pruneChangeSetsStartTime := time.Now()
if err := rawdb.PruneTable(
tx,
kv.ChangeSets3,
s.ForwardProgress-cfg.syncCfg.MaxReorgDepth,
ctx,
pruneDiffsLimitOnChainTip,
pruneTimeout,
logger,
s.LogPrefix(),
); err != nil {
return err
}
if duration := time.Since(pruneChangeSetsStartTime); duration > quickPruneTimeout {
logger.Debug(
fmt.Sprintf("[%s] prune changesets timing", s.LogPrefix()),
"duration", duration,
"initialCycle", s.CurrentSyncCycle.IsInitialCycle,
)
if pruneTimeout := remainingPruneTimeout(); pruneTimeout > 0 {
if err := rawdb.PruneTable(
tx,
kv.ChangeSets3,
s.ForwardProgress-cfg.syncCfg.MaxReorgDepth,
ctx,
pruneDiffsLimit,
pruneTimeout,
logger,
s.LogPrefix(),
); err != nil {
return err
}
}
}

if s.ForwardProgress > cfg.syncCfg.MaxReorgDepth {
pruneBalLimit := 10_000
pruneTimeout := quickPruneTimeout
if s.CurrentSyncCycle.IsInitialCycle {
pruneBalLimit = math.MaxInt
pruneTimeout = time.Hour
}
if err := rawdb.PruneTable(
tx,
kv.BlockAccessList,
s.ForwardProgress-cfg.syncCfg.MaxReorgDepth,
ctx,
pruneBalLimit,
pruneTimeout,
logger,
s.LogPrefix(),
); err != nil {
return err
if pruneTimeout := remainingPruneTimeout(); pruneTimeout > 0 {
if err := rawdb.PruneTable(
tx,
kv.BlockAccessList,
s.ForwardProgress-cfg.syncCfg.MaxReorgDepth,
ctx,
pruneBalLimit,
pruneTimeout,
logger,
s.LogPrefix(),
); err != nil {
return err
}
}
}

agg := cfg.db.(state.HasAgg).Agg().(*state.Aggregator)
mxExecStepsInDB.Set(rawdbhelpers.IdxStepsCountV3(tx, agg.StepSize()) * 100)

pruneTimeout := quickPruneTimeout
if s.CurrentSyncCycle.IsInitialCycle {
pruneTimeout = 12 * time.Hour
}

pruneSmallBatchesStartTime := time.Now()
if _, err := tx.(kv.TemporalRwTx).PruneSmallBatches(ctx, pruneTimeout); err != nil {
return err
if pruneTimeout := remainingPruneTimeout(); pruneTimeout > 0 {
if _, err := tx.(kv.TemporalRwTx).PruneSmallBatches(ctx, pruneTimeout); err != nil {
return err
}
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How will removing

if s.CurrentSyncCycle.IsInitialCycle {
		pruneTimeout = 12 * time.Hour
	}

work on non-chain-tip (I mean when re-executing from 0)?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you misunderstand the issue. The problem is not that prune takes > 2s; the problem is that prune has a 12-hour timeout when an existing synced node restarts (because s.CurrentSyncCycle.IsInitialCycle is true). And it has a 12-hour timeout because how else can we prune data on non-chain-tip (when executing from 0)?

Copy link
Copy Markdown
Member Author

@JkLondon JkLondon Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how else we can prune data on non-chain-tip (when executing from 0)

After some creating and merging of changeset and other files, why not? Also, you've mixed 2 different situations here (sync from 0 and restart on a synced node), so I don't get what we're discussing. However, on the exec prune you shared, I saw a spike to 9s after restart (on ottersync it was ~1min).

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how removing

if s.CurrentSyncCycle.IsInitialCycle {
		pruneTimeout = 12 * time.Hour
	}

will work on non-chain-tip (i mean when re-exec from 0)?

It will attempt to prune a "small batch" at a time every time we enter the exec function (in exec prune), the same as for all stages; eventually it will prune everything.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you've mixed here 2 different situations (sync from 0 and restart on synced node).
We don't have a separation of these 2 cases in the Erigon code.
And this is a problem.
s.CurrentSyncCycle.IsInitialCycle is true in both cases.

If your PR removes

if s.CurrentSyncCycle.IsInitialCycle {
		pruneTimeout = 12 * time.Hour
	}

it will also affect both cases.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But my PR doesn't remove this condition anyway.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I saw, a synced node doesn't have InitialSync set to true, no?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test it

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I will sync to tip and check the variable after restart.

}
if duration := time.Since(pruneSmallBatchesStartTime); duration > quickPruneTimeout {
if duration := time.Since(stagePruneStartTime); duration > stagePruneTimeout {
logger.Debug(
fmt.Sprintf("[%s] prune small batches timing", s.LogPrefix()),
fmt.Sprintf("[%s] prune execution timing", s.LogPrefix()),
"duration", duration,
"timeout", stagePruneTimeout,
"initialCycle", s.CurrentSyncCycle.IsInitialCycle,
)
}
Expand Down
Loading