Skip to content

Commit b2ba91e

Browse files
integrity: split checkers to slow/fast, move CheckStateVerify to slow, new docs for checkers, reduce over-concurrency. (#19499)
Today the gnosis files release timed out, so I made a couple of tweaks: the new CheckStateVerify is too slow to run during the snapshotters' release; it is also RAM-greedy and single-threaded. Also in this PR: - new docs for each check and how it differs from the others - removed low-level calls of madv; only the high-level ones are left - limit checkers' concurrency (each check also does its own concurrency, so it is better to have some bounds on high-level concurrency) --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent f0e61f1 commit b2ba91e

File tree

11 files changed

+343
-307
lines changed

11 files changed

+343
-307
lines changed

cmd/utils/app/snapshots_cmd.go

Lines changed: 100 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import (
3737
g "github.com/anacrolix/generics"
3838
"github.com/c2h5oh/datasize"
3939
"github.com/urfave/cli/v2"
40+
"golang.org/x/sync/errgroup"
4041
"golang.org/x/sync/semaphore"
4142

4243
"github.com/erigontech/erigon/cl/clparams"
@@ -389,8 +390,8 @@ var snapshotCommand = cli.Command{
389390
Description: "run slow validation of files. use --check to run multiple/single",
390391
Flags: joinFlags([]cli.Flag{
391392
&utils.DataDirFlag,
392-
&cli.StringFlag{Name: "check", Usage: fmt.Sprintf("comma separated list from: %s", integrity.AllChecks)},
393-
&cli.StringFlag{Name: "skip-check", Usage: fmt.Sprintf("comma separated list from: %s", integrity.AllChecks)},
393+
&cli.StringFlag{Name: "check", Usage: fmt.Sprintf("comma separated list from: %s", integrity.FastChecks)},
394+
&cli.StringFlag{Name: "skip-check", Usage: fmt.Sprintf("comma separated list from: %s", integrity.FastChecks)},
394395
&cli.BoolFlag{Name: "failFast", Value: true, Usage: "to stop after 1st problem or print WARN log and continue check"},
395396
&cli.Uint64Flag{Name: "fromStep", Value: 0, Usage: "skip files before given step"},
396397
}),
@@ -1076,14 +1077,14 @@ func doIntegrity(cliCtx *cli.Context) error {
10761077
}
10771078

10781079
for _, check := range requestedChecks {
1079-
if slices.Contains(integrity.AllChecks, check) || slices.Contains(integrity.NonDefaultChecks, check) {
1080+
if slices.Contains(integrity.FastChecks, check) || slices.Contains(integrity.SlowChecks, check) {
10801081
continue
10811082
}
10821083

10831084
return fmt.Errorf("requested check %s not found", check)
10841085
}
10851086
} else {
1086-
requestedChecks = integrity.AllChecks
1087+
requestedChecks = integrity.FastChecks
10871088
}
10881089

10891090
skipChecks := cliCtx.String("skip-check")
@@ -1133,96 +1134,103 @@ func doIntegrity(cliCtx *cli.Context) error {
11331134

11341135
blockReader, _ := blockRetire.IO()
11351136
heimdallStore, _ := blockRetire.BorStore()
1137+
1138+
g, ctx := errgroup.WithContext(ctx)
1139+
g.SetLimit(4)
11361140
for _, chk := range requestedChecks {
1137-
logger.Info("[integrity] starting", "check", chk)
1138-
switch chk {
1139-
case integrity.BlocksTxnID:
1140-
if err := blockReader.(*freezeblocks.BlockReader).IntegrityTxnID(failFast); err != nil {
1141-
return err
1142-
}
1143-
case integrity.HeaderNoGaps:
1144-
if err := integrity.NoGapsInCanonicalHeaders(ctx, db, blockReader, failFast); err != nil {
1145-
return err
1146-
}
1147-
case integrity.Blocks:
1148-
if err := integrity.SnapBlocksRead(ctx, db, blockReader, 0, 0, failFast); err != nil {
1149-
return err
1150-
}
1151-
case integrity.InvertedIndex:
1152-
if err := integrity.E3EfFiles(ctx, db, failFast, fromStep); err != nil {
1153-
return err
1154-
}
1155-
case integrity.HistoryNoSystemTxs:
1156-
if err := integrity.HistoryCheckNoSystemTxs(ctx, db, blockReader); err != nil {
1157-
return err
1158-
}
1159-
case integrity.BorEvents:
1160-
if !CheckBorChain(chainConfig.ChainName) {
1161-
logger.Info("BorEvents skipped because not bor chain")
1162-
continue
1163-
}
1164-
snapshots := blockReader.BorSnapshots().(*heimdall.RoSnapshots)
1165-
if err := bridge.ValidateBorEvents(ctx, db, blockReader, snapshots, 0, 0, failFast); err != nil {
1166-
return err
1167-
}
1168-
case integrity.BorSpans:
1169-
if !CheckBorChain(chainConfig.ChainName) {
1170-
logger.Info("BorSpans skipped because not bor chain")
1171-
continue
1172-
}
1173-
if err := heimdall.ValidateBorSpans(ctx, logger, dirs, heimdallStore, borSnaps, failFast); err != nil {
1174-
return err
1175-
}
1176-
case integrity.BorCheckpoints:
1177-
if !CheckBorChain(chainConfig.ChainName) {
1178-
logger.Info("BorCheckpoints skipped because not bor chain")
1179-
continue
1180-
}
1181-
if err := heimdall.ValidateBorCheckpoints(ctx, logger, dirs, heimdallStore, borSnaps, failFast); err != nil {
1182-
return err
1183-
}
1184-
case integrity.ReceiptsNoDups:
1185-
if err := integrity.CheckReceiptsNoDups(ctx, db, blockReader, failFast); err != nil {
1186-
return err
1187-
}
1188-
case integrity.RCacheNoDups:
1189-
if err := integrity.CheckRCacheNoDups(ctx, db, blockReader, failFast); err != nil {
1190-
return err
1191-
}
1192-
case integrity.StateProgress:
1193-
if err := integrity.CheckStateProgress(ctx, db, blockReader, failFast); err != nil {
1194-
return err
1195-
}
1196-
case integrity.Publishable:
1197-
if err := doPublishable(cliCtx, chainDB); err != nil {
1198-
return err
1199-
}
1200-
case integrity.CommitmentRoot:
1201-
if err := integrity.CheckCommitmentRoot(ctx, db, blockReader, failFast, logger); err != nil {
1202-
return err
1203-
}
1204-
case integrity.CommitmentKvi:
1205-
if err := integrity.CheckCommitmentKvi(ctx, db, failFast, logger); err != nil {
1206-
return err
1207-
}
1208-
case integrity.CommitmentKvDeref:
1209-
if err := integrity.CheckCommitmentKvDeref(ctx, db, failFast, logger); err != nil {
1210-
return err
1211-
}
1212-
case integrity.CommitmentHistVal:
1213-
if err := integrity.CheckCommitmentHistVal(ctx, db, blockReader, failFast, logger); err != nil {
1214-
return err
1215-
}
1216-
case integrity.StateVerify:
1217-
if err := integrity.CheckStateVerify(ctx, db, failFast, fromStep, logger); err != nil {
1218-
return err
1141+
chk := chk
1142+
g.Go(func() error {
1143+
logger.Info("[integrity] starting", "check", chk)
1144+
switch chk {
1145+
case integrity.BlocksTxnID:
1146+
if err := blockReader.(*freezeblocks.BlockReader).IntegrityTxnID(failFast); err != nil {
1147+
return err
1148+
}
1149+
case integrity.HeaderNoGaps:
1150+
if err := integrity.NoGapsInCanonicalHeaders(ctx, db, blockReader, failFast); err != nil {
1151+
return err
1152+
}
1153+
case integrity.Blocks:
1154+
if err := integrity.SnapBlocksRead(ctx, db, blockReader, 0, 0, failFast); err != nil {
1155+
return err
1156+
}
1157+
case integrity.InvertedIndex:
1158+
if err := integrity.E3EfFiles(ctx, db, failFast, fromStep); err != nil {
1159+
return err
1160+
}
1161+
case integrity.HistoryNoSystemTxs:
1162+
if err := integrity.HistoryCheckNoSystemTxs(ctx, db, blockReader); err != nil {
1163+
return err
1164+
}
1165+
case integrity.BorEvents:
1166+
if !CheckBorChain(chainConfig.ChainName) {
1167+
logger.Info("BorEvents skipped because not bor chain")
1168+
return nil
1169+
}
1170+
snapshots := blockReader.BorSnapshots().(*heimdall.RoSnapshots)
1171+
if err := bridge.ValidateBorEvents(ctx, db, blockReader, snapshots, 0, 0, failFast); err != nil {
1172+
return err
1173+
}
1174+
case integrity.BorSpans:
1175+
if !CheckBorChain(chainConfig.ChainName) {
1176+
logger.Info("BorSpans skipped because not bor chain")
1177+
return nil
1178+
}
1179+
if err := heimdall.ValidateBorSpans(ctx, logger, dirs, heimdallStore, borSnaps, failFast); err != nil {
1180+
return err
1181+
}
1182+
case integrity.BorCheckpoints:
1183+
if !CheckBorChain(chainConfig.ChainName) {
1184+
logger.Info("BorCheckpoints skipped because not bor chain")
1185+
return nil
1186+
}
1187+
if err := heimdall.ValidateBorCheckpoints(ctx, logger, dirs, heimdallStore, borSnaps, failFast); err != nil {
1188+
return err
1189+
}
1190+
case integrity.ReceiptsNoDups:
1191+
if err := integrity.CheckReceiptsNoDups(ctx, db, blockReader, failFast); err != nil {
1192+
return err
1193+
}
1194+
case integrity.RCacheNoDups:
1195+
if err := integrity.CheckRCacheNoDups(ctx, db, blockReader, failFast); err != nil {
1196+
return err
1197+
}
1198+
case integrity.StateProgress:
1199+
if err := integrity.CheckStateProgress(ctx, db, blockReader, failFast); err != nil {
1200+
return err
1201+
}
1202+
case integrity.Publishable:
1203+
if err := doPublishable(cliCtx, chainDB); err != nil {
1204+
return err
1205+
}
1206+
case integrity.CommitmentRoot:
1207+
if err := integrity.CheckCommitmentRoot(ctx, db, blockReader, failFast, logger); err != nil {
1208+
return err
1209+
}
1210+
case integrity.CommitmentKvi:
1211+
if err := integrity.CheckCommitmentKvi(ctx, db, failFast, logger); err != nil {
1212+
return err
1213+
}
1214+
case integrity.CommitmentKvDeref:
1215+
if err := integrity.CheckCommitmentKvDeref(ctx, db, failFast, logger); err != nil {
1216+
return err
1217+
}
1218+
case integrity.CommitmentHistVal:
1219+
if err := integrity.CheckCommitmentHistVal(ctx, db, blockReader, failFast, logger); err != nil {
1220+
return err
1221+
}
1222+
case integrity.StateVerify:
1223+
if err := integrity.CheckStateVerify(ctx, db, failFast, fromStep, logger); err != nil {
1224+
return err
1225+
}
1226+
default:
1227+
return fmt.Errorf("unknown check: %s", chk)
12191228
}
1220-
default:
1221-
return fmt.Errorf("unknown check: %s", chk)
1222-
}
1229+
return nil
1230+
})
12231231
}
12241232

1225-
return nil
1233+
return g.Wait()
12261234
}
12271235

12281236
func doCheckCommitmentHistAtBlk(cliCtx *cli.Context, logger log.Logger) error {
@@ -1584,13 +1592,13 @@ func checkIfStateSnapshotsPublishable(dirs datadir.Dirs, chainDB kv.RoDB) error
15841592
if err != nil {
15851593
return fmt.Errorf("failed to read PersistReceipts config: %w", err)
15861594
}
1587-
log.Warn("This installation doesn't persist receipts cache; ignoring .rcache checks")
1595+
log.Warn("[integrity] This installation doesn't persist receipts cache; ignoring .rcache checks")
15881596

15891597
commitmentHistory, _, err = rawdb.ReadDBCommitmentHistoryEnabled(tx)
15901598
if err != nil {
15911599
return fmt.Errorf("failed to read CommitmentHistory config: %w", err)
15921600
}
1593-
log.Warn("This installation doesn't persist commitment history; ignoring commitment history checks")
1601+
log.Warn("[integrity] This installation doesn't persist commitment history; ignoring commitment history checks")
15941602

15951603
return nil
15961604
}); err != nil {

0 commit comments

Comments
 (0)