Skip to content

Commit 5f8be5e

Browse files
cache directory entries to avoid expensive filepath.Glob in index-related ops (#18939)
## Summary

- Added `MatchVersionedFile` to search pre-scanned directory entries instead of making per-file `filepath.Glob` calls
- Updated `Domain.OpenList` to accept a `ScanDirsResult` struct instead of individual arrays
- Pre-scans directory entries once upfront to avoid repeated filesystem calls when opening dirty files
- `CaplinSnapshots.OpenList` now uses `snaptype.IdxFiles` instead of `os.ReadDir` for efficiency
- Added a test for `MatchVersionedFile` handling seg/idx files with different base names (blobsidecars.seg → blocksidecars.idx)
- `SnapshotRepo.openDirtyFiles` now uses `MatchVersionedFile` with pre-scanned entries

## Some numbers

Gnosis has a large number of caplin/block files, and offline commands and Erigon startup had gotten slow because of repeated `filepath.Glob` calls. An `integration stage_exec` run, before/after:

```
real 1m38.648s -> 0m27.946s
user 2m39.027s -> 1m26.677s
sys  0m14.967s -> 0m2.525s
```

## In Future PRs

- [ ] `fileItemsWithMissedAccessors` in `db/state/dirty_files.go:777` - uses `dir.FileExist` per accessor, could use pre-scanned entries instead
- [ ] If `FindFilesWithVersionsByPattern` can be fully replaced, move the supported-version check into `MatchVersionedFile`
- [ ] Let `openFolder`/`openList` in InvertedIndex/Domain/History accept `ScanDirsResult` directly
- [ ] Build missed accessors can use `MatchVersionedFile`
- [ ] Rename `BuildMissingIndices` to `BuildMissedAccessors` in caplin/RoSnapshots for consistency with the rest of the codebase
- [ ] Snaptype operations: `Index.HasFile`, `SnapType.FileExist`, `ParseFromFile` in `db/snaptype/type.go`
- [ ] Block types: body/tx path resolution in `db/snaptype2/block_types.go`
1 parent d455aaf commit 5f8be5e

File tree

13 files changed

+376
-58
lines changed

13 files changed

+376
-58
lines changed

agents.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,5 @@ Erigon is a high-performance Ethereum execution client with embedded consensus l
5151
## Conventions
5252

5353
Commit messages: prefix with package(s) modified, e.g., `eth, rpc: make trace configs optional`
54+
55+
**Important**: Always run `make lint` after making code changes and before committing. Fix any linter errors before proceeding.

db/snapshotsync/freezeblocks/caplin_snapshots.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,17 @@ func (s *CaplinSnapshots) OpenList(fileNames []string, optimistic bool) error {
161161
defer s.dirtyLock.Unlock()
162162

163163
s.closeWhatNotInList(fileNames)
164+
165+
// Get idx files for efficient index file lookups
166+
idxFiles, err := snaptype.IdxFiles(s.dir)
167+
if err != nil {
168+
return fmt.Errorf("read idx files %s: %w", s.dir, err)
169+
}
170+
dirEntries := make([]string, 0, len(idxFiles))
171+
for _, f := range idxFiles {
172+
dirEntries = append(dirEntries, f.Name())
173+
}
174+
164175
var segmentsMax uint64
165176
var segmentsMaxSet bool
166177
Loop:
@@ -215,7 +226,7 @@ Loop:
215226
// then make segment available even if index open may fail
216227
s.dirty[snaptype.BeaconBlocks.Enum()].Set(sn)
217228
}
218-
if err := sn.OpenIdxIfNeed(s.dir, optimistic); err != nil {
229+
if err := sn.OpenIdxIfNeed(s.dir, optimistic, dirEntries); err != nil {
219230
return err
220231
}
221232
// Only bob sidecars count for progression
@@ -271,7 +282,7 @@ Loop:
271282
// then make segment available even if index open may fail
272283
s.dirty[snaptype.BlobSidecars.Enum()].Set(sn)
273284
}
274-
if err := sn.OpenIdxIfNeed(s.dir, optimistic); err != nil {
285+
if err := sn.OpenIdxIfNeed(s.dir, optimistic, dirEntries); err != nil {
275286
return err
276287
}
277288
}

db/snapshotsync/merger.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ func (m *Merger) mergeSubSegment(
129129
if err = buildIdx(ctx, sn, indexBuilder, m.chainConfig, m.tmpDir, p, m.lvl, m.logger); err != nil {
130130
return
131131
}
132-
err = newDirtySegment.openIdx(snapDir)
132+
err = newDirtySegment.openIdx(snapDir, nil)
133133
if err != nil {
134134
return
135135
}

db/snapshotsync/snapshots.go

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -434,13 +434,13 @@ func (s *DirtySegment) closeAndRemoveFiles() {
434434
}
435435
}
436436

437-
func (s *DirtySegment) OpenIdxIfNeed(dir string, optimistic bool) (err error) {
437+
func (s *DirtySegment) OpenIdxIfNeed(dir string, optimistic bool, dirEntries []string) (err error) {
438438
if len(s.Type().IdxFileNames(s.from, s.to)) == 0 {
439439
return nil
440440
}
441441

442442
if s.refcount.Load() == 0 {
443-
err = s.openIdx(dir)
443+
err = s.openIdx(dir, dirEntries)
444444

445445
if err != nil {
446446
if !errors.Is(err, os.ErrNotExist) {
@@ -456,7 +456,7 @@ func (s *DirtySegment) OpenIdxIfNeed(dir string, optimistic bool) (err error) {
456456
return nil
457457
}
458458

459-
func (s *DirtySegment) openIdx(dir string) (err error) {
459+
func (s *DirtySegment) openIdx(dir string, dirEntries []string) (err error) {
460460
if s.Decompressor == nil {
461461
return nil
462462
}
@@ -469,11 +469,18 @@ func (s *DirtySegment) openIdx(dir string) (err error) {
469469
if s.indexes[i] != nil {
470470
continue
471471
}
472-
fPathMask, err := version.ReplaceVersionWithMask(filepath.Join(dir, fileName))
472+
fPathMask, err := version.ReplaceVersionWithMask(fileName)
473473
if err != nil {
474474
return fmt.Errorf("[open index] can't replace with mask in file %s: %w", fileName, err)
475475
}
476-
fPath, _, ok, err := version.FindFilesWithVersionsByPattern(fPathMask)
476+
477+
var fPath string
478+
var ok bool
479+
if dirEntries != nil {
480+
fPath, _, ok, err = version.MatchVersionedFile(fPathMask, dirEntries, dir)
481+
} else {
482+
fPath, _, ok, err = version.FindFilesWithVersionsByPattern(filepath.Join(dir, fPathMask))
483+
}
477484
if err != nil {
478485
return fmt.Errorf("%w, fileName: %s", err, fileName)
479486
}
@@ -1082,6 +1089,21 @@ func (s *RoSnapshots) openSegments(fileNames []string, open bool, optimistic boo
10821089

10831090
snConfig, _ := snapcfg.KnownCfg(s.cfg.ChainName)
10841091

1092+
// Read full directory listing once for efficient index file lookups
1093+
var dirEntries []string
1094+
if open {
1095+
entries, err := os.ReadDir(s.dir)
1096+
if err != nil && !os.IsNotExist(err) {
1097+
return fmt.Errorf("read dir %s: %w", s.dir, err)
1098+
}
1099+
dirEntries = make([]string, 0, len(entries))
1100+
for _, e := range entries {
1101+
if !e.IsDir() {
1102+
dirEntries = append(dirEntries, e.Name())
1103+
}
1104+
}
1105+
}
1106+
10851107
for _, fName := range fileNames {
10861108
f, isState, ok := snaptype.ParseFileName(s.dir, fName)
10871109
if !ok || isState || snaptype.IsTorrentPartial(f.Ext) {
@@ -1142,7 +1164,7 @@ func (s *RoSnapshots) openSegments(fileNames []string, open bool, optimistic boo
11421164

11431165
if open {
11441166
wg.Go(func() error {
1145-
if err := sn.OpenIdxIfNeed(s.dir, optimistic); err != nil {
1167+
if err := sn.OpenIdxIfNeed(s.dir, optimistic, dirEntries); err != nil {
11461168
return err
11471169
}
11481170
return nil

db/state/aggregator.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,6 @@ func (a *Aggregator) OpenFolder() error {
331331
return nil
332332
}
333333

334-
// TODO: convert this func to `map` or struct instead of 4 return params
335334
func scanDirs(dirs datadir.Dirs) (r *ScanDirsResult, err error) {
336335
r = &ScanDirsResult{}
337336
r.iiFiles, err = filesFromDir(dirs.SnapIdx)
@@ -346,13 +345,18 @@ func scanDirs(dirs datadir.Dirs) (r *ScanDirsResult, err error) {
346345
if err != nil {
347346
return
348347
}
348+
r.accessorFiles, err = filesFromDir(dirs.SnapAccessors)
349+
if err != nil {
350+
return
351+
}
349352
return r, nil
350353
}
351354

352355
type ScanDirsResult struct {
353-
domainFiles []string
354-
historyFiles []string
355-
iiFiles []string
356+
domainFiles []string
357+
historyFiles []string
358+
iiFiles []string
359+
accessorFiles []string
356360
}
357361

358362
func (a *Aggregator) openFolder() error {

db/state/dirty_files.go

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -320,15 +320,15 @@ func deleteMergeFile(dirtyFiles *btree2.BTreeG[*FilesItem], outs []*FilesItem, f
320320
}
321321
}
322322

323-
func (d *Domain) openDirtyFiles() (err error) {
323+
func (d *Domain) openDirtyFiles(dirEntries []string) (err error) {
324324
invalidFileItems := make([]*FilesItem, 0)
325325
invalidFileItemsLock := sync.Mutex{}
326326
d.dirtyFiles.Walk(func(items []*FilesItem) bool {
327327
for _, item := range items {
328328
fromStep, toStep := item.StepRange(d.stepSize)
329329
if item.decompressor == nil {
330-
fPathMask := d.kvFilePathMask(fromStep, toStep)
331-
fPath, fileVer, ok, err := version.FindFilesWithVersionsByPattern(fPathMask)
330+
fNameMask := d.kvFileNameMask(fromStep, toStep)
331+
fPath, fileVer, ok, err := version.MatchVersionedFile(fNameMask, dirEntries, d.dirs.SnapDomain)
332332
if err != nil {
333333
_, fName := filepath.Split(fPath)
334334
d.logger.Debug("[agg] Domain.openDirtyFiles: FileExist err", "f", fName, "err", err)
@@ -338,7 +338,7 @@ func (d *Domain) openDirtyFiles() (err error) {
338338
continue
339339
}
340340
if !ok {
341-
_, fName := filepath.Split(fPath)
341+
fName := fNameMask
342342
d.logger.Debug("[agg] Domain.openDirtyFiles: file does not exists", "f", fName)
343343
invalidFileItemsLock.Lock()
344344
invalidFileItems = append(invalidFileItems, item)
@@ -367,8 +367,8 @@ func (d *Domain) openDirtyFiles() (err error) {
367367
}
368368

369369
if item.index == nil && d.Accessors.Has(statecfg.AccessorHashMap) {
370-
fPathMask := d.kviAccessorFilePathMask(fromStep, toStep)
371-
fPath, fileVer, ok, err := version.FindFilesWithVersionsByPattern(fPathMask)
370+
fNameMask := d.kviAccessorFileNameMask(fromStep, toStep)
371+
fPath, fileVer, ok, err := version.MatchVersionedFile(fNameMask, dirEntries, d.dirs.SnapDomain)
372372
if err != nil {
373373
_, fName := filepath.Split(fPath)
374374
d.logger.Warn("[agg] Domain.openDirtyFiles", "err", err, "f", fName)
@@ -386,8 +386,8 @@ func (d *Domain) openDirtyFiles() (err error) {
386386
}
387387
}
388388
if item.bindex == nil && d.Accessors.Has(statecfg.AccessorBTree) {
389-
fPathMask := d.kvBtAccessorFilePathMask(fromStep, toStep)
390-
fPath, fileVer, ok, err := version.FindFilesWithVersionsByPattern(fPathMask)
389+
fNameMask := d.kvBtAccessorFileNameMask(fromStep, toStep)
390+
fPath, fileVer, ok, err := version.MatchVersionedFile(fNameMask, dirEntries, d.dirs.SnapDomain)
391391
if err != nil {
392392
_, fName := filepath.Split(fPath)
393393
d.logger.Warn("[agg] Domain.openDirtyFiles", "err", err, "f", fName)
@@ -405,8 +405,8 @@ func (d *Domain) openDirtyFiles() (err error) {
405405
}
406406
}
407407
if item.existence == nil && d.Accessors.Has(statecfg.AccessorExistence) {
408-
fPathMask := d.kvExistenceIdxFilePathMask(fromStep, toStep)
409-
fPath, fileVer, ok, err := version.FindFilesWithVersionsByPattern(fPathMask)
408+
fNameMask := d.kvExistenceIdxFileNameMask(fromStep, toStep)
409+
fPath, fileVer, ok, err := version.MatchVersionedFile(fNameMask, dirEntries, d.dirs.SnapDomain)
410410
if err != nil {
411411
_, fName := filepath.Split(fPath)
412412
d.logger.Warn("[agg] Domain.openDirtyFiles", "err", err, "f", fName)
@@ -435,15 +435,15 @@ func (d *Domain) openDirtyFiles() (err error) {
435435
return nil
436436
}
437437

438-
func (h *History) openDirtyFiles() error {
438+
func (h *History) openDirtyFiles(dataEntries, accessorEntries []string) error {
439439
invalidFilesMu := sync.Mutex{}
440440
invalidFileItems := make([]*FilesItem, 0)
441441
h.dirtyFiles.Walk(func(items []*FilesItem) bool {
442442
for _, item := range items {
443443
fromStep, toStep := item.StepRange(h.stepSize)
444444
if item.decompressor == nil {
445-
fPathMask := h.vFilePathMask(fromStep, toStep)
446-
fPath, fileVer, ok, err := version.FindFilesWithVersionsByPattern(fPathMask)
445+
fNameMask := h.vFileNameMask(fromStep, toStep)
446+
fPath, fileVer, ok, err := version.MatchVersionedFile(fNameMask, dataEntries, h.dirs.SnapHistory)
447447
if err != nil {
448448
_, fName := filepath.Split(fPath)
449449
h.logger.Debug("[agg] History.openDirtyFiles: FileExist", "f", fName, "err", err)
@@ -453,7 +453,7 @@ func (h *History) openDirtyFiles() error {
453453
continue
454454
}
455455
if !ok {
456-
_, fName := filepath.Split(fPath)
456+
fName := fNameMask
457457
h.logger.Debug("[agg] History.openDirtyFiles: file does not exists", "f", fName)
458458
invalidFilesMu.Lock()
459459
invalidFileItems = append(invalidFileItems, item)
@@ -494,8 +494,8 @@ func (h *History) openDirtyFiles() error {
494494
}
495495

496496
if item.index == nil {
497-
fPathMask := h.vAccessorFilePathMask(fromStep, toStep)
498-
fPath, fileVer, ok, err := version.FindFilesWithVersionsByPattern(fPathMask)
497+
fNameMask := h.vAccessorFileNameMask(fromStep, toStep)
498+
fPath, fileVer, ok, err := version.MatchVersionedFile(fNameMask, accessorEntries, h.dirs.SnapAccessors)
499499
if err != nil {
500500
_, fName := filepath.Split(fPath)
501501
h.logger.Warn("[agg] History.openDirtyFiles", "err", err, "f", fName)
@@ -523,26 +523,26 @@ func (h *History) openDirtyFiles() error {
523523
return nil
524524
}
525525

526-
func (ii *InvertedIndex) openDirtyFiles() error {
526+
func (ii *InvertedIndex) openDirtyFiles(dataEntries, accessorEntries []string) error {
527527
var invalidFileItems []*FilesItem
528528
invalidFileItemsLock := sync.Mutex{}
529529
ii.dirtyFiles.Walk(func(items []*FilesItem) bool {
530530
for _, item := range items {
531531
fromStep, toStep := item.StepRange(ii.stepSize)
532532
if item.decompressor == nil {
533-
fPathPattern := ii.efFilePathMask(fromStep, toStep)
534-
fPath, fileVer, ok, err := version.FindFilesWithVersionsByPattern(fPathPattern)
533+
fNameMask := ii.efFileNameMask(fromStep, toStep)
534+
fPath, fileVer, ok, err := version.MatchVersionedFile(fNameMask, dataEntries, ii.dirs.SnapIdx)
535535
if err != nil {
536536
_, fName := filepath.Split(fPath)
537-
ii.logger.Debug("[agg] InvertedIndex.openDirtyFiles: FindFilesWithVersionsByPattern error", "f", fName, "err", err)
537+
ii.logger.Debug("[agg] InvertedIndex.openDirtyFiles: MatchVersionedFile error", "f", fName, "err", err)
538538
invalidFileItemsLock.Lock()
539539
invalidFileItems = append(invalidFileItems, item)
540540
invalidFileItemsLock.Unlock()
541541
continue
542542
}
543543

544544
if !ok {
545-
_, fName := filepath.Split(fPath)
545+
fName := fNameMask
546546
ii.logger.Debug("[agg] InvertedIndex.openDirtyFiles: file does not exists", "f", fName)
547547
invalidFileItemsLock.Lock()
548548
invalidFileItems = append(invalidFileItems, item)
@@ -571,8 +571,8 @@ func (ii *InvertedIndex) openDirtyFiles() error {
571571
}
572572

573573
if item.index == nil {
574-
fPathPattern := ii.efAccessorFilePathMask(fromStep, toStep)
575-
fPath, fileVer, ok, err := version.FindFilesWithVersionsByPattern(fPathPattern)
574+
fNameMask := ii.efAccessorFileNameMask(fromStep, toStep)
575+
fPath, fileVer, ok, err := version.MatchVersionedFile(fNameMask, accessorEntries, ii.dirs.SnapAccessors)
576576
if err != nil {
577577
_, fName := filepath.Split(fPath)
578578
ii.logger.Warn("[agg] InvertedIndex.openDirtyFiles", "err", err, "f", fName)

db/state/domain.go

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,19 @@ func (d *Domain) kvBtAccessorFilePathMask(fromStep, toStep kv.Step) string {
166166
return filepath.Join(d.dirs.SnapDomain, fmt.Sprintf("*-%s.%d-%d.bt", d.FilenameBase, fromStep, toStep))
167167
}
168168

169+
func (d *Domain) kvFileNameMask(fromStep, toStep kv.Step) string {
170+
return fmt.Sprintf("*-%s.%d-%d.kv", d.FilenameBase, fromStep, toStep)
171+
}
172+
func (d *Domain) kviAccessorFileNameMask(fromStep, toStep kv.Step) string {
173+
return fmt.Sprintf("*-%s.%d-%d.kvi", d.FilenameBase, fromStep, toStep)
174+
}
175+
func (d *Domain) kvExistenceIdxFileNameMask(fromStep, toStep kv.Step) string {
176+
return fmt.Sprintf("*-%s.%d-%d.kvei", d.FilenameBase, fromStep, toStep)
177+
}
178+
func (d *Domain) kvBtAccessorFileNameMask(fromStep, toStep kv.Step) string {
179+
return fmt.Sprintf("*-%s.%d-%d.bt", d.FilenameBase, fromStep, toStep)
180+
}
181+
169182
// maxStepInDB - return the latest available step in db (at-least 1 value in such step)
170183
func (d *Domain) maxStepInDB(tx kv.Tx) (lstInDb kv.Step) {
171184
lstIdx, _ := kv.LastKey(tx, d.History.KeysTable)
@@ -214,14 +227,14 @@ func (dt *DomainRoTx) NewWriter() *DomainBufferedWriter { return dt.newWriter(dt
214227
// It's ok if some files was open earlier.
215228
// If some file already open: noop.
216229
// If some file already open but not in provided list: close and remove from `files` field.
217-
func (d *Domain) OpenList(idxFiles, histFiles, domainFiles []string) error {
218-
if err := d.History.openList(idxFiles, histFiles); err != nil {
230+
func (d *Domain) OpenList(scanResult ScanDirsResult) error {
231+
if err := d.History.openList(scanResult.iiFiles, scanResult.historyFiles, scanResult.accessorFiles); err != nil {
219232
return err
220233
}
221234

222-
d.closeWhatNotInList(domainFiles)
223-
d.scanDirtyFiles(domainFiles)
224-
if err := d.openDirtyFiles(); err != nil {
235+
d.closeWhatNotInList(scanResult.domainFiles)
236+
d.scanDirtyFiles(scanResult.domainFiles)
237+
if err := d.openDirtyFiles(scanResult.domainFiles); err != nil {
225238
return fmt.Errorf("Domain(%s).openList: %w", d.FilenameBase, err)
226239
}
227240
d.protectFromHistoryFilesAheadOfDomainFiles()
@@ -239,11 +252,7 @@ func (d *Domain) openFolder(r *ScanDirsResult) error {
239252
if d.Disable {
240253
return nil
241254
}
242-
243-
if err := d.OpenList(r.iiFiles, r.historyFiles, r.domainFiles); err != nil {
244-
return err
245-
}
246-
return nil
255+
return d.OpenList(*r)
247256
}
248257

249258
func (d *Domain) closeFilesAfterStep(lowerBound kv.Step) {

db/state/history.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,10 @@ func (h *History) vFilePathMask(fromStep, toStep kv.Step) string {
119119
return filepath.Join(h.dirs.SnapHistory, h.vFileNameMask(fromStep, toStep))
120120
}
121121
func (h *History) vAccessorFilePathMask(fromStep, toStep kv.Step) string {
122-
return filepath.Join(h.dirs.SnapAccessors, fmt.Sprintf("*-%s.%d-%d.vi", h.FilenameBase, fromStep, toStep))
122+
return filepath.Join(h.dirs.SnapAccessors, h.vAccessorFileNameMask(fromStep, toStep))
123+
}
124+
func (h *History) vAccessorFileNameMask(fromStep, toStep kv.Step) string {
125+
return fmt.Sprintf("*-%s.%d-%d.vi", h.FilenameBase, fromStep, toStep)
123126
}
124127

125128
func (h *History) openHashMapAccessor(fPath string) (*recsplit.Index, error) {
@@ -134,21 +137,21 @@ func (h *History) openHashMapAccessor(fPath string) (*recsplit.Index, error) {
134137
// It's ok if some files was open earlier.
135138
// If some file already open: noop.
136139
// If some file already open but not in provided list: close and remove from `files` field.
137-
func (h *History) openList(idxFiles, histNames []string) error {
138-
if err := h.InvertedIndex.openList(idxFiles); err != nil {
140+
func (h *History) openList(idxFiles, histNames, accessorFiles []string) error {
141+
if err := h.InvertedIndex.openList(idxFiles, accessorFiles); err != nil {
139142
return err
140143
}
141144

142145
h.closeWhatNotInList(histNames)
143146
h.scanDirtyFiles(histNames)
144-
if err := h.openDirtyFiles(); err != nil {
147+
if err := h.openDirtyFiles(histNames, accessorFiles); err != nil {
145148
return fmt.Errorf("History(%s).openList: %w", h.FilenameBase, err)
146149
}
147150
return nil
148151
}
149152

150153
func (h *History) openFolder(scanDirsRes *ScanDirsResult) error {
151-
return h.openList(scanDirsRes.iiFiles, scanDirsRes.historyFiles)
154+
return h.openList(scanDirsRes.iiFiles, scanDirsRes.historyFiles, scanDirsRes.accessorFiles)
152155
}
153156

154157
func (h *History) scanDirtyFiles(fileNames []string) {

0 commit comments

Comments
 (0)