Skip to content

Commit 1344a4e

Browse files
[3.4] seg: show dict size and mem in seg ls output (cherry-pick) (#20792)
Cherry-pick of #20790 to release/3.4.
1 parent 7c3a94c commit 1344a4e

5 files changed

Lines changed: 60 additions & 10 deletions

File tree

db/seg/decompress.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,8 +509,40 @@ func (d *Decompressor) DataHandle() unsafe.Pointer {
509509
}
510510
// SerializedDictSize returns the decompressor's serializedDictSize field.
func (d *Decompressor) SerializedDictSize() uint64 { return d.serializedDictSize }
511511
// SerializedLenSize returns the decompressor's lenDictSize field.
func (d *Decompressor) SerializedLenSize() uint64 { return d.lenDictSize }
512+
// SerializedTotalDictSize returns the sum of the serializedDictSize and
// lenDictSize fields, i.e. SerializedDictSize() + SerializedLenSize().
func (d *Decompressor) SerializedTotalDictSize() uint64 { return d.serializedDictSize + d.lenDictSize }
512513
// DictWords returns the decompressor's dictWords field.
func (d *Decompressor) DictWords() int { return d.dictWords }
513514
// DictLens returns the decompressor's dictLens field.
func (d *Decompressor) DictLens() int { return d.dictLens }
515+
516+
// DictMemSize returns the in-memory size of the decoded Huffman table structures
517+
// (arena-allocated codeword/table/slot slabs). Pattern bytes are subslices of the
518+
// mmap'd file data and are not included.
//
// Each term is slice capacity multiplied by the element size in bytes, so the
// result reflects reserved slab capacity (cap), not only the elements in use
// (len). A nil patArena or posArena contributes nothing; if both are nil the
// result is 0.
519+
func (d *Decompressor) DictMemSize() uint64 {
520+
var total uint64
521+
if d.patArena != nil {
522+
total += uint64(cap(d.patArena.codewords)) * uint64(unsafe.Sizeof(codeword{}))
523+
total += uint64(cap(d.patArena.tables)) * uint64(unsafe.Sizeof(patternTable{}))
524+
// Sized by pointer width (*codeword), not by the size of a codeword value.
// NOTE(review): presumably slots is a []*codeword — confirm against the arena definition.
total += uint64(cap(d.patArena.slots)) * uint64(unsafe.Sizeof((*codeword)(nil)))
525+
}
526+
if d.posArena != nil {
527+
total += uint64(cap(d.posArena.tables)) * uint64(unsafe.Sizeof(posTable{}))
528+
total += uint64(cap(d.posArena.entriesArr)) * uint64(unsafe.Sizeof(posEntry{}))
529+
// Sized by pointer width (*posTable), not by the size of a posTable value.
// NOTE(review): presumably ptrsArr is a []*posTable — confirm against the arena definition.
total += uint64(cap(d.posArena.ptrsArr)) * uint64(unsafe.Sizeof((*posTable)(nil)))
530+
}
531+
return total
532+
}
533+
534+
// Stats accumulates snapshot segment stats for summary logging.
// The zero value is ready to use; totals are built up by calling Add once per
// decompressor.
535+
type Stats struct {
536+
// Words is the running sum of Decompressor.Count() over all added decompressors.
Words uint64
537+
// Dict is the running sum of Decompressor.SerializedTotalDictSize().
// Logged through common.ByteCount by callers, so presumably a byte count.
Dict uint64
538+
// DictMem is the running sum of Decompressor.DictMemSize(), in bytes.
DictMem uint64
539+
}
540+
541+
// Add folds one decompressor's figures into the running totals: d.Count() into
// Words, d.SerializedTotalDictSize() into Dict and d.DictMemSize() into DictMem.
//
// NOTE(review): d is not nil-checked here; presumably callers must pass a
// non-nil decompressor — confirm against call sites.
func (s *Stats) Add(d *Decompressor) {
542+
s.Words += uint64(d.Count())
543+
s.Dict += d.SerializedTotalDictSize()
544+
s.DictMem += d.DictMemSize()
545+
}
514546
// CompressedPageValuesCount returns the compPageValuesCount field converted to int.
func (d *Decompressor) CompressedPageValuesCount() int { return int(d.compPageValuesCount) }
515547
// CompressionFormatVersion returns the decompressor's version field.
func (d *Decompressor) CompressionFormatVersion() uint8 { return d.version }
516548

db/snapshotsync/caplin_state_snapshots.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -215,13 +215,17 @@ func (s *CaplinStateSnapshots) LS() {
215215
view := s.View()
216216
defer view.Close()
217217

218+
var stats seg.Stats
218219
for _, roTx := range view.roTxs {
219220
if roTx != nil {
220-
for _, seg := range roTx.Segments {
221-
s.logger.Info("[agg] ", "f", seg.src.filePath, "words", seg.src.Decompressor.Count())
221+
for _, sn := range roTx.Segments {
222+
d := sn.src.Decompressor
223+
s.logger.Info("[agg] ", "f", d.FileName(), "words", d.Count(), "dictOnDisk", common.ByteCount(d.SerializedTotalDictSize()), "dictMem", common.ByteCount(d.DictMemSize()))
224+
stats.Add(d)
222225
}
223226
}
224227
}
228+
s.logger.Info("[agg] total", "words", stats.Words, "dictOnDisk", common.ByteCount(stats.Dict), "dictMem", common.ByteCount(stats.DictMem))
225229
}
226230

227231
func (s *CaplinStateSnapshots) SegFileNames(from, to uint64) []string {

db/snapshotsync/freezeblocks/caplin_snapshots.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,16 +109,22 @@ func (s *CaplinSnapshots) LS() {
109109
view := s.View()
110110
defer view.Close()
111111

112+
var stats seg.Stats
113+
lsSeg := func(d *seg.Decompressor) {
114+
log.Info("[agg] ", "f", d.FileName(), "words", d.Count(), "dictOnDisk", common.ByteCount(d.SerializedTotalDictSize()), "dictMem", common.ByteCount(d.DictMemSize()))
115+
stats.Add(d)
116+
}
112117
if view.BeaconBlockRotx != nil {
113-
for _, seg := range view.BeaconBlockRotx.Segments {
114-
log.Info("[agg] ", "f", seg.Src().Decompressor.FileName(), "words", seg.Src().Decompressor.Count())
118+
for _, sn := range view.BeaconBlockRotx.Segments {
119+
lsSeg(sn.Src().Decompressor)
115120
}
116121
}
117122
if view.BlobSidecarRotx != nil {
118-
for _, seg := range view.BlobSidecarRotx.Segments {
119-
log.Info("[agg] ", "f", seg.Src().Decompressor.FileName(), "words", seg.Src().Decompressor.Count())
123+
for _, sn := range view.BlobSidecarRotx.Segments {
124+
lsSeg(sn.Src().Decompressor)
120125
}
121126
}
127+
log.Info("[agg] total", "words", stats.Words, "dictOnDisk", common.ByteCount(stats.Dict), "dictMem", common.ByteCount(stats.DictMem))
122128
}
123129

124130
func (s *CaplinSnapshots) SegFileNames(from, to uint64) []string {

db/snapshotsync/snapshots.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -967,14 +967,18 @@ func (s *RoSnapshots) Ls() {
967967
view := s.View()
968968
defer view.Close()
969969

970+
var stats seg.Stats
970971
for _, t := range s.enums {
971-
for _, seg := range s.visible[t] {
972-
if seg.src == nil || seg.src.Decompressor == nil {
972+
for _, sn := range s.visible[t] {
973+
if sn.src == nil || sn.src.Decompressor == nil {
973974
continue
974975
}
975-
log.Info("[snapshots] ", "f", seg.src.Decompressor.FileName(), "count", seg.src.Decompressor.Count())
976+
d := sn.src.Decompressor
977+
log.Info("[snapshots] ", "f", d.FileName(), "words", d.Count(), "dictOnDisk", common.ByteCount(d.SerializedTotalDictSize()), "dictMem", common.ByteCount(d.DictMemSize()))
978+
stats.Add(d)
976979
}
977980
}
981+
log.Info("[snapshots] total", "words", stats.Words, "dictOnDisk", common.ByteCount(stats.Dict), "dictMem", common.ByteCount(stats.DictMem))
978982
}
979983

980984
func (s *RoSnapshots) Files() (list []string) {

db/state/aggregator.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ import (
5050
"github.com/erigontech/erigon/db/kv/order"
5151
"github.com/erigontech/erigon/db/kv/rawdbv3"
5252
"github.com/erigontech/erigon/db/kv/stream"
53+
"github.com/erigontech/erigon/db/seg"
5354
"github.com/erigontech/erigon/db/state/changeset"
5455
"github.com/erigontech/erigon/db/state/statecfg"
5556
"github.com/erigontech/erigon/db/version"
@@ -669,13 +670,15 @@ func (a *Aggregator) Files() []string {
669670
return ac.AllFiles().Fullpaths()
670671
}
671672
func (a *Aggregator) LS() {
673+
var stats seg.Stats
672674
doLS := func(dirtyFiles *btree.BTreeG[*FilesItem]) {
673675
dirtyFiles.Walk(func(items []*FilesItem) bool {
674676
for _, item := range items {
675677
if item.decompressor == nil {
676678
continue
677679
}
678-
a.logger.Info("[agg] ", "f", item.decompressor.FileName(), "words", item.decompressor.Count())
680+
a.logger.Info("[agg] ", "f", item.decompressor.FileName(), "words", item.decompressor.Count(), "dictOnDisk", common.ByteCount(item.decompressor.SerializedTotalDictSize()), "dictMem", common.ByteCount(item.decompressor.DictMemSize()))
681+
stats.Add(item.decompressor)
679682
}
680683
return true
681684
})
@@ -691,6 +694,7 @@ func (a *Aggregator) LS() {
691694
for _, d := range a.iis {
692695
doLS(d.dirtyFiles)
693696
}
697+
a.logger.Info("[agg] total", "words", stats.Words, "dictOnDisk", common.ByteCount(stats.Dict), "dictMem", common.ByteCount(stats.DictMem))
694698
}
695699

696700
func (a *Aggregator) WaitForBuildAndMerge(ctx context.Context) chan struct{} {

0 commit comments

Comments
 (0)