Skip to content

Commit 1f87c16

Browse files
Commitment: do not use cell getter function to fold the row (#13509)
Refactors how trie branch updates are collected:
- encode the whole row at once, instead of encoding an update and then merging it with the existing value
- access the row in the grid directly, avoiding duplicated logic and the need for a lambda that returns a cell
- simplify the folding code so that it reads much more straightforwardly

Co-authored-by: alex.sharov <AskAlexSharov@gmail.com>
1 parent 92dc120 commit 1f87c16

10 files changed

+397
-461
lines changed

erigon-lib/commitment/commitment.go

Lines changed: 94 additions & 156 deletions
Original file line numberDiff line numberDiff line change
@@ -22,23 +22,21 @@ import (
2222
"encoding/binary"
2323
"errors"
2424
"fmt"
25-
"github.com/erigontech/erigon-lib/types/accounts"
2625
"math/bits"
2726
"sort"
2827
"strings"
2928
"unsafe"
3029

31-
"github.com/holiman/uint256"
32-
3330
"github.com/google/btree"
31+
"github.com/holiman/uint256"
3432
"golang.org/x/crypto/sha3"
3533

36-
"github.com/erigontech/erigon-lib/common"
3734
"github.com/erigontech/erigon-lib/common/cryptozerocopy"
3835
"github.com/erigontech/erigon-lib/common/length"
3936
"github.com/erigontech/erigon-lib/etl"
4037
"github.com/erigontech/erigon-lib/log/v3"
4138
"github.com/erigontech/erigon-lib/metrics"
39+
"github.com/erigontech/erigon-lib/types/accounts"
4240
)
4341

4442
var (
@@ -136,14 +134,17 @@ func InitializeTrieAndUpdates(tv TrieVariant, mode Mode, tmpdir string) (Trie, *
136134
fallthrough
137135
default:
138136

139-
trie := NewHexPatriciaHashed(length.Addr, nil, tmpdir)
137+
trie := NewHexPatriciaHashed(length.Addr, nil)
140138
tree := NewUpdates(mode, tmpdir, KeyToHexNibbleHash)
141139
return trie, tree
142140
}
143141
}
144142

143+
// cellFields is a bitmask of the fields present in an encoded cell
145144
type cellFields uint8
146145

146+
func (c cellFields) Has(field cellFields) bool { return c&field != 0 }
147+
147148
const (
148149
fieldExtension cellFields = 1
149150
fieldAccountAddr cellFields = 2
@@ -154,19 +155,19 @@ const (
154155

155156
func (p cellFields) String() string {
156157
var sb strings.Builder
157-
if p&fieldExtension != 0 {
158+
if p.Has(fieldExtension) {
158159
sb.WriteString("DownHash")
159160
}
160-
if p&fieldAccountAddr != 0 {
161+
if p.Has(fieldAccountAddr) {
161162
sb.WriteString("+AccountPlain")
162163
}
163-
if p&fieldStorageAddr != 0 {
164+
if p.Has(fieldStorageAddr) {
164165
sb.WriteString("+StoragePlain")
165166
}
166-
if p&fieldHash != 0 {
167+
if p.Has(fieldHash) {
167168
sb.WriteString("+Hash")
168169
}
169-
if p&fieldStateHash != 0 {
170+
if p.Has(fieldStateHash) {
170171
sb.WriteString("+LeafHash")
171172
}
172173
return sb.String()
@@ -176,110 +177,110 @@ type BranchEncoder struct {
176177
buf *bytes.Buffer
177178
bitmapBuf [binary.MaxVarintLen64]byte
178179
merger *BranchMerger
179-
updates *etl.Collector
180-
tmpdir string
181180
}
182181

183-
func NewBranchEncoder(sz uint64, tmpdir string) *BranchEncoder {
184-
be := &BranchEncoder{
182+
func NewBranchEncoder(sz uint64) *BranchEncoder {
183+
return &BranchEncoder{
185184
buf: bytes.NewBuffer(make([]byte, sz)),
186-
tmpdir: tmpdir,
187185
merger: NewHexBranchMerger(sz / 2),
188186
}
189-
//be.initCollector()
190-
return be
191187
}
192188

193-
func (be *BranchEncoder) initCollector() {
194-
if be.updates != nil {
195-
be.updates.Close()
189+
func (be *BranchEncoder) putUvarAndVal(size uint64, val []byte) error {
190+
n := binary.PutUvarint(be.bitmapBuf[:], size)
191+
if _, err := be.buf.Write(be.bitmapBuf[:n]); err != nil {
192+
return err
196193
}
197-
be.updates = etl.NewCollector("commitment.BranchEncoder", be.tmpdir, etl.NewOldestEntryBuffer(etl.BufferOptimalSize/4), log.Root().New("branch-encoder"))
198-
be.updates.LogLvl(log.LvlDebug)
199-
be.updates.SortAndFlushInBackground(true)
194+
if _, err := be.buf.Write(val); err != nil {
195+
return err
196+
}
197+
return nil
200198
}
201199

202-
func (be *BranchEncoder) Load(pc PatriciaContext, args etl.TransformArgs) error {
203-
// do not collect them at least now. Write them at CollectUpdate into pc
204-
if be.updates == nil {
205-
return nil
200+
func (cell *cell) EncodeInto(be *BranchEncoder) error {
201+
var fields cellFields
202+
if cell.extLen > 0 && cell.storageAddrLen == 0 {
203+
fields |= fieldExtension
206204
}
207-
208-
if err := be.updates.Load(nil, "", func(prefix, update []byte, table etl.CurrentTableReader, next etl.LoadNextFunc) error {
209-
stateValue, stateStep, err := pc.Branch(prefix)
210-
if err != nil {
205+
if cell.accountAddrLen > 0 {
206+
fields |= fieldAccountAddr
207+
}
208+
if cell.storageAddrLen > 0 {
209+
fields |= fieldStorageAddr
210+
}
211+
if cell.hashLen > 0 {
212+
fields |= fieldHash
213+
}
214+
if cell.stateHashLen == 32 && (cell.accountAddrLen > 0 || cell.storageAddrLen > 0) {
215+
fields |= fieldStateHash
216+
}
217+
if err := be.buf.WriteByte(byte(fields)); err != nil {
218+
return err
219+
}
220+
if fields.Has(fieldExtension) {
221+
if err := be.putUvarAndVal(uint64(cell.extLen), cell.extension[:cell.extLen]); err != nil {
211222
return err
212223
}
213-
214-
cp, cu := common.Copy(prefix), common.Copy(update) // has to copy :(
215-
if err = pc.PutBranch(cp, cu, stateValue, stateStep); err != nil {
224+
}
225+
if fields.Has(fieldAccountAddr) {
226+
if err := be.putUvarAndVal(uint64(cell.accountAddrLen), cell.accountAddr[:cell.accountAddrLen]); err != nil {
216227
return err
217228
}
218-
mxTrieBranchesUpdated.Inc()
219-
return nil
220-
}, args); err != nil {
221-
return err
222-
}
223-
be.initCollector()
224-
return nil
225-
}
226-
227-
func (be *BranchEncoder) CollectUpdate(
228-
ctx PatriciaContext,
229-
prefix []byte,
230-
bitmap, touchMap, afterMap uint16,
231-
readCell func(nibble int, skip bool) (*cell, error),
232-
) (lastNibble int, err error) {
233-
234-
prev, prevStep, err := ctx.Branch(prefix)
235-
if err != nil {
236-
return 0, err
237-
}
238-
update, lastNibble, err := be.EncodeBranch(bitmap, touchMap, afterMap, readCell)
239-
if err != nil {
240-
return 0, err
241229
}
242-
243-
if len(prev) > 0 {
244-
if bytes.Equal(prev, update) {
245-
//fmt.Printf("skip collectBranchUpdate [%x]\n", prefix)
246-
return lastNibble, nil // do not write the same data for prefix
230+
if fields.Has(fieldStorageAddr) {
231+
if err := be.putUvarAndVal(uint64(cell.storageAddrLen), cell.storageAddr[:cell.storageAddrLen]); err != nil {
232+
return err
247233
}
248-
update, err = be.merger.Merge(prev, update)
249-
if err != nil {
250-
return 0, err
234+
}
235+
if fields.Has(fieldHash) {
236+
if err := be.putUvarAndVal(uint64(cell.hashLen), cell.hash[:cell.hashLen]); err != nil {
237+
return err
251238
}
252239
}
253-
//fmt.Printf("\ncollectBranchUpdate [%x] -> %s\n", prefix, BranchData(update).String())
254-
// has to copy :(
255-
if err = ctx.PutBranch(common.Copy(prefix), common.Copy(update), prev, prevStep); err != nil {
256-
return 0, err
240+
if fields.Has(fieldStateHash) {
241+
if err := be.putUvarAndVal(uint64(cell.stateHashLen), cell.stateHash[:cell.stateHashLen]); err != nil {
242+
return err
243+
}
257244
}
258-
return lastNibble, nil
245+
return nil
259246
}
260247

261-
func (be *BranchEncoder) putUvarAndVal(size uint64, val []byte) error {
262-
n := binary.PutUvarint(be.bitmapBuf[:], size)
263-
if _, err := be.buf.Write(be.bitmapBuf[:n]); err != nil {
264-
return err
248+
// EncodeDelete encodes deleted branch with given touchMap.
249+
// Returned slice is valid until next call to encodeMaps/Reset()
250+
func (be *BranchEncoder) EncodeDelete(tm uint16) ([]byte, error) {
251+
if err := be.encodeMaps(tm, 0); err != nil {
252+
return nil, err
265253
}
266-
if _, err := be.buf.Write(val); err != nil {
254+
return be.EncodedBranch(), nil
255+
}
256+
257+
// Each encoded branch begins with 4 bytes: two big-endian uint16 bitmaps (touchMap, then afterMap).
258+
// encodeMaps resets be.buf and encodes them into be.buf
259+
func (be *BranchEncoder) encodeMaps(touchMap, afterMap uint16) error {
260+
binary.BigEndian.PutUint16(be.bitmapBuf[:], touchMap)
261+
binary.BigEndian.PutUint16(be.bitmapBuf[2:], afterMap)
262+
263+
be.buf.Reset()
264+
265+
if _, err := be.buf.Write(be.bitmapBuf[:4]); err != nil {
266+
be.buf.Reset()
267267
return err
268268
}
269269
return nil
270270
}
271271

272+
// Cells in a branch come one after another, without mentioning the nibble
273+
func (be *BranchEncoder) encodeCell(c *cell) error { return c.EncodeInto(be) }
274+
275+
// Returned slice is valid until next call to encodeMaps/be.Reset()
276+
func (be *BranchEncoder) EncodedBranch() []byte { return be.buf.Bytes() }
277+
278+
func (be *BranchEncoder) Reset() { be.buf.Reset() }
279+
272280
// Encoded result should be copied before next call to EncodeBranch, underlying slice is reused
281+
//
// Deprecated: avoid using EncodeBranch in new code.
273282
func (be *BranchEncoder) EncodeBranch(bitmap, touchMap, afterMap uint16, readCell func(nibble int, skip bool) (*cell, error)) (BranchData, int, error) {
274-
be.buf.Reset()
275-
276-
var encoded [2]byte
277-
binary.BigEndian.PutUint16(encoded[:], touchMap)
278-
if _, err := be.buf.Write(encoded[:]); err != nil {
279-
return nil, 0, err
280-
}
281-
binary.BigEndian.PutUint16(encoded[:], afterMap)
282-
if _, err := be.buf.Write(encoded[:]); err != nil {
283+
if err := be.encodeMaps(touchMap, afterMap); err != nil {
283284
return nil, 0, err
284285
}
285286

@@ -300,59 +301,16 @@ func (be *BranchEncoder) EncodeBranch(bitmap, touchMap, afterMap uint16, readCel
300301
}
301302

302303
if bitmap&bit != 0 {
303-
var fields cellFields
304-
if cell.extLen > 0 && cell.storageAddrLen == 0 {
305-
fields |= fieldExtension
306-
}
307-
if cell.accountAddrLen > 0 {
308-
fields |= fieldAccountAddr
309-
}
310-
if cell.storageAddrLen > 0 {
311-
fields |= fieldStorageAddr
312-
}
313-
if cell.hashLen > 0 {
314-
fields |= fieldHash
315-
}
316-
if cell.stateHashLen == 32 && (cell.accountAddrLen > 0 || cell.storageAddrLen > 0) {
317-
fields |= fieldStateHash
318-
}
319-
if err := be.buf.WriteByte(byte(fields)); err != nil {
304+
if err := cell.EncodeInto(be); err != nil {
320305
return nil, 0, err
321306
}
322-
if fields&fieldExtension != 0 {
323-
if err := be.putUvarAndVal(uint64(cell.extLen), cell.extension[:cell.extLen]); err != nil {
324-
return nil, 0, err
325-
}
326-
}
327-
if fields&fieldAccountAddr != 0 {
328-
if err := be.putUvarAndVal(uint64(cell.accountAddrLen), cell.accountAddr[:cell.accountAddrLen]); err != nil {
329-
return nil, 0, err
330-
}
331-
}
332-
if fields&fieldStorageAddr != 0 {
333-
if err := be.putUvarAndVal(uint64(cell.storageAddrLen), cell.storageAddr[:cell.storageAddrLen]); err != nil {
334-
return nil, 0, err
335-
}
336-
}
337-
if fields&fieldHash != 0 {
338-
if err := be.putUvarAndVal(uint64(cell.hashLen), cell.hash[:cell.hashLen]); err != nil {
339-
return nil, 0, err
340-
}
341-
}
342-
if fields&fieldStateHash != 0 {
343-
if err := be.putUvarAndVal(uint64(cell.stateHashLen), cell.stateHash[:cell.stateHashLen]); err != nil {
344-
return nil, 0, err
345-
}
346-
}
347307
}
348308
bitset ^= bit
349309
}
350310
//fmt.Printf("EncodeBranch [%x] size: %d\n", be.buf.Bytes(), be.buf.Len())
351-
return be.buf.Bytes(), lastNibble, nil
311+
return be.EncodedBranch(), lastNibble, nil
352312
}
353313

354-
func RetrieveCellNoop(nibble int, skip bool) (*cell, error) { return nil, nil }
355-
356314
type BranchData []byte
357315

358316
func (branchData BranchData) String() string {
@@ -379,27 +337,7 @@ func (branchData BranchData) String() string {
379337
// This is used for test output, so ok to panic
380338
panic(err)
381339
}
382-
sb.WriteString("{")
383-
var comma string
384-
if cell.hashedExtLen > 0 {
385-
fmt.Fprintf(&sb, "hashedExtension=[%x]", cell.hashedExtension[:cell.hashedExtLen])
386-
comma = ","
387-
}
388-
if cell.accountAddrLen > 0 {
389-
fmt.Fprintf(&sb, "%saccountAddr=[%x]", comma, cell.accountAddr[:cell.accountAddrLen])
390-
comma = ","
391-
}
392-
if cell.storageAddrLen > 0 {
393-
fmt.Fprintf(&sb, "%sstorageAddr=[%x]", comma, cell.storageAddr[:cell.storageAddrLen])
394-
comma = ","
395-
}
396-
if cell.hashLen > 0 {
397-
fmt.Fprintf(&sb, "%shash=[%x]", comma, cell.hash[:cell.hashLen])
398-
}
399-
if cell.stateHashLen > 0 {
400-
fmt.Fprintf(&sb, "%sleafHash=[%x]", comma, cell.stateHash[:cell.stateHashLen])
401-
}
402-
sb.WriteString("}\n")
340+
sb.WriteString(cell.String())
403341
}
404342
bitset ^= bit
405343
}
@@ -425,7 +363,7 @@ func (branchData BranchData) ReplacePlainKeys(newData []byte, fn func(key []byte
425363
fields := cellFields(branchData[pos])
426364
newData = append(newData, byte(fields))
427365
pos++
428-
if fields&fieldExtension != 0 {
366+
if fields.Has(fieldExtension) {
429367
l, n := binary.Uvarint(branchData[pos:])
430368
if n == 0 {
431369
return nil, errors.New("replacePlainKeys buffer too small for hashedKey len")
@@ -442,7 +380,7 @@ func (branchData BranchData) ReplacePlainKeys(newData []byte, fn func(key []byte
442380
pos += int(l)
443381
}
444382
}
445-
if fields&fieldAccountAddr != 0 {
383+
if fields.Has(fieldAccountAddr) {
446384
l, n := binary.Uvarint(branchData[pos:])
447385
if n == 0 {
448386
return nil, errors.New("replacePlainKeys buffer too small for accountAddr len")
@@ -475,7 +413,7 @@ func (branchData BranchData) ReplacePlainKeys(newData []byte, fn func(key []byte
475413
newData = append(newData, newKey...)
476414
}
477415
}
478-
if fields&fieldStorageAddr != 0 {
416+
if fields.Has(fieldStorageAddr) {
479417
l, n := binary.Uvarint(branchData[pos:])
480418
if n == 0 {
481419
return nil, errors.New("replacePlainKeys buffer too small for storageAddr len")
@@ -508,7 +446,7 @@ func (branchData BranchData) ReplacePlainKeys(newData []byte, fn func(key []byte
508446
newData = append(newData, newKey...)
509447
}
510448
}
511-
if fields&fieldHash != 0 {
449+
if fields.Has(fieldHash) {
512450
l, n := binary.Uvarint(branchData[pos:])
513451
if n == 0 {
514452
return nil, errors.New("replacePlainKeys buffer too small for hash len")
@@ -525,7 +463,7 @@ func (branchData BranchData) ReplacePlainKeys(newData []byte, fn func(key []byte
525463
pos += int(l)
526464
}
527465
}
528-
if fields&fieldStateHash != 0 {
466+
if fields.Has(fieldStateHash) {
529467
l, n := binary.Uvarint(branchData[pos:])
530468
if n == 0 {
531469
return nil, errors.New("replacePlainKeys buffer too small for acLeaf hash len")
@@ -865,7 +803,7 @@ func DecodeBranchAndCollectStat(key, branch []byte, tv TrieVariant) *BranchStat
865803
if c == nil {
866804
continue
867805
}
868-
enc := uint64(len(c.Encode()))
806+
enc := uint64(len(c.EncodeRoot()))
869807
stat.MinCellSize = min(stat.MinCellSize, enc)
870808
stat.MaxCellSize = max(stat.MaxCellSize, enc)
871809
switch {

0 commit comments

Comments
 (0)