Skip to content

Commit b09a677

Browse files
authored
Merge branch 'main' into no-transfer-syscall
2 parents ff4c8ff + e3b1eac commit b09a677

17 files changed

Lines changed: 211 additions & 22 deletions

File tree

.github/README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,41 @@ Consider setting tighter defaults in the workflow matrix for jobs that are known
217217
be memory- or disk-heavy, rather than working around pressure by adjusting unrelated
218218
constraints like timeouts or GC tuning.
219219

220+
## Checking benchmarks
221+
222+
The purpose of `make test-bench` in CI is to verify that benchmarks compile and
223+
execute at least one iteration — not to produce meaningful performance numbers.
224+
225+
### Why benchmarks are slow by default
226+
227+
Many benchmarks are sized for profiling or comparison work: a single iteration can
228+
take minutes. Go's benchmark runner will execute exactly 1 iteration in that case
229+
(`-benchtime=1x`), so the `ns/op` figure is meaningless and the run just wastes
230+
time.
231+
232+
The fix is to keep benchmark iteration work small enough to be loopable, and use
233+
`testing.Short()` to trim parameter sweeps when all we need is a smoke test:
234+
235+
```go
236+
if testing.Short() {
237+
totalSteps = 10 // instead of 200+
238+
keyCount = 10_000 // instead of 1_000_000
239+
}
240+
```
241+
242+
`make test-bench` passes `-short` so these guards are active in CI.
243+
244+
### Why you cannot parallelize across packages
245+
246+
`go test` forces benchmark packages to run **serially** regardless of the `-p` flag.
247+
The serialization is enforced at the action-graph level in `cmd/go` when `-bench` is
248+
set — each package run is added as a dependency of the previous one. Passing
249+
`-p N` only affects compilation parallelism, not execution order.
250+
251+
The only way to reduce `make test-bench` wall time is to reduce the work done per
252+
benchmark iteration, which is why right-sizing benchmarks (via `testing.Short()`) is
253+
the correct approach rather than parallelizing the runner.
254+
220255
## Local reproducibility
221256

222257
Every CI job should have a local equivalent so developers can pre-check before pushing.

.github/workflows/ci-gate.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ jobs:
8181
secrets: inherit
8282

8383
ci-gate:
84-
name: Required
8584
if: always()
8685
needs:
8786
- lint

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,4 +123,4 @@ mdbx.lck
123123

124124
# Prevent accidental commit of locally-built binaries
125125
/erigon
126-
126+
.claude/worktrees/

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ test-all: override GO_FLAGS := -timeout $(default_test_timeout) $(GO_FLAGS)
226226
test-all: test-filtered
227227

228228
## test-bench: check the benchmarks compile and run
229-
test-bench: override GO_FLAGS += -run=^$$ -bench=. -benchtime=1x
229+
test-bench: override GO_FLAGS += -run=^$$ -bench=. -benchtime=1x -short -timeout=5m
230230
test-bench:
231231
$(GOTEST)
232232

cl/phase1/core/checkpoint_sync/util.go

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,30 +3,71 @@ package checkpoint_sync
33
import (
44
"context"
55
"fmt"
6+
"path/filepath"
67

78
"github.com/spf13/afero"
89

910
"github.com/erigontech/erigon/cl/clparams"
1011
"github.com/erigontech/erigon/cl/persistence/genesisdb"
1112
"github.com/erigontech/erigon/cl/phase1/core/state"
13+
"github.com/erigontech/erigon/cl/utils"
14+
"github.com/erigontech/erigon/common/log/v3"
1215
"github.com/erigontech/erigon/db/datadir"
1316
)
1417

1518
// ReadOrFetchLatestBeaconState reads the latest beacon state from disk or fetches it from the network.
19+
// If remote checkpoint sync fails, it falls back to the local head state on disk.
20+
// If no local head state is available, it returns an error.
1621
func ReadOrFetchLatestBeaconState(ctx context.Context, dirs datadir.Dirs, beaconCfg *clparams.BeaconChainConfig, caplinConfig clparams.CaplinConfig, genesisDB genesisdb.GenesisDB) (*state.CachingBeaconState, error) {
17-
var syncer CheckpointSyncer
1822
remoteSync := !caplinConfig.DisabledCheckpointSync && !caplinConfig.IsDevnet()
1923

2024
if remoteSync {
21-
syncer = NewRemoteCheckpointSync(beaconCfg, caplinConfig.NetworkId)
22-
} else {
23-
aferoFs := afero.NewOsFs()
25+
syncer := NewRemoteCheckpointSync(beaconCfg, caplinConfig.NetworkId)
26+
st, err := syncer.GetLatestBeaconState(ctx)
27+
if err == nil {
28+
return st, nil
29+
}
30+
log.Warn("[Checkpoint Sync] Remote checkpoint sync failed, attempting to read local head state", "err", err)
2431

25-
genesisState, err := genesisDB.ReadGenesisState()
26-
if err != nil {
27-
return nil, fmt.Errorf("could not read genesis state: %w", err)
32+
// Fallback: try to read the local head state from disk
33+
localState, localErr := ReadLocalHeadState(dirs, beaconCfg)
34+
if localErr == nil {
35+
log.Info("[Checkpoint Sync] Successfully loaded local head state", "slot", localState.Slot())
36+
return localState, nil
2837
}
29-
syncer = NewLocalCheckpointSyncer(genesisState, afero.NewBasePathFs(aferoFs, dirs.CaplinLatest))
38+
log.Error("[Checkpoint Sync] No local head state available either", "err", localErr)
39+
return nil, fmt.Errorf("remote checkpoint sync failed: %w, and no local head state: %w", err, localErr)
40+
}
41+
42+
// Non-remote sync path (disabled checkpoint sync or devnet)
43+
aferoFs := afero.NewOsFs()
44+
genesisState, err := genesisDB.ReadGenesisState()
45+
if err != nil {
46+
return nil, fmt.Errorf("could not read genesis state: %w", err)
3047
}
48+
syncer := NewLocalCheckpointSyncer(genesisState, afero.NewBasePathFs(aferoFs, dirs.CaplinLatest))
3149
return syncer.GetLatestBeaconState(ctx)
3250
}
51+
52+
// ReadLocalHeadState reads the head state directly from disk without falling back to genesis.
53+
func ReadLocalHeadState(dirs datadir.Dirs, beaconCfg *clparams.BeaconChainConfig) (*state.CachingBeaconState, error) {
54+
statePath := filepath.Join(dirs.CaplinLatest, clparams.LatestStateFileName)
55+
snappyEncoded, err := afero.ReadFile(afero.NewOsFs(), statePath)
56+
if err != nil {
57+
return nil, fmt.Errorf("could not read local head state file: %w", err)
58+
}
59+
decompressed, err := utils.DecompressSnappy(snappyEncoded, false)
60+
if err != nil {
61+
return nil, fmt.Errorf("local head state is corrupt: %w", err)
62+
}
63+
slot, err := utils.ExtractSlotFromSerializedBeaconState(decompressed)
64+
if err != nil {
65+
return nil, fmt.Errorf("could not extract slot from local head state: %w", err)
66+
}
67+
bs := state.New(beaconCfg)
68+
epoch := slot / beaconCfg.SlotsPerEpoch
69+
if err := bs.DecodeSSZ(decompressed, int(beaconCfg.GetCurrentStateVersion(epoch))); err != nil {
70+
return nil, fmt.Errorf("could not decode local head state: %w", err)
71+
}
72+
return bs, nil
73+
}

db/datastruct/btindex/bpstree_bench_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ func BenchmarkBpsTreeSeek(t *testing.B) {
1414
tmp := t.TempDir()
1515
logger := log.New()
1616
keyCount, M := 12_000_000, 256
17+
if testing.Short() {
18+
keyCount = 10_000
19+
}
1720
t.Logf("N: %d, M: %d skip since shard <= %d", keyCount, M, DefaultBtreeStartSkip)
1821
compressFlags := seg.CompressKeys | seg.CompressVals
1922

db/datastruct/btindex/btree_index_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,9 @@ func TestNewBtIndex(t *testing.T) {
429429

430430
func BenchmarkBtIndex_Get(b *testing.B) {
431431
keyCount := 1_000_000
432+
if testing.Short() {
433+
keyCount = 10_000
434+
}
432435
compress := seg.CompressKeys
433436

434437
for _, M := range []uint64{256, 128, 64, 32} {

db/kv/kv_interface.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,9 @@ type TemporalDebugTx interface {
449449
// TraceKey returns stream of <txNum->value_after_txnum_change> for a given key
450450
TraceKey(domain Domain, k []byte, fromTxNum, toTxNum uint64) (stream.U64V, error)
451451

452+
// HistoryKeyTxNumRange returns (key, txNum) pairs for every txNum at which a key changed in [fromTs, toTs)
453+
HistoryKeyTxNumRange(name Domain, fromTs, toTs int, asc order.By, limit int) (it stream.KU64, err error)
454+
452455
DomainFiles(domain ...Domain) VisibleFiles
453456
CurrentDomainVersion(domain Domain) version.Version
454457
TxNumsInFiles(domains ...Domain) (minTxNum uint64)

db/kv/remotedb/kv_remote.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,9 @@ func (tx *tx) GetLatestFromFiles(domain kv.Domain, k []byte, maxTxNum uint64) (v
261261
func (tx *tx) TraceKey(domain kv.Domain, k []byte, fromTxNum, toTxNum uint64) (stream.U64V, error) {
262262
panic("not implemented")
263263
}
264+
func (tx *tx) HistoryKeyTxNumRange(name kv.Domain, fromTs, toTs int, asc order.By, limit int) (stream.KU64, error) {
265+
panic("not implemented")
266+
}
264267
func (tx *tx) IIProgress(domain kv.InvertedIdx) uint64 { panic("not implemented") }
265268
func (tx *tx) RangeLatest(domain kv.Domain, from, to []byte, limit int) (stream.KV, error) {
266269
panic("not implemented")

db/kv/temporal/kv_temporal.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,23 @@ func (tx *RwTx) HistoryRange(name kv.Domain, fromTs, toTs int, asc order.By, lim
604604
return tx.historyRange(name, tx.RwTx, fromTs, toTs, asc, limit)
605605
}
606606

607+
func (tx *tx) historyKeyTxNumRange(name kv.Domain, dbTx kv.Tx, fromTs, toTs int, asc order.By, limit int) (stream.KU64, error) {
608+
it, err := tx.aggtx.HistoryKeyTxNumRange(name, fromTs, toTs, asc, limit, dbTx)
609+
if err != nil {
610+
return nil, err
611+
}
612+
tx.resourcesToClose = append(tx.resourcesToClose, it)
613+
return it, nil
614+
}
615+
616+
func (tx *Tx) HistoryKeyTxNumRange(name kv.Domain, fromTs, toTs int, asc order.By, limit int) (stream.KU64, error) {
617+
return tx.historyKeyTxNumRange(name, tx.Tx, fromTs, toTs, asc, limit)
618+
}
619+
620+
func (tx *RwTx) HistoryKeyTxNumRange(name kv.Domain, fromTs, toTs int, asc order.By, limit int) (stream.KU64, error) {
621+
return tx.historyKeyTxNumRange(name, tx.RwTx, fromTs, toTs, asc, limit)
622+
}
623+
607624
// Write methods
608625

609626
func (tx *tx) DomainPut(domain kv.Domain, k, v []byte, txNum uint64, prevVal []byte) error {

0 commit comments

Comments
 (0)