Skip to content

Commit de6e682

Browse files
author
info@weblogix.biz
committed
Merge remote-tracking branch 'origin/main' into alex/ef_reset_34
2 parents 8869784 + 0ec4079 commit de6e682

File tree

23 files changed

+207
-140
lines changed

23 files changed

+207
-140
lines changed

db/recsplit/eliasfano16/elias_fano.go

Lines changed: 66 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"unsafe"
2626

2727
"github.com/c2h5oh/datasize"
28+
2829
"github.com/erigontech/erigon/common/bitutil"
2930
)
3031

@@ -118,36 +119,35 @@ func (ef *EliasFano) deriveFields() int {
118119
// Build constructs the Elias-Fano index for the given sequence
119120
func (ef *EliasFano) Build() {
120121
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(ef.wordsUpperBits); i++ {
121-
for b := uint64(0); b < 64; b++ {
122-
if ef.upperBits[i]&(uint64(1)<<b) != 0 {
123-
if (c & superQMask) == 0 {
124-
// When c is multiple of 2^14 (4096)
125-
lastSuperQ = i*64 + b
126-
ef.jump[(c/superQ)*superQSize] = lastSuperQ
127-
}
128-
if (c & qMask) == 0 {
129-
// When c is multiple of 2^8 (256)
130-
var offset = i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
131-
// offset needs to be encoded as 16-bit integer, therefore the following check
132-
if offset >= (1 << 16) {
133-
fmt.Printf("ef.l=%x,ef.u=%x\n", ef.l, ef.u)
134-
fmt.Printf("offset=%x,lastSuperQ=%x,i=%x,b=%x,c=%x\n", offset, lastSuperQ, i, b, c)
135-
fmt.Printf("ef.minDelta=%x\n", ef.minDelta)
136-
//fmt.Printf("ef.upperBits=%x\n", ef.upperBits)
137-
//fmt.Printf("ef.lowerBits=%x\n", ef.lowerBits)
138-
//fmt.Printf("ef.wordsUpperBits=%b\n", ef.wordsUpperBits)
139-
panic("")
140-
}
141-
// c % superQ is the bit index inside the group of 4096 bits
142-
jumpSuperQ := (c / superQ) * superQSize
143-
jumpInsideSuperQ := (c % superQ) / q
144-
idx64 := jumpSuperQ + 1 + (jumpInsideSuperQ >> 2)
145-
shift := 16 * (jumpInsideSuperQ % 4)
146-
mask := uint64(0xffff) << shift
147-
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
122+
for word := ef.upperBits[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
123+
b := uint64(bits.TrailingZeros64(word))
124+
if (c & superQMask) == 0 {
125+
// When c is multiple of 2^14 (4096)
126+
lastSuperQ = i*64 + b
127+
ef.jump[(c/superQ)*superQSize] = lastSuperQ
128+
}
129+
if (c & qMask) == 0 {
130+
// When c is multiple of 2^8 (256)
131+
var offset = i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
132+
// offset needs to be encoded as 16-bit integer, therefore the following check
133+
if offset >= (1 << 16) {
134+
fmt.Printf("ef.l=%x,ef.u=%x\n", ef.l, ef.u)
135+
fmt.Printf("offset=%x,lastSuperQ=%x,i=%x,b=%x,c=%x\n", offset, lastSuperQ, i, b, c)
136+
fmt.Printf("ef.minDelta=%x\n", ef.minDelta)
137+
//fmt.Printf("ef.upperBits=%x\n", ef.upperBits)
138+
//fmt.Printf("ef.lowerBits=%x\n", ef.lowerBits)
139+
//fmt.Printf("ef.wordsUpperBits=%b\n", ef.wordsUpperBits)
140+
panic("")
148141
}
149-
c++
142+
// c % superQ is the bit index inside the group of 4096 bits
143+
jumpSuperQ := (c / superQ) * superQSize
144+
jumpInsideSuperQ := (c % superQ) / q
145+
idx64 := jumpSuperQ + 1 + (jumpInsideSuperQ >> 2)
146+
shift := 16 * (jumpInsideSuperQ % 4)
147+
mask := uint64(0xffff) << shift
148+
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
150149
}
150+
c++
151151
}
152152
}
153153
}
@@ -355,54 +355,52 @@ func (ef *DoubleEliasFano) Build(cumKeys []uint64, position []uint64) {
355355
// c/superQ is the index of the current 4096 block of bits
356356
// superQSize is how many words are required to encode one block of 4096 bits. It is 17 words, which is 1088 bits
357357
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsCumKeys); i++ {
358-
for b := uint64(0); b < 64; b++ {
359-
if ef.upperBitsCumKeys[i]&(uint64(1)<<b) != 0 {
360-
if (c & superQMask) == 0 {
361-
// When c is multiple of 2^14 (4096)
362-
lastSuperQ = i*64 + b
363-
ef.jump[(c/superQ)*(superQSize*2)] = lastSuperQ
364-
}
365-
if (c & qMask) == 0 {
366-
// When c is multiple of 2^8 (256)
367-
var offset = i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
368-
// offset needs to be encoded as 16-bit integer, therefore the following check
369-
if offset >= (1 << 16) {
370-
panic("")
371-
}
372-
// c % superQ is the bit index inside the group of 4096 bits
373-
jumpSuperQ := (c / superQ) * (superQSize * 2)
374-
jumpInsideSuperQ := 2 * (c % superQ) / q
375-
idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2)
376-
shift := 16 * (jumpInsideSuperQ % 4)
377-
mask := uint64(0xffff) << shift
378-
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
358+
for word := ef.upperBitsCumKeys[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
359+
b := uint64(bits.TrailingZeros64(word))
360+
if (c & superQMask) == 0 {
361+
// When c is multiple of 2^14 (4096)
362+
lastSuperQ = i*64 + b
363+
ef.jump[(c/superQ)*(superQSize*2)] = lastSuperQ
364+
}
365+
if (c & qMask) == 0 {
366+
// When c is multiple of 2^8 (256)
367+
var offset = i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
368+
// offset needs to be encoded as 16-bit integer, therefore the following check
369+
if offset >= (1 << 16) {
370+
panic("")
379371
}
380-
c++
372+
// c % superQ is the bit index inside the group of 4096 bits
373+
jumpSuperQ := (c / superQ) * (superQSize * 2)
374+
jumpInsideSuperQ := 2 * (c % superQ) / q
375+
idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2)
376+
shift := 16 * (jumpInsideSuperQ % 4)
377+
mask := uint64(0xffff) << shift
378+
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
381379
}
380+
c++
382381
}
383382
}
384383

385384
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsPosition); i++ {
386-
for b := uint64(0); b < 64; b++ {
387-
if ef.upperBitsPosition[i]&(uint64(1)<<b) != 0 {
388-
if (c & superQMask) == 0 {
389-
lastSuperQ = i*64 + b
390-
ef.jump[(c/superQ)*(superQSize*2)+1] = lastSuperQ
391-
}
392-
if (c & qMask) == 0 {
393-
var offset = i*64 + b - lastSuperQ
394-
if offset >= (1 << 16) {
395-
panic("")
396-
}
397-
jumpSuperQ := (c / superQ) * (superQSize * 2)
398-
jumpInsideSuperQ := 2*((c%superQ)/q) + 1
399-
idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2)
400-
shift := 16 * (jumpInsideSuperQ % 4)
401-
mask := uint64(0xffff) << shift
402-
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
385+
for word := ef.upperBitsPosition[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
386+
b := uint64(bits.TrailingZeros64(word))
387+
if (c & superQMask) == 0 {
388+
lastSuperQ = i*64 + b
389+
ef.jump[(c/superQ)*(superQSize*2)+1] = lastSuperQ
390+
}
391+
if (c & qMask) == 0 {
392+
var offset = i*64 + b - lastSuperQ
393+
if offset >= (1 << 16) {
394+
panic("")
403395
}
404-
c++
396+
jumpSuperQ := (c / superQ) * (superQSize * 2)
397+
jumpInsideSuperQ := 2*((c%superQ)/q) + 1
398+
idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2)
399+
shift := 16 * (jumpInsideSuperQ % 4)
400+
mask := uint64(0xffff) << shift
401+
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
405402
}
403+
c++
406404
}
407405
}
408406
//fmt.Printf("jump: %x\n", ef.jump)

db/recsplit/eliasfano32/elias_fano.go

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,8 @@ func (ef *EliasFano) ResetForWrite(count, maxOffset uint64) {
137137
// Build constructs the Elias-Fano index for the given sequence
138138
func (ef *EliasFano) Build() {
139139
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(ef.wordsUpperBits); i++ {
140-
for b := uint64(0); b < 64; b++ {
141-
if ef.upperBits[i]&(uint64(1)<<b) == 0 {
142-
continue
143-
}
140+
for word := ef.upperBits[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
141+
b := uint64(bits.TrailingZeros64(word))
144142
if (c & superQMask) == 0 {
145143
// When c is multiple of 2^14 (4096)
146144
lastSuperQ = i*64 + b
@@ -753,10 +751,8 @@ func (ef *DoubleEliasFano) Build(cumKeys []uint64, position []uint64) {
753751
// c/superQ is the index of the current 4096 block of bits
754752
// superQSize is how many words are required to encode one block of 4096 bits. It is 17 words, which is 1088 bits
755753
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsCumKeys); i++ {
756-
for b := uint64(0); b < 64; b++ {
757-
if ef.upperBitsCumKeys[i]&(uint64(1)<<b) == 0 {
758-
continue
759-
}
754+
for word := ef.upperBitsCumKeys[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
755+
b := uint64(bits.TrailingZeros64(word))
760756
if (c & superQMask) == 0 {
761757
// When c is multiple of 2^14 (4096)
762758
lastSuperQ = i*64 + b
@@ -782,11 +778,8 @@ func (ef *DoubleEliasFano) Build(cumKeys []uint64, position []uint64) {
782778
}
783779

784780
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsPosition); i++ {
785-
for b := uint64(0); b < 64; b++ {
786-
if ef.upperBitsPosition[i]&(uint64(1)<<b) == 0 {
787-
continue
788-
}
789-
781+
for word := ef.upperBitsPosition[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
782+
b := uint64(bits.TrailingZeros64(word))
790783
if (c & superQMask) == 0 {
791784
lastSuperQ = i*64 + b
792785
ef.jump[(c/superQ)*(superQSize*2)+1] = lastSuperQ

db/recsplit/eliasfano32/elias_fano_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package eliasfano32
1818

1919
import (
2020
"bytes"
21+
"fmt"
2122
"math"
2223
"math/bits"
2324
"testing"
@@ -590,6 +591,22 @@ func BenchmarkEF(b *testing.B) {
590591
})
591592
}
592593

594+
func BenchmarkBuild(b *testing.B) {
595+
for _, count := range []uint64{100, 1_000_000} {
596+
b.Run(fmt.Sprintf("count=%d", count), func(b *testing.B) {
597+
maxOffset := (count - 1) * 123
598+
ef := NewEliasFano(count, maxOffset)
599+
for i := uint64(0); i < count; i++ {
600+
ef.AddOffset(i * 123)
601+
}
602+
b.ResetTimer()
603+
for b.Loop() {
604+
ef.Build()
605+
}
606+
})
607+
}
608+
}
609+
593610
func naiveReverseIterator(ef *EliasFano) *stream.ArrStream[uint64] {
594611
it := ef.Iterator()
595612
var values []uint64

docs/gitbook/src/fundamentals/basic-usage.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ To run Erigon with RPCDaemon, TxPool, and other components in a single process i
9595
* `--log.dir.path` dictates where [logs](logs.md) will be output - useful for sending reports to the Erigon team when issues occur.
9696
* Based on the [sync mode](sync-modes.md) you want to run, you can add `--prune.mode=archive` to run an archive node, `--prune.mode=full` for a full node (default value) or `--prune.mode=minimal` for a minimal node.
9797
* `--http.addr="0.0.0.0" --http.api=eth,web3,net,debug,trace,txpool` to use [RPC Service](../interacting-with-erigon/) and e.g. be able to connect your [wallet](web3-wallet.md).
98-
* `--torrent.download.rate=512mb` to increase download speed. While the default downloading speed is 128mb, with this flag Erigon will use as much download speed as it can, up to a maximum of 512 megabytes per second. This means it will try to download data as quickly as possible, but it won't exceed the 512 MB/s limit you've set.
98+
* `--torrent.download.rate=512mb` sets the maximum download speed. The default is `512mb`. You can lower this value to limit bandwidth usage, for example `--torrent.download.rate=128mb` to cap downloads at 128 MB/s.
9999

100100
To stop the Erigon node you can use the `CTRL+C` command.
101101

0 commit comments

Comments
 (0)