Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 66 additions & 68 deletions db/recsplit/eliasfano16/elias_fano.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"unsafe"

"github.com/c2h5oh/datasize"

"github.com/erigontech/erigon/common/bitutil"
)

Expand Down Expand Up @@ -118,36 +119,35 @@ func (ef *EliasFano) deriveFields() int {
// Build constructs the Elias-Fano index for a given sequence
func (ef *EliasFano) Build() {
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(ef.wordsUpperBits); i++ {
for b := uint64(0); b < 64; b++ {
if ef.upperBits[i]&(uint64(1)<<b) != 0 {
if (c & superQMask) == 0 {
// When c is multiple of 2^14 (4096)
lastSuperQ = i*64 + b
ef.jump[(c/superQ)*superQSize] = lastSuperQ
}
if (c & qMask) == 0 {
// When c is multiple of 2^8 (256)
var offset = i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
// offset needs to be encoded as 16-bit integer, therefore the following check
if offset >= (1 << 16) {
fmt.Printf("ef.l=%x,ef.u=%x\n", ef.l, ef.u)
fmt.Printf("offset=%x,lastSuperQ=%x,i=%x,b=%x,c=%x\n", offset, lastSuperQ, i, b, c)
fmt.Printf("ef.minDelta=%x\n", ef.minDelta)
//fmt.Printf("ef.upperBits=%x\n", ef.upperBits)
//fmt.Printf("ef.lowerBits=%x\n", ef.lowerBits)
//fmt.Printf("ef.wordsUpperBits=%b\n", ef.wordsUpperBits)
panic("")
}
// c % superQ is the bit index inside the group of 4096 bits
jumpSuperQ := (c / superQ) * superQSize
jumpInsideSuperQ := (c % superQ) / q
idx64 := jumpSuperQ + 1 + (jumpInsideSuperQ >> 2)
shift := 16 * (jumpInsideSuperQ % 4)
mask := uint64(0xffff) << shift
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
for word := ef.upperBits[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
b := uint64(bits.TrailingZeros64(word))
if (c & superQMask) == 0 {
// When c is multiple of 2^14 (4096)
lastSuperQ = i*64 + b
ef.jump[(c/superQ)*superQSize] = lastSuperQ
}
if (c & qMask) == 0 {
// When c is multiple of 2^8 (256)
var offset = i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
// offset needs to be encoded as 16-bit integer, therefore the following check
if offset >= (1 << 16) {
fmt.Printf("ef.l=%x,ef.u=%x\n", ef.l, ef.u)
fmt.Printf("offset=%x,lastSuperQ=%x,i=%x,b=%x,c=%x\n", offset, lastSuperQ, i, b, c)
fmt.Printf("ef.minDelta=%x\n", ef.minDelta)
//fmt.Printf("ef.upperBits=%x\n", ef.upperBits)
//fmt.Printf("ef.lowerBits=%x\n", ef.lowerBits)
//fmt.Printf("ef.wordsUpperBits=%b\n", ef.wordsUpperBits)
panic("")
}
c++
// c % superQ is the bit index inside the group of 4096 bits
jumpSuperQ := (c / superQ) * superQSize
jumpInsideSuperQ := (c % superQ) / q
idx64 := jumpSuperQ + 1 + (jumpInsideSuperQ >> 2)
shift := 16 * (jumpInsideSuperQ % 4)
mask := uint64(0xffff) << shift
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
}
c++
}
}
}
Expand Down Expand Up @@ -355,54 +355,52 @@ func (ef *DoubleEliasFano) Build(cumKeys []uint64, position []uint64) {
// c/superQ is the index of the current 4096 block of bits
// superQSize is how many words is required to encode one block of 4096 bits. It is 17 words which is 1088 bits
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsCumKeys); i++ {
for b := uint64(0); b < 64; b++ {
if ef.upperBitsCumKeys[i]&(uint64(1)<<b) != 0 {
if (c & superQMask) == 0 {
// When c is multiple of 2^14 (4096)
lastSuperQ = i*64 + b
ef.jump[(c/superQ)*(superQSize*2)] = lastSuperQ
}
if (c & qMask) == 0 {
// When c is multiple of 2^8 (256)
var offset = i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
// offset needs to be encoded as 16-bit integer, therefore the following check
if offset >= (1 << 16) {
panic("")
}
// c % superQ is the bit index inside the group of 4096 bits
jumpSuperQ := (c / superQ) * (superQSize * 2)
jumpInsideSuperQ := 2 * (c % superQ) / q
idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2)
shift := 16 * (jumpInsideSuperQ % 4)
mask := uint64(0xffff) << shift
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
for word := ef.upperBitsCumKeys[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
b := uint64(bits.TrailingZeros64(word))
if (c & superQMask) == 0 {
// When c is multiple of 2^14 (4096)
lastSuperQ = i*64 + b
ef.jump[(c/superQ)*(superQSize*2)] = lastSuperQ
}
if (c & qMask) == 0 {
// When c is multiple of 2^8 (256)
var offset = i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
// offset needs to be encoded as 16-bit integer, therefore the following check
if offset >= (1 << 16) {
panic("")
}
c++
// c % superQ is the bit index inside the group of 4096 bits
jumpSuperQ := (c / superQ) * (superQSize * 2)
jumpInsideSuperQ := 2 * (c % superQ) / q
idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2)
shift := 16 * (jumpInsideSuperQ % 4)
mask := uint64(0xffff) << shift
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
}
c++
}
}

for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsPosition); i++ {
for b := uint64(0); b < 64; b++ {
if ef.upperBitsPosition[i]&(uint64(1)<<b) != 0 {
if (c & superQMask) == 0 {
lastSuperQ = i*64 + b
ef.jump[(c/superQ)*(superQSize*2)+1] = lastSuperQ
}
if (c & qMask) == 0 {
var offset = i*64 + b - lastSuperQ
if offset >= (1 << 16) {
panic("")
}
jumpSuperQ := (c / superQ) * (superQSize * 2)
jumpInsideSuperQ := 2*((c%superQ)/q) + 1
idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2)
shift := 16 * (jumpInsideSuperQ % 4)
mask := uint64(0xffff) << shift
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
for word := ef.upperBitsPosition[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
b := uint64(bits.TrailingZeros64(word))
if (c & superQMask) == 0 {
lastSuperQ = i*64 + b
ef.jump[(c/superQ)*(superQSize*2)+1] = lastSuperQ
}
if (c & qMask) == 0 {
var offset = i*64 + b - lastSuperQ
if offset >= (1 << 16) {
panic("")
}
c++
jumpSuperQ := (c / superQ) * (superQSize * 2)
jumpInsideSuperQ := 2*((c%superQ)/q) + 1
idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2)
shift := 16 * (jumpInsideSuperQ % 4)
mask := uint64(0xffff) << shift
ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift)
}
c++
}
}
//fmt.Printf("jump: %x\n", ef.jump)
Expand Down
19 changes: 6 additions & 13 deletions db/recsplit/eliasfano32/elias_fano.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,8 @@ func (ef *EliasFano) deriveFields() int {
// Build constructs the Elias-Fano index for a given sequence
func (ef *EliasFano) Build() {
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(ef.wordsUpperBits); i++ {
for b := uint64(0); b < 64; b++ {
if ef.upperBits[i]&(uint64(1)<<b) == 0 {
continue
}
for word := ef.upperBits[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
b := uint64(bits.TrailingZeros64(word))
if (c & superQMask) == 0 {
// When c is multiple of 2^14 (4096)
lastSuperQ = i*64 + b
Expand Down Expand Up @@ -739,10 +737,8 @@ func (ef *DoubleEliasFano) Build(cumKeys []uint64, position []uint64) {
// c/superQ is the index of the current 4096 block of bits
// superQSize is how many words is required to encode one block of 4096 bits. It is 17 words which is 1088 bits
for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsCumKeys); i++ {
for b := uint64(0); b < 64; b++ {
if ef.upperBitsCumKeys[i]&(uint64(1)<<b) == 0 {
continue
}
for word := ef.upperBitsCumKeys[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
b := uint64(bits.TrailingZeros64(word))
if (c & superQMask) == 0 {
// When c is multiple of 2^14 (4096)
lastSuperQ = i*64 + b
Expand All @@ -768,11 +764,8 @@ func (ef *DoubleEliasFano) Build(cumKeys []uint64, position []uint64) {
}

for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsPosition); i++ {
for b := uint64(0); b < 64; b++ {
if ef.upperBitsPosition[i]&(uint64(1)<<b) == 0 {
continue
}

for word := ef.upperBitsPosition[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
b := uint64(bits.TrailingZeros64(word))
if (c & superQMask) == 0 {
lastSuperQ = i*64 + b
ef.jump[(c/superQ)*(superQSize*2)+1] = lastSuperQ
Expand Down
17 changes: 17 additions & 0 deletions db/recsplit/eliasfano32/elias_fano_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package eliasfano32

import (
"bytes"
"fmt"
"math"
"math/bits"
"testing"
Expand Down Expand Up @@ -590,6 +591,22 @@ func BenchmarkEF(b *testing.B) {
})
}

// BenchmarkBuild measures EliasFano.Build for a small (100) and a large
// (1_000_000) number of elements. Each sub-benchmark fills one instance with
// evenly spaced offsets (0, 123, 246, ...) and then repeatedly calls Build on
// that same instance inside the timed loop.
func BenchmarkBuild(b *testing.B) {
	const stride = 123
	sizes := []uint64{100, 1_000_000}
	for _, n := range sizes {
		name := fmt.Sprintf("count=%d", n)
		b.Run(name, func(b *testing.B) {
			// The largest offset added below is (n-1)*stride.
			ef := NewEliasFano(n, (n-1)*stride)
			for k := uint64(0); k < n; k++ {
				ef.AddOffset(k * stride)
			}
			// Keep the fill phase above out of the measurement.
			b.ResetTimer()
			for b.Loop() {
				ef.Build()
			}
		})
	}
}

func naiveReverseIterator(ef *EliasFano) *stream.ArrStream[uint64] {
it := ef.Iterator()
var values []uint64
Expand Down
Loading