diff --git a/db/recsplit/eliasfano16/elias_fano.go b/db/recsplit/eliasfano16/elias_fano.go index a4d61ab8bb7..2060928b0b9 100644 --- a/db/recsplit/eliasfano16/elias_fano.go +++ b/db/recsplit/eliasfano16/elias_fano.go @@ -25,6 +25,7 @@ import ( "unsafe" "github.com/c2h5oh/datasize" + "github.com/erigontech/erigon/common/bitutil" ) @@ -118,36 +119,35 @@ func (ef *EliasFano) deriveFields() int { // Build construct Elias Fano index for a given sequences func (ef *EliasFano) Build() { for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(ef.wordsUpperBits); i++ { - for b := uint64(0); b < 64; b++ { - if ef.upperBits[i]&(uint64(1)<= (1 << 16) { - fmt.Printf("ef.l=%x,ef.u=%x\n", ef.l, ef.u) - fmt.Printf("offset=%x,lastSuperQ=%x,i=%x,b=%x,c=%x\n", offset, lastSuperQ, i, b, c) - fmt.Printf("ef.minDelta=%x\n", ef.minDelta) - //fmt.Printf("ef.upperBits=%x\n", ef.upperBits) - //fmt.Printf("ef.lowerBits=%x\n", ef.lowerBits) - //fmt.Printf("ef.wordsUpperBits=%b\n", ef.wordsUpperBits) - panic("") - } - // c % superQ is the bit index inside the group of 4096 bits - jumpSuperQ := (c / superQ) * superQSize - jumpInsideSuperQ := (c % superQ) / q - idx64 := jumpSuperQ + 1 + (jumpInsideSuperQ >> 2) - shift := 16 * (jumpInsideSuperQ % 4) - mask := uint64(0xffff) << shift - ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift) + for word := ef.upperBits[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit + b := uint64(bits.TrailingZeros64(word)) + if (c & superQMask) == 0 { + // When c is multiple of 2^14 (4096) + lastSuperQ = i*64 + b + ef.jump[(c/superQ)*superQSize] = lastSuperQ + } + if (c & qMask) == 0 { + // When c is multiple of 2^8 (256) + var offset = i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256 + // offset needs to be encoded as 16-bit integer, therefore the following check + if offset >= (1 << 16) { + fmt.Printf("ef.l=%x,ef.u=%x\n", ef.l, ef.u) + fmt.Printf("offset=%x,lastSuperQ=%x,i=%x,b=%x,c=%x\n", offset, lastSuperQ, i, b, c) + fmt.Printf("ef.minDelta=%x\n", ef.minDelta) + //fmt.Printf("ef.upperBits=%x\n", ef.upperBits) + //fmt.Printf("ef.lowerBits=%x\n", ef.lowerBits) + //fmt.Printf("ef.wordsUpperBits=%b\n", ef.wordsUpperBits) + panic("") } - c++ + // c % superQ is the bit index inside the group of 4096 bits + jumpSuperQ := (c / superQ) * superQSize + jumpInsideSuperQ := (c % superQ) / q + idx64 := jumpSuperQ + 1 + (jumpInsideSuperQ >> 2) + shift := 16 * (jumpInsideSuperQ % 4) + mask := uint64(0xffff) << shift + ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift) } + c++ } } } @@ -355,54 +355,52 @@ func (ef *DoubleEliasFano) Build(cumKeys []uint64, position []uint64) { // c/superQ is the index of the current 4096 block of bits // superQSize is how many words is required to encode one block of 4096 bits. It is 17 words which is 1088 bits for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsCumKeys); i++ { - for b := uint64(0); b < 64; b++ { - if ef.upperBitsCumKeys[i]&(uint64(1)<= (1 << 16) { - panic("") - } - // c % superQ is the bit index inside the group of 4096 bits - jumpSuperQ := (c / superQ) * (superQSize * 2) - jumpInsideSuperQ := 2 * (c % superQ) / q - idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2) - shift := 16 * (jumpInsideSuperQ % 4) - mask := uint64(0xffff) << shift - ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift) + for word := ef.upperBitsCumKeys[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit + b := uint64(bits.TrailingZeros64(word)) + if (c & superQMask) == 0 { + // When c is multiple of 2^14 (4096) + lastSuperQ = i*64 + b + ef.jump[(c/superQ)*(superQSize*2)] = lastSuperQ + } + if (c & qMask) == 0 { + // When c is multiple of 2^8 (256) + var offset = i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256 + // offset needs to be encoded as 16-bit integer, therefore the following check + if offset >= (1 << 16) { + panic("") } - c++ + // c % superQ is the bit index inside the group of 4096 bits + jumpSuperQ := (c / superQ) * (superQSize * 2) + jumpInsideSuperQ := 2 * (c % superQ) / q + idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2) + shift := 16 * (jumpInsideSuperQ % 4) + mask := uint64(0xffff) << shift + ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift) } + c++ } } for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsPosition); i++ { - for b := uint64(0); b < 64; b++ { - if ef.upperBitsPosition[i]&(uint64(1)<= (1 << 16) { - panic("") - } - jumpSuperQ := (c / superQ) * (superQSize * 2) - jumpInsideSuperQ := 2*((c%superQ)/q) + 1 - idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2) - shift := 16 * (jumpInsideSuperQ % 4) - mask := uint64(0xffff) << shift - ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift) + for word := ef.upperBitsPosition[i]; word != 0; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit + b := uint64(bits.TrailingZeros64(word)) + if (c & superQMask) == 0 { + lastSuperQ = i*64 + b + ef.jump[(c/superQ)*(superQSize*2)+1] = lastSuperQ + } + if (c & qMask) == 0 { + var offset = i*64 + b - lastSuperQ + if offset >= (1 << 16) { + panic("") } - c++ + jumpSuperQ := (c / superQ) * (superQSize * 2) + jumpInsideSuperQ := 2*((c%superQ)/q) + 1 + idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2) + shift := 16 * (jumpInsideSuperQ % 4) + mask := uint64(0xffff) << shift + ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift) } + c++ } } //fmt.Printf("jump: %x\n", ef.jump) diff --git a/db/recsplit/eliasfano32/elias_fano.go b/db/recsplit/eliasfano32/elias_fano.go index 13a34e6d80e..9f9335ff17a 100644 --- a/db/recsplit/eliasfano32/elias_fano.go +++ b/db/recsplit/eliasfano32/elias_fano.go @@ -123,10 +123,8 @@ func (ef *EliasFano) deriveFields() int { // Build construct Elias Fano index for a given sequences func (ef *EliasFano) Build() { for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(ef.wordsUpperBits); i++ { - for b := uint64(0); b < 64; b++ { - if ef.upperBits[i]&(uint64(1)<