@@ -25,6 +25,7 @@ import (
2525 "unsafe"
2626
2727 "github.com/c2h5oh/datasize"
28+
2829 "github.com/erigontech/erigon/common/bitutil"
2930)
3031
@@ -118,36 +119,35 @@ func (ef *EliasFano) deriveFields() int {
118119// Build constructs the Elias Fano index for the given sequence
119120func (ef * EliasFano ) Build () {
120121 for i , c , lastSuperQ := uint64 (0 ), uint64 (0 ), uint64 (0 ); i < uint64 (ef .wordsUpperBits ); i ++ {
121- for b := uint64 (0 ); b < 64 ; b ++ {
122- if ef .upperBits [i ]& (uint64 (1 )<< b ) != 0 {
123- if (c & superQMask ) == 0 {
124- // When c is multiple of 2^14 (4096)
125- lastSuperQ = i * 64 + b
126- ef .jump [(c / superQ )* superQSize ] = lastSuperQ
127- }
128- if (c & qMask ) == 0 {
129- // When c is multiple of 2^8 (256)
130- var offset = i * 64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
131- // offset needs to be encoded as 16-bit integer, therefore the following check
132- if offset >= (1 << 16 ) {
133- fmt .Printf ("ef.l=%x,ef.u=%x\n " , ef .l , ef .u )
134- fmt .Printf ("offset=%x,lastSuperQ=%x,i=%x,b=%x,c=%x\n " , offset , lastSuperQ , i , b , c )
135- fmt .Printf ("ef.minDelta=%x\n " , ef .minDelta )
136- //fmt.Printf("ef.upperBits=%x\n", ef.upperBits)
137- //fmt.Printf("ef.lowerBits=%x\n", ef.lowerBits)
138- //fmt.Printf("ef.wordsUpperBits=%b\n", ef.wordsUpperBits)
139- panic ("" )
140- }
141- // c % superQ is the bit index inside the group of 4096 bits
142- jumpSuperQ := (c / superQ ) * superQSize
143- jumpInsideSuperQ := (c % superQ ) / q
144- idx64 := jumpSuperQ + 1 + (jumpInsideSuperQ >> 2 )
145- shift := 16 * (jumpInsideSuperQ % 4 )
146- mask := uint64 (0xffff ) << shift
147- ef .jump [idx64 ] = (ef .jump [idx64 ] &^ mask ) | (offset << shift )
122+ for word := ef .upperBits [i ]; word != 0 ; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
123+ b := uint64 (bits .TrailingZeros64 (word ))
124+ if (c & superQMask ) == 0 {
125+ // When c is a multiple of 2^14 (16384)
126+ lastSuperQ = i * 64 + b
127+ ef .jump [(c / superQ )* superQSize ] = lastSuperQ
128+ }
129+ if (c & qMask ) == 0 {
130+ // When c is multiple of 2^8 (256)
131+ var offset = i * 64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
132+ // offset needs to be encoded as 16-bit integer, therefore the following check
133+ if offset >= (1 << 16 ) {
134+ fmt .Printf ("ef.l=%x,ef.u=%x\n " , ef .l , ef .u )
135+ fmt .Printf ("offset=%x,lastSuperQ=%x,i=%x,b=%x,c=%x\n " , offset , lastSuperQ , i , b , c )
136+ fmt .Printf ("ef.minDelta=%x\n " , ef .minDelta )
137+ //fmt.Printf("ef.upperBits=%x\n", ef.upperBits)
138+ //fmt.Printf("ef.lowerBits=%x\n", ef.lowerBits)
139+ //fmt.Printf("ef.wordsUpperBits=%b\n", ef.wordsUpperBits)
140+ panic ("" )
148141 }
149- c ++
142+ // c % superQ is the bit index inside the group of 4096 bits
143+ jumpSuperQ := (c / superQ ) * superQSize
144+ jumpInsideSuperQ := (c % superQ ) / q
145+ idx64 := jumpSuperQ + 1 + (jumpInsideSuperQ >> 2 )
146+ shift := 16 * (jumpInsideSuperQ % 4 )
147+ mask := uint64 (0xffff ) << shift
148+ ef .jump [idx64 ] = (ef .jump [idx64 ] &^ mask ) | (offset << shift )
150149 }
150+ c ++
151151 }
152152 }
153153}
@@ -355,54 +355,52 @@ func (ef *DoubleEliasFano) Build(cumKeys []uint64, position []uint64) {
355355 // c/superQ is the index of the current block of 2^14 (16384) set bits
356356 // superQSize is how many words are required to encode one such block. It is 17 words, which is 1088 bits
357357 for i , c , lastSuperQ := uint64 (0 ), uint64 (0 ), uint64 (0 ); i < uint64 (wordsCumKeys ); i ++ {
358- for b := uint64 (0 ); b < 64 ; b ++ {
359- if ef .upperBitsCumKeys [i ]& (uint64 (1 )<< b ) != 0 {
360- if (c & superQMask ) == 0 {
361- // When c is multiple of 2^14 (4096)
362- lastSuperQ = i * 64 + b
363- ef .jump [(c / superQ )* (superQSize * 2 )] = lastSuperQ
364- }
365- if (c & qMask ) == 0 {
366- // When c is multiple of 2^8 (256)
367- var offset = i * 64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
368- // offset needs to be encoded as 16-bit integer, therefore the following check
369- if offset >= (1 << 16 ) {
370- panic ("" )
371- }
372- // c % superQ is the bit index inside the group of 4096 bits
373- jumpSuperQ := (c / superQ ) * (superQSize * 2 )
374- jumpInsideSuperQ := 2 * (c % superQ ) / q
375- idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2 )
376- shift := 16 * (jumpInsideSuperQ % 4 )
377- mask := uint64 (0xffff ) << shift
378- ef .jump [idx64 ] = (ef .jump [idx64 ] &^ mask ) | (offset << shift )
358+ for word := ef .upperBitsCumKeys [i ]; word != 0 ; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
359+ b := uint64 (bits .TrailingZeros64 (word ))
360+ if (c & superQMask ) == 0 {
361+ // When c is a multiple of 2^14 (16384)
362+ lastSuperQ = i * 64 + b
363+ ef .jump [(c / superQ )* (superQSize * 2 )] = lastSuperQ
364+ }
365+ if (c & qMask ) == 0 {
366+ // When c is multiple of 2^8 (256)
367+ var offset = i * 64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256
368+ // offset needs to be encoded as 16-bit integer, therefore the following check
369+ if offset >= (1 << 16 ) {
370+ panic ("" )
379371 }
380- c ++
372+ // c % superQ is the bit index inside the group of 4096 bits
373+ jumpSuperQ := (c / superQ ) * (superQSize * 2 )
374+ jumpInsideSuperQ := 2 * (c % superQ ) / q
375+ idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2 )
376+ shift := 16 * (jumpInsideSuperQ % 4 )
377+ mask := uint64 (0xffff ) << shift
378+ ef .jump [idx64 ] = (ef .jump [idx64 ] &^ mask ) | (offset << shift )
381379 }
380+ c ++
382381 }
383382 }
384383
385384 for i , c , lastSuperQ := uint64 (0 ), uint64 (0 ), uint64 (0 ); i < uint64 (wordsPosition ); i ++ {
386- for b := uint64 (0 ); b < 64 ; b ++ {
387- if ef .upperBitsPosition [i ]& (uint64 (1 )<< b ) != 0 {
388- if (c & superQMask ) == 0 {
389- lastSuperQ = i * 64 + b
390- ef .jump [(c / superQ )* (superQSize * 2 )+ 1 ] = lastSuperQ
391- }
392- if (c & qMask ) == 0 {
393- var offset = i * 64 + b - lastSuperQ
394- if offset >= (1 << 16 ) {
395- panic ("" )
396- }
397- jumpSuperQ := (c / superQ ) * (superQSize * 2 )
398- jumpInsideSuperQ := 2 * ((c % superQ )/ q ) + 1
399- idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2 )
400- shift := 16 * (jumpInsideSuperQ % 4 )
401- mask := uint64 (0xffff ) << shift
402- ef .jump [idx64 ] = (ef .jump [idx64 ] &^ mask ) | (offset << shift )
385+ for word := ef .upperBitsPosition [i ]; word != 0 ; word &= word - 1 { // iterate over set bits only; word &= word-1 clears the lowest set bit
386+ b := uint64 (bits .TrailingZeros64 (word ))
387+ if (c & superQMask ) == 0 {
388+ lastSuperQ = i * 64 + b
389+ ef .jump [(c / superQ )* (superQSize * 2 )+ 1 ] = lastSuperQ
390+ }
391+ if (c & qMask ) == 0 {
392+ var offset = i * 64 + b - lastSuperQ
393+ if offset >= (1 << 16 ) {
394+ panic ("" )
403395 }
404- c ++
396+ jumpSuperQ := (c / superQ ) * (superQSize * 2 )
397+ jumpInsideSuperQ := 2 * ((c % superQ )/ q ) + 1
398+ idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2 )
399+ shift := 16 * (jumpInsideSuperQ % 4 )
400+ mask := uint64 (0xffff ) << shift
401+ ef .jump [idx64 ] = (ef .jump [idx64 ] &^ mask ) | (offset << shift )
405402 }
403+ c ++
406404 }
407405 }
408406 //fmt.Printf("jump: %x\n", ef.jump)
0 commit comments