Skip to content

Commit cd61e31

Browse files
decomp: speedup nextPos() (#19324)
- split `nextPos(clean bool)` into `nextPosClean()` and `nextPos()` for better inlining - added new pre-computed field `getter.posMask` - added new u64-typed field `getter.dataLen` to avoid type-casting in `.HasNext()` (most called method)
1 parent d9fc176 commit cd61e31

File tree

3 files changed

+70
-60
lines changed

3 files changed

+70
-60
lines changed

db/seg/compress_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,11 @@ func checksum(file string) uint32 {
8181
return hasher.Sum32()
8282
}
8383

84-
func prepareDict(t *testing.T, multiplier int, keys int) *Decompressor {
84+
func prepareDict(t testing.TB, multiplier int, keys int) *Decompressor {
8585
return prepareDictMetadata(t, multiplier, false, nil, keys)
8686
}
8787

88-
func prepareDictMetadata(t *testing.T, multiplier int, hasMetadata bool, metadata []byte, keys int) *Decompressor {
88+
func prepareDictMetadata(t testing.TB, multiplier int, hasMetadata bool, metadata []byte, keys int) *Decompressor {
8989
t.Helper()
9090
logger := log.New()
9191
tmpDir := t.TempDir()

db/seg/decompress.go

Lines changed: 52 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -545,14 +545,17 @@ func (d *Decompressor) MadvWillNeed() *Decompressor {
545545
// Getter represents a "reader" or "iterator" that can move across the data of the decompressor
546546
// The full state of the getter can be captured by saving dataP, and dataBit
547547
type Getter struct {
548+
dataP uint64 // current byte offset in data
549+
dataLen uint64 // len(data), precomputed
550+
dataBit int // bit offset within current byte (0-7)
551+
posMask uint16 // cached posDict.mask, avoids pointer chain
552+
posDict *posTable // Huffman table for positions
553+
data []byte // compressed bitstream (on 64-bit: ptr at 40, len at 48, cap at 56 = all within CL0)
554+
// less hot fields
548555
patternDict *patternTable
549-
posDict *posTable
556+
d *Decompressor
550557
fName string
551-
data []byte
552-
dataP uint64
553-
dataBit int // Value 0..7 - position of the bit
554558
trace bool
555-
d *Decompressor
556559
}
557560

558561
func (g *Getter) MadvNormal() MadvDisabler {
@@ -565,21 +568,26 @@ func (g *Getter) Count() int { return g.d.Count() }
565568
func (g *Getter) FileName() string { return g.fName }
566569
func (g *Getter) GetMetadata() []byte { return g.d.GetMetadata() }
567570

568-
func (g *Getter) nextPos(clean bool) uint64 {
569-
if clean && g.dataBit > 0 {
571+
// nextPosClean aligns to the next byte boundary then reads the next position.
572+
func (g *Getter) nextPosClean() uint64 {
573+
if g.dataBit > 0 {
570574
g.dataP++
571575
g.dataBit = 0
572576
}
573-
table := g.posDict
574-
if table.bitLen == 0 {
575-
return table.pos[0]
577+
return g.nextPos()
578+
}
579+
580+
// nextPos reads the next position from the Huffman-coded bitstream.
581+
func (g *Getter) nextPos() uint64 {
582+
if g.posDict.bitLen == 0 {
583+
return g.posDict.pos[0]
576584
}
585+
table := g.posDict
577586
data := g.data
578587
dataP := g.dataP
579588
dataBit := g.dataBit
580589
dataLen := uint64(len(data))
581-
// Precompute mask for the first table (hot path optimization)
582-
mask := uint16(1)<<table.bitLen - 1
590+
mask := g.posMask
583591
for {
584592
// Read up to 16 bits starting at dataP, shifted by dataBit
585593
code := uint16(data[dataP]) >> dataBit
@@ -613,11 +621,10 @@ func (g *Getter) nextPattern() []byte {
613621
data := g.data
614622
dataP := g.dataP
615623
dataBit := g.dataBit
616-
dataLen := uint64(len(data))
617624

618625
for {
619626
code := uint16(data[dataP]) >> dataBit
620-
if 8-dataBit < table.bitLen && dataP+1 < dataLen {
627+
if 8-dataBit < table.bitLen && dataP+1 < g.dataLen {
621628
code |= uint16(data[dataP+1]) << (8 - dataBit)
622629
}
623630
code &= (uint16(1) << table.bitLen) - 1
@@ -650,13 +657,19 @@ func (d *Decompressor) EmptyWordsCount() int { return int(d.emptyWordsCount) }
650657
// Getter is not thread-safe, but there can be multiple getters used simultaneously and concurrently
651658
// for the same decompressor
652659
func (d *Decompressor) MakeGetter() *Getter {
653-
return &Getter{
660+
data := d.data[d.wordsStart:]
661+
g := &Getter{
654662
d: d,
655663
posDict: d.posDict,
656-
data: d.data[d.wordsStart:],
664+
data: data,
665+
dataLen: uint64(len(data)),
657666
patternDict: d.dict,
658667
fName: d.FileName(),
659668
}
669+
if d.posDict != nil {
670+
g.posMask = uint16(1)<<g.posDict.bitLen - 1
671+
}
672+
return g
660673
}
661674

662675
func (g *Getter) DataLen() int {
@@ -669,15 +682,15 @@ func (g *Getter) Reset(offset uint64) {
669682
}
670683

671684
func (g *Getter) HasNext() bool {
672-
return g.dataP < uint64(len(g.data))
685+
return g.dataP < g.dataLen
673686
}
674687

675688
// Next extracts a compressed word from current offset in the file
676689
// and appends it to the given buf, returning the result of appending
677690
// After extracting next word, it moves to the beginning of the next one
678691
func (g *Getter) Next(buf []byte) ([]byte, uint64) {
679692
savePos := g.dataP
680-
wordLen := g.nextPos(true)
693+
wordLen := g.nextPosClean()
681694
wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
682695
if wordLen == 0 {
683696
if g.dataBit > 0 {
@@ -707,7 +720,7 @@ func (g *Getter) Next(buf []byte) ([]byte, uint64) {
707720
// Loop below fills in the patterns
708721
// Tracking position in buf where to insert part of the word
709722
bufPos := bufOffset
710-
for pos := g.nextPos(false /* clean */); pos != 0; pos = g.nextPos(false) {
723+
for pos := g.nextPos(); pos != 0; pos = g.nextPos() {
711724
bufPos += int(pos) - 1 // Positions where to insert patterns are encoded relative to one another
712725
pt := g.nextPattern()
713726
copy(buf[bufPos:], pt)
@@ -719,14 +732,14 @@ func (g *Getter) Next(buf []byte) ([]byte, uint64) {
719732
postLoopPos := g.dataP
720733
g.dataP = savePos
721734
g.dataBit = 0
722-
g.nextPos(true /* clean */) // Reset the state of huffman reader
735+
g.nextPosClean() // Reset the state of huffman reader
723736

724737
// Restore to the beginning of buf
725738
bufPos = bufOffset
726739
lastUncovered := bufOffset
727740

728741
// Loop below fills the data which is not in the patterns
729-
for pos := g.nextPos(false); pos != 0; pos = g.nextPos(false) {
742+
for pos := g.nextPos(); pos != 0; pos = g.nextPos() {
730743
bufPos += int(pos) - 1 // Positions where to insert patterns are encoded relative to one another
731744
if bufPos > lastUncovered {
732745
dif := uint64(bufPos - lastUncovered)
@@ -746,7 +759,7 @@ func (g *Getter) Next(buf []byte) ([]byte, uint64) {
746759
}
747760

748761
func (g *Getter) NextUncompressed() ([]byte, uint64) {
749-
wordLen := g.nextPos(true)
762+
wordLen := g.nextPosClean()
750763
wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
751764
if wordLen == 0 {
752765
if g.dataBit > 0 {
@@ -755,7 +768,7 @@ func (g *Getter) NextUncompressed() ([]byte, uint64) {
755768
}
756769
return g.data[g.dataP:g.dataP], g.dataP
757770
}
758-
g.nextPos(false)
771+
g.nextPos()
759772
if g.dataBit > 0 {
760773
g.dataP++
761774
g.dataBit = 0
@@ -767,7 +780,7 @@ func (g *Getter) NextUncompressed() ([]byte, uint64) {
767780

768781
// Skip moves offset to the next word and returns the new offset and the length of the word.
769782
func (g *Getter) Skip() (uint64, int) {
770-
l := g.nextPos(true)
783+
l := g.nextPosClean()
771784
l-- // because when create huffman tree we do ++ , because 0 is terminator
772785
if l == 0 {
773786
if g.dataBit > 0 {
@@ -781,7 +794,7 @@ func (g *Getter) Skip() (uint64, int) {
781794
var add uint64
782795
var bufPos int
783796
var lastUncovered int
784-
for pos := g.nextPos(false /* clean */); pos != 0; pos = g.nextPos(false) {
797+
for pos := g.nextPos(); pos != 0; pos = g.nextPos() {
785798
bufPos += int(pos) - 1
786799
if wordLen < bufPos {
787800
panic(fmt.Sprintf("likely .idx is invalid: %s", g.fName))
@@ -804,7 +817,7 @@ func (g *Getter) Skip() (uint64, int) {
804817
}
805818

806819
func (g *Getter) SkipUncompressed() (uint64, int) {
807-
wordLen := g.nextPos(true)
820+
wordLen := g.nextPosClean()
808821
wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
809822
if wordLen == 0 {
810823
if g.dataBit > 0 {
@@ -813,7 +826,7 @@ func (g *Getter) SkipUncompressed() (uint64, int) {
813826
}
814827
return g.dataP, 0
815828
}
816-
g.nextPos(false)
829+
g.nextPos()
817830
if g.dataBit > 0 {
818831
g.dataP++
819832
g.dataBit = 0
@@ -826,7 +839,7 @@ func (g *Getter) SkipUncompressed() (uint64, int) {
826839
func (g *Getter) MatchPrefix(prefix []byte) bool {
827840
savePos := g.dataP
828841

829-
wordLen := g.nextPos(true /* clean */)
842+
wordLen := g.nextPosClean()
830843
wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
831844
prefixLen := len(prefix)
832845
if wordLen == 0 || int(wordLen) < prefixLen {
@@ -837,7 +850,7 @@ func (g *Getter) MatchPrefix(prefix []byte) bool {
837850
var bufPos int
838851
// In the first pass, we only check patterns
839852
// Only run this loop as far as the prefix goes, there is no need to check further
840-
for pos := g.nextPos(false /* clean */); pos != 0; pos = g.nextPos(false) {
853+
for pos := g.nextPos(); pos != 0; pos = g.nextPos() {
841854
bufPos += int(pos) - 1
842855
pattern := g.nextPattern()
843856
var comparisonLen int
@@ -860,11 +873,11 @@ func (g *Getter) MatchPrefix(prefix []byte) bool {
860873
}
861874
postLoopPos := g.dataP
862875
g.dataP, g.dataBit = savePos, 0
863-
g.nextPos(true /* clean */) // Reset the state of huffman decoder
876+
g.nextPosClean() // Reset the state of huffman decoder
864877
// Second pass - we check spaces not covered by the patterns
865878
var lastUncovered int
866879
bufPos = 0
867-
for pos := g.nextPos(false /* clean */); pos != 0 && lastUncovered < prefixLen; pos = g.nextPos(false) {
880+
for pos := g.nextPos(); pos != 0 && lastUncovered < prefixLen; pos = g.nextPos() {
868881
bufPos += int(pos) - 1
869882
if bufPos > lastUncovered {
870883
dif := uint64(bufPos - lastUncovered)
@@ -903,7 +916,7 @@ func (g *Getter) MatchPrefix(prefix []byte) bool {
903916
// returns 0 if buf == word, -1 if buf < word, 1 if buf > word
904917
func (g *Getter) MatchCmp(buf []byte) int {
905918
savePos := g.dataP
906-
wordLen := g.nextPos(true)
919+
wordLen := g.nextPosClean()
907920
wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
908921
lenBuf := len(buf)
909922
if wordLen == 0 && lenBuf != 0 {
@@ -921,7 +934,7 @@ func (g *Getter) MatchCmp(buf []byte) int {
921934
decoded := make([]byte, wordLen)
922935
var bufPos int
923936
// In the first pass, we only check patterns
924-
for pos := g.nextPos(false /* clean */); pos != 0; pos = g.nextPos(false) {
937+
for pos := g.nextPos(); pos != 0; pos = g.nextPos() {
925938
bufPos += int(pos) - 1
926939
pattern := g.nextPattern()
927940
copy(decoded[bufPos:], pattern)
@@ -932,11 +945,11 @@ func (g *Getter) MatchCmp(buf []byte) int {
932945
}
933946
postLoopPos := g.dataP
934947
g.dataP, g.dataBit = savePos, 0
935-
g.nextPos(true /* clean */) // Reset the state of huffman decoder
948+
g.nextPosClean() // Reset the state of huffman decoder
936949
// Second pass - we check spaces not covered by the patterns
937950
var lastUncovered int
938951
bufPos = 0
939-
for pos := g.nextPos(false /* clean */); pos != 0; pos = g.nextPos(false) {
952+
for pos := g.nextPos(); pos != 0; pos = g.nextPos() {
940953
bufPos += int(pos) - 1
941954
// fmt.Printf("BUF POS: %d, POS: %d, lastUncovered: %d\n", bufPos, pos, lastUncovered)
942955
if bufPos > lastUncovered {
@@ -967,7 +980,7 @@ func (g *Getter) MatchPrefixUncompressed(prefix []byte) bool {
967980
g.dataP, g.dataBit = savePos, 0
968981
}()
969982

970-
wordLen := g.nextPos(true /* clean */)
983+
wordLen := g.nextPosClean()
971984
wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
972985
prefixLen := len(prefix)
973986
if wordLen == 0 && prefixLen != 0 {
@@ -977,7 +990,7 @@ func (g *Getter) MatchPrefixUncompressed(prefix []byte) bool {
977990
return false
978991
}
979992

980-
g.nextPos(true)
993+
g.nextPosClean()
981994

982995
return bytes.HasPrefix(g.data[g.dataP:g.dataP+wordLen], prefix)
983996
}
@@ -988,7 +1001,7 @@ func (g *Getter) MatchCmpUncompressed(buf []byte) int {
9881001
g.dataP, g.dataBit = savePos, 0
9891002
}()
9901003

991-
wordLen := g.nextPos(true /* clean */)
1004+
wordLen := g.nextPosClean()
9921005
wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
9931006
bufLen := len(buf)
9941007
if wordLen == 0 && bufLen != 0 {
@@ -998,7 +1011,7 @@ func (g *Getter) MatchCmpUncompressed(buf []byte) int {
9981011
return -1
9991012
}
10001013

1001-
g.nextPos(true)
1014+
g.nextPosClean()
10021015

10031016
return bytes.Compare(buf, g.data[g.dataP:g.dataP+wordLen])
10041017
}

db/seg/decompress_bench_test.go

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ import (
2525
)
2626

2727
func BenchmarkDecompressNextBuf(b *testing.B) {
28-
t := new(testing.T)
29-
d := prepareDict(t, 1, 1_000)
28+
d := prepareDict(b, 1, 1_000)
3029
defer d.Close()
3130
b.ReportAllocs()
3231
var k []byte
@@ -43,8 +42,7 @@ func BenchmarkDecompressNextBuf(b *testing.B) {
4342
}
4443

4544
func BenchmarkDecompressNextHeap(b *testing.B) {
46-
t := new(testing.T)
47-
d := prepareDict(t, 1, 1_000)
45+
d := prepareDict(b, 1, 1_000)
4846
defer d.Close()
4947

5048
b.ReportAllocs()
@@ -61,31 +59,30 @@ func BenchmarkDecompressNextHeap(b *testing.B) {
6159
}
6260

6361
func BenchmarkDecompressSkip(b *testing.B) {
64-
t := new(testing.T)
65-
d := prepareDict(t, 1, 1_000)
62+
d := prepareDict(b, 1, 1_000_000)
6663
defer d.Close()
6764

6865
b.Run("skip", func(b *testing.B) {
6966
b.ReportAllocs()
7067
g := d.MakeGetter()
7168
for b.Loop() {
72-
_, _ = g.Skip()
73-
if !g.HasNext() {
74-
g.Reset(0)
69+
g.Reset(0)
70+
for g.HasNext() {
71+
_, _ = g.Skip()
7572
}
7673
}
7774
})
7875

79-
b.Run("matchcmp_non_existing_key", func(b *testing.B) {
80-
b.ReportAllocs()
81-
g := d.MakeGetter()
82-
for b.Loop() {
83-
_ = g.MatchCmp([]byte("longlongword"))
84-
if !g.HasNext() {
85-
g.Reset(0)
86-
}
87-
}
88-
})
76+
//b.Run("matchcmp_non_existing_key", func(b *testing.B) {
77+
// b.ReportAllocs()
78+
// g := d.MakeGetter()
79+
// for b.Loop() {
80+
// _ = g.MatchCmp([]byte("longlongword"))
81+
// if !g.HasNext() {
82+
// g.Reset(0)
83+
// }
84+
// }
85+
//})
8986
}
9087

9188
func BenchmarkDecompressTorrent(t *testing.B) {

0 commit comments

Comments
 (0)