decomp: speedup nextPos() (#19324)

AskAlexSharov · web-flow · commit cd61e31ffc0c · 2026-02-20T14:43:38.000+07:00
- split `nextPos(clear bool)` to `nextPosClear()` and `nextPos()` - for
better for inlining
- added new pre-computed field `getter.posMask`
- added new u64 typed field `getter.dataLen` to avoid type-casting in
`.HasNext()` (most called method)
diff --git a/db/seg/compress_test.go b/db/seg/compress_test.go
@@ -81,11 +81,11 @@ func checksum(file string) uint32 {
 	return hasher.Sum32()
 }
 
-func prepareDict(t *testing.T, multiplier int, keys int) *Decompressor {
+func prepareDict(t testing.TB, multiplier int, keys int) *Decompressor {
 	return prepareDictMetadata(t, multiplier, false, nil, keys)
 }
 
-func prepareDictMetadata(t *testing.T, multiplier int, hasMetadata bool, metadata []byte, keys int) *Decompressor {
+func prepareDictMetadata(t testing.TB, multiplier int, hasMetadata bool, metadata []byte, keys int) *Decompressor {
 	t.Helper()
 	logger := log.New()
 	tmpDir := t.TempDir()
diff --git a/db/seg/decompress.go b/db/seg/decompress.go
@@ -545,14 +545,17 @@ func (d *Decompressor) MadvWillNeed() *Decompressor {
 // Getter represent "reader" or "iterator" that can move across the data of the decompressor
 // The full state of the getter can be captured by saving dataP, and dataBit
 type Getter struct {
+	dataP   uint64    // current byte offset in data
+	dataLen uint64    // len(data), precomputed
+	dataBit int       // bit offset within current byte (0-7)
+	posMask uint16    // cached posDict.mask, avoids pointer chain
+	posDict *posTable // Huffman table for positions
+	data    []byte    // compressed bitstream (ptr at 48, len at 56 = CL0)
+	//less hot fields
 	patternDict *patternTable
-	posDict     *posTable
+	d           *Decompressor
 	fName       string
-	data        []byte
-	dataP       uint64
-	dataBit     int // Value 0..7 - position of the bit
 	trace       bool
-	d           *Decompressor
 }
 
 func (g *Getter) MadvNormal() MadvDisabler {
@@ -565,21 +568,26 @@ func (g *Getter) Count() int          { return g.d.Count() }
 func (g *Getter) FileName() string    { return g.fName }
 func (g *Getter) GetMetadata() []byte { return g.d.GetMetadata() }
 
-func (g *Getter) nextPos(clean bool) uint64 {
-	if clean && g.dataBit > 0 {
+// nextPosClean aligns to the next byte boundary then reads the next position.
+func (g *Getter) nextPosClean() uint64 {
+	if g.dataBit > 0 {
 		g.dataP++
 		g.dataBit = 0
 	}
-	table := g.posDict
-	if table.bitLen == 0 {
-		return table.pos[0]
+	return g.nextPos()
+}
+
+// nextPos reads the next position from the Huffman-coded bitstream.
+func (g *Getter) nextPos() uint64 {
+	if g.posDict.bitLen == 0 {
+		return g.posDict.pos[0]
 	}
+	table := g.posDict
 	data := g.data
 	dataP := g.dataP
 	dataBit := g.dataBit
 	dataLen := uint64(len(data))
-	// Precompute mask for the first table (hot path optimization)
-	mask := uint16(1)<<table.bitLen - 1
+	mask := g.posMask
 	for {
 		// Read up to 16 bits starting at dataP, shifted by dataBit
 		code := uint16(data[dataP]) >> dataBit
@@ -613,11 +621,10 @@ func (g *Getter) nextPattern() []byte {
 	data := g.data
 	dataP := g.dataP
 	dataBit := g.dataBit
-	dataLen := uint64(len(data))
 
 	for {
 		code := uint16(data[dataP]) >> dataBit
-		if 8-dataBit < table.bitLen && dataP+1 < dataLen {
+		if 8-dataBit < table.bitLen && dataP+1 < g.dataLen {
 			code |= uint16(data[dataP+1]) << (8 - dataBit)
 		}
 		code &= (uint16(1) << table.bitLen) - 1
@@ -650,13 +657,19 @@ func (d *Decompressor) EmptyWordsCount() int { return int(d.emptyWordsCount) }
 // Getter is not thread-safe, but there can be multiple getters used simultaneously and concurrently
 // for the same decompressor
 func (d *Decompressor) MakeGetter() *Getter {
-	return &Getter{
+	data := d.data[d.wordsStart:]
+	g := &Getter{
 		d:           d,
 		posDict:     d.posDict,
-		data:        d.data[d.wordsStart:],
+		data:        data,
+		dataLen:     uint64(len(data)),
 		patternDict: d.dict,
 		fName:       d.FileName(),
 	}
+	if d.posDict != nil {
+		g.posMask = uint16(1)<<g.posDict.bitLen - 1
+	}
+	return g
 }
 
 func (g *Getter) DataLen() int {
@@ -669,15 +682,15 @@ func (g *Getter) Reset(offset uint64) {
 }
 
 func (g *Getter) HasNext() bool {
-	return g.dataP < uint64(len(g.data))
+	return g.dataP < g.dataLen
 }
 
 // Next extracts a compressed word from current offset in the file
 // and appends it to the given buf, returning the result of appending
 // After extracting next word, it moves to the beginning of the next one
 func (g *Getter) Next(buf []byte) ([]byte, uint64) {
 	savePos := g.dataP
-	wordLen := g.nextPos(true)
+	wordLen := g.nextPosClean()
 	wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
 	if wordLen == 0 {
 		if g.dataBit > 0 {
@@ -707,7 +720,7 @@ func (g *Getter) Next(buf []byte) ([]byte, uint64) {
 	// Loop below fills in the patterns
 	// Tracking position in buf where to insert part of the word
 	bufPos := bufOffset
-	for pos := g.nextPos(false /* clean */); pos != 0; pos = g.nextPos(false) {
+	for pos := g.nextPos(); pos != 0; pos = g.nextPos() {
 		bufPos += int(pos) - 1 // Positions where to insert patterns are encoded relative to one another
 		pt := g.nextPattern()
 		copy(buf[bufPos:], pt)
@@ -719,14 +732,14 @@ func (g *Getter) Next(buf []byte) ([]byte, uint64) {
 	postLoopPos := g.dataP
 	g.dataP = savePos
 	g.dataBit = 0
-	g.nextPos(true /* clean */) // Reset the state of huffman reader
+	g.nextPosClean() // Reset the state of huffman reader
 
 	// Restore to the beginning of buf
 	bufPos = bufOffset
 	lastUncovered := bufOffset
 
 	// Loop below fills the data which is not in the patterns
-	for pos := g.nextPos(false); pos != 0; pos = g.nextPos(false) {
+	for pos := g.nextPos(); pos != 0; pos = g.nextPos() {
 		bufPos += int(pos) - 1 // Positions where to insert patterns are encoded relative to one another
 		if bufPos > lastUncovered {
 			dif := uint64(bufPos - lastUncovered)
@@ -746,7 +759,7 @@ func (g *Getter) Next(buf []byte) ([]byte, uint64) {
 }
 
 func (g *Getter) NextUncompressed() ([]byte, uint64) {
-	wordLen := g.nextPos(true)
+	wordLen := g.nextPosClean()
 	wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
 	if wordLen == 0 {
 		if g.dataBit > 0 {
@@ -755,7 +768,7 @@ func (g *Getter) NextUncompressed() ([]byte, uint64) {
 		}
 		return g.data[g.dataP:g.dataP], g.dataP
 	}
-	g.nextPos(false)
+	g.nextPos()
 	if g.dataBit > 0 {
 		g.dataP++
 		g.dataBit = 0
@@ -767,7 +780,7 @@ func (g *Getter) NextUncompressed() ([]byte, uint64) {
 
 // Skip moves offset to the next word and returns the new offset and the length of the word.
 func (g *Getter) Skip() (uint64, int) {
-	l := g.nextPos(true)
+	l := g.nextPosClean()
 	l-- // because when create huffman tree we do ++ , because 0 is terminator
 	if l == 0 {
 		if g.dataBit > 0 {
@@ -781,7 +794,7 @@ func (g *Getter) Skip() (uint64, int) {
 	var add uint64
 	var bufPos int
 	var lastUncovered int
-	for pos := g.nextPos(false /* clean */); pos != 0; pos = g.nextPos(false) {
+	for pos := g.nextPos(); pos != 0; pos = g.nextPos() {
 		bufPos += int(pos) - 1
 		if wordLen < bufPos {
 			panic(fmt.Sprintf("likely .idx is invalid: %s", g.fName))
@@ -804,7 +817,7 @@ func (g *Getter) Skip() (uint64, int) {
 }
 
 func (g *Getter) SkipUncompressed() (uint64, int) {
-	wordLen := g.nextPos(true)
+	wordLen := g.nextPosClean()
 	wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
 	if wordLen == 0 {
 		if g.dataBit > 0 {
@@ -813,7 +826,7 @@ func (g *Getter) SkipUncompressed() (uint64, int) {
 		}
 		return g.dataP, 0
 	}
-	g.nextPos(false)
+	g.nextPos()
 	if g.dataBit > 0 {
 		g.dataP++
 		g.dataBit = 0
@@ -826,7 +839,7 @@ func (g *Getter) SkipUncompressed() (uint64, int) {
 func (g *Getter) MatchPrefix(prefix []byte) bool {
 	savePos := g.dataP
 
-	wordLen := g.nextPos(true /* clean */)
+	wordLen := g.nextPosClean()
 	wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
 	prefixLen := len(prefix)
 	if wordLen == 0 || int(wordLen) < prefixLen {
@@ -837,7 +850,7 @@ func (g *Getter) MatchPrefix(prefix []byte) bool {
 	var bufPos int
 	// In the first pass, we only check patterns
 	// Only run this loop as far as the prefix goes, there is no need to check further
-	for pos := g.nextPos(false /* clean */); pos != 0; pos = g.nextPos(false) {
+	for pos := g.nextPos(); pos != 0; pos = g.nextPos() {
 		bufPos += int(pos) - 1
 		pattern := g.nextPattern()
 		var comparisonLen int
@@ -860,11 +873,11 @@ func (g *Getter) MatchPrefix(prefix []byte) bool {
 	}
 	postLoopPos := g.dataP
 	g.dataP, g.dataBit = savePos, 0
-	g.nextPos(true /* clean */) // Reset the state of huffman decoder
+	g.nextPosClean() // Reset the state of huffman decoder
 	// Second pass - we check spaces not covered by the patterns
 	var lastUncovered int
 	bufPos = 0
-	for pos := g.nextPos(false /* clean */); pos != 0 && lastUncovered < prefixLen; pos = g.nextPos(false) {
+	for pos := g.nextPos(); pos != 0 && lastUncovered < prefixLen; pos = g.nextPos() {
 		bufPos += int(pos) - 1
 		if bufPos > lastUncovered {
 			dif := uint64(bufPos - lastUncovered)
@@ -903,7 +916,7 @@ func (g *Getter) MatchPrefix(prefix []byte) bool {
 // returns 0 if buf == word, -1 if buf < word, 1 if buf > word
 func (g *Getter) MatchCmp(buf []byte) int {
 	savePos := g.dataP
-	wordLen := g.nextPos(true)
+	wordLen := g.nextPosClean()
 	wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
 	lenBuf := len(buf)
 	if wordLen == 0 && lenBuf != 0 {
@@ -921,7 +934,7 @@ func (g *Getter) MatchCmp(buf []byte) int {
 	decoded := make([]byte, wordLen)
 	var bufPos int
 	// In the first pass, we only check patterns
-	for pos := g.nextPos(false /* clean */); pos != 0; pos = g.nextPos(false) {
+	for pos := g.nextPos(); pos != 0; pos = g.nextPos() {
 		bufPos += int(pos) - 1
 		pattern := g.nextPattern()
 		copy(decoded[bufPos:], pattern)
@@ -932,11 +945,11 @@ func (g *Getter) MatchCmp(buf []byte) int {
 	}
 	postLoopPos := g.dataP
 	g.dataP, g.dataBit = savePos, 0
-	g.nextPos(true /* clean */) // Reset the state of huffman decoder
+	g.nextPosClean() // Reset the state of huffman decoder
 	// Second pass - we check spaces not covered by the patterns
 	var lastUncovered int
 	bufPos = 0
-	for pos := g.nextPos(false /* clean */); pos != 0; pos = g.nextPos(false) {
+	for pos := g.nextPos(); pos != 0; pos = g.nextPos() {
 		bufPos += int(pos) - 1
 		// fmt.Printf("BUF POS: %d, POS: %d, lastUncovered: %d\n", bufPos, pos, lastUncovered)
 		if bufPos > lastUncovered {
@@ -967,7 +980,7 @@ func (g *Getter) MatchPrefixUncompressed(prefix []byte) bool {
 		g.dataP, g.dataBit = savePos, 0
 	}()
 
-	wordLen := g.nextPos(true /* clean */)
+	wordLen := g.nextPosClean()
 	wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
 	prefixLen := len(prefix)
 	if wordLen == 0 && prefixLen != 0 {
@@ -977,7 +990,7 @@ func (g *Getter) MatchPrefixUncompressed(prefix []byte) bool {
 		return false
 	}
 
-	g.nextPos(true)
+	g.nextPosClean()
 
 	return bytes.HasPrefix(g.data[g.dataP:g.dataP+wordLen], prefix)
 }
@@ -988,7 +1001,7 @@ func (g *Getter) MatchCmpUncompressed(buf []byte) int {
 		g.dataP, g.dataBit = savePos, 0
 	}()
 
-	wordLen := g.nextPos(true /* clean */)
+	wordLen := g.nextPosClean()
 	wordLen-- // because when create huffman tree we do ++ , because 0 is terminator
 	bufLen := len(buf)
 	if wordLen == 0 && bufLen != 0 {
@@ -998,7 +1011,7 @@ func (g *Getter) MatchCmpUncompressed(buf []byte) int {
 		return -1
 	}
 
-	g.nextPos(true)
+	g.nextPosClean()
 
 	return bytes.Compare(buf, g.data[g.dataP:g.dataP+wordLen])
 }
diff --git a/db/seg/decompress_bench_test.go b/db/seg/decompress_bench_test.go
@@ -25,8 +25,7 @@ import (
 )
 
 func BenchmarkDecompressNextBuf(b *testing.B) {
-	t := new(testing.T)
-	d := prepareDict(t, 1, 1_000)
+	d := prepareDict(b, 1, 1_000)
 	defer d.Close()
 	b.ReportAllocs()
 	var k []byte
@@ -43,8 +42,7 @@ func BenchmarkDecompressNextBuf(b *testing.B) {
 }
 
 func BenchmarkDecompressNextHeap(b *testing.B) {
-	t := new(testing.T)
-	d := prepareDict(t, 1, 1_000)
+	d := prepareDict(b, 1, 1_000)
 	defer d.Close()
 
 	b.ReportAllocs()
@@ -61,31 +59,30 @@ func BenchmarkDecompressNextHeap(b *testing.B) {
 }
 
 func BenchmarkDecompressSkip(b *testing.B) {
-	t := new(testing.T)
-	d := prepareDict(t, 1, 1_000)
+	d := prepareDict(b, 1, 1_000_000)
 	defer d.Close()
 
 	b.Run("skip", func(b *testing.B) {
 		b.ReportAllocs()
 		g := d.MakeGetter()
 		for b.Loop() {
-			_, _ = g.Skip()
-			if !g.HasNext() {
-				g.Reset(0)
+			g.Reset(0)
+			for g.HasNext() {
+				_, _ = g.Skip()
 			}
 		}
 	})
 
-	b.Run("matchcmp_non_existing_key", func(b *testing.B) {
-		b.ReportAllocs()
-		g := d.MakeGetter()
-		for b.Loop() {
-			_ = g.MatchCmp([]byte("longlongword"))
-			if !g.HasNext() {
-				g.Reset(0)
-			}
-		}
-	})
+	//b.Run("matchcmp_non_existing_key", func(b *testing.B) {
+	//	b.ReportAllocs()
+	//	g := d.MakeGetter()
+	//	for b.Loop() {
+	//		_ = g.MatchCmp([]byte("longlongword"))
+	//		if !g.HasNext() {
+	//			g.Reset(0)
+	//		}
+	//	}
+	//})
 }
 
 func BenchmarkDecompressTorrent(t *testing.B) {

Original file line number	Diff line number	Diff line change
`@@ -81,11 +81,11 @@ func checksum(file string) uint32 {`
`81`	`81`	`return hasher.Sum32()`
`82`	`82`	`}`
`83`	`83`
`84`		`-func prepareDict(t testing.T, multiplier int, keys int) Decompressor {`
	`84`	`+func prepareDict(t testing.TB, multiplier int, keys int) *Decompressor {`
`85`	`85`	`return prepareDictMetadata(t, multiplier, false, nil, keys)`
`86`	`86`	`}`
`87`	`87`
`88`		`-func prepareDictMetadata(t testing.T, multiplier int, hasMetadata bool, metadata []byte, keys int) Decompressor {`
	`88`	`+func prepareDictMetadata(t testing.TB, multiplier int, hasMetadata bool, metadata []byte, keys int) *Decompressor {`
`89`	`89`	`t.Helper()`
`90`	`90`	`logger := log.New()`
`91`	`91`	`tmpDir := t.TempDir()`