Improve the defaultSize adjustment for the rare case where, after p.calls is sorted by call count (giving a), the next few entries a[j] (j > 0) have a[j].calls ~= a[0].calls and some of them have a[j].size > defaultSize (== a[0].size); add the test TestPoolCalibrateWithAdjustment

Illirgway · Illirgway · commit 9ccb276166ca · 2021-05-02T22:08:49.000+03:00
+ also replace floating-point arithmetic with an equivalent integer multiply/divide
+ fix the test helper allocNBytes()
+ some micro-optimizations
diff --git a/pool.go b/pool.go
@@ -15,6 +15,26 @@ const (
 
 	calibrateCallsThreshold = 42000
 	maxPercentile           = 0.95
+
+	callsSumMaxValue = steps * calibrateCallsThreshold
+
+	fractionDenominator = uint64(100) // denominator of regular fractions
+
+	// regular fraction of maxPercentile
+	maxPercentileRNumer = uint64(maxPercentile * float64(fractionDenominator)) // numerator of maxPercentile
+	maxPercentileGcd    = uint64(5)                                            // gcd(maxPercentileRNumer, fractionDenominator) = gcd(int(maxPercentile * 100), 100)
+	maxPercentileNumer  = maxPercentileRNumer / maxPercentileGcd               // simplified numerator of maxPercentile
+	maxPercentileDenom  = fractionDenominator / maxPercentileGcd               // simplified denominator of maxPercentile
+
+	// allowable size spread for DefaultSize additional adjustment
+	calibrateDefaultSizeAdjustmentsSpread = 0.05                                      // down to 5% of initial DefaultSize` calls count
+	calibrateDefaultSizeAdjustmentsFactor = 1 - calibrateDefaultSizeAdjustmentsSpread // see calibrate() below
+
+	// regular fraction of calibrateDefaultSizeAdjustmentsFactor
+	calibrateDefaultSizeAdjustmentsFactorRNumer = uint64(calibrateDefaultSizeAdjustmentsFactor * float64(fractionDenominator)) // numerator of calibrateDefaultSizeAdjustmentsFactor
+	calibrateDSASGcd                            = uint64(5)                                                                    // gcd(calibrateDefaultSizeAdjustmentsFactorRNumer, fractionDenominator)
+	calibrateDefaultSizeAdjustmentsFactorNumer  = calibrateDefaultSizeAdjustmentsFactorRNumer / calibrateDSASGcd               // simplified numerator of calibrateDefaultSizeAdjustmentsFactor
+	calibrateDefaultSizeAdjustmentsFactorDenom  = fractionDenominator / calibrateDSASGcd                                       // simplified denominator of calibrateDefaultSizeAdjustmentsFactor
 )
 
 // Pool represents byte buffer pool.
@@ -84,7 +104,9 @@ func (p *Pool) calibrate() {
 	}
 
 	a := make(callSizes, 0, steps)
-	var callsSum uint64
+
+	callsSum := uint64(0)
+
 	for i := uint64(0); i < steps; i++ {
 		calls := atomic.SwapUint64(&p.calls[i], 0)
 		callsSum += calls
@@ -98,17 +120,43 @@ func (p *Pool) calibrate() {
 	defaultSize := a[0].size
 	maxSize := defaultSize
 
-	maxSum := uint64(float64(callsSum) * maxPercentile)
-	callsSum = 0
-	for i := 0; i < steps; i++ {
+	// callsSum <= steps * calibrateCallsThreshold + maybe small R = callsSumMaxValue + R <<<< (MaxUint64 / fractionDenominator),
+	// maxPercentileNumer < fractionDenominator, therefore, integer multiplication by a fraction can be used without overflow
+	maxSum := (callsSum * maxPercentileNumer) / maxPercentileDenom // == uint64(callsSum * maxPercentile)
+
+	// avoid visiting a[0] a second time in the `for` loop below
+	callsSum = a[0].calls
+
+	// defaultSize adjust cond:
+	//     ( abs(a[0].calls - a[i].calls) < a[0].calls * calibrateDefaultSizeAdjustmentsSpread ) && ( defaultSize < a[i].size )
+	// due to fact that a is sorted by calls desc,
+	// abs(a[0].calls - a[i].calls) === a[0].calls - a[i].calls ==>
+	// a[0].calls - a[i].calls < a[0].calls * calibrateDefaultSizeAdjustmentsSpread ==>
+	// a[0].calls - a[0].calls * calibrateDefaultSizeAdjustmentsSpread < a[i].calls ==>
+	// a[i].calls > a[0].calls * (1 - calibrateDefaultSizeAdjustmentsSpread) ==>
+	// a[i].calls > a[0].calls * calibrateDefaultSizeAdjustmentsFactor
+	// and we can pre-calculate a[0].calls * calibrateDefaultSizeAdjustmentsFactor
+
+	// a[0].calls ~= calibrateCallsThreshold + maybe small R <<<< (MaxUint64 / fractionDenominator)
+	defSizeAdjustCallsThreshold := (a[0].calls * calibrateDefaultSizeAdjustmentsFactorNumer) / calibrateDefaultSizeAdjustmentsFactorDenom // == uint64(a[0].calls * calibrateDefaultSizeAdjustmentsFactor)
+
+	for i := 1; i < steps; i++ {
+
 		if callsSum > maxSum {
 			break
 		}
-		callsSum += a[i].calls
+
 		size := a[i].size
+
+		if (a[i].calls > defSizeAdjustCallsThreshold) && (size > defaultSize) {
+			defaultSize = size
+		}
+
 		if size > maxSize {
 			maxSize = size
 		}
+
+		callsSum += a[i].calls
 	}
 
 	atomic.StoreUint64(&p.defaultSize, defaultSize)
diff --git a/pool_test.go b/pool_test.go
@@ -1,6 +1,7 @@
 package bytebufferpool
 
 import (
+	"math/bits"
 	"math/rand"
 	"testing"
 	"time"
@@ -40,6 +41,43 @@ func TestPoolCalibrate(t *testing.T) {
 	}
 }
 
+// TestPoolCalibrateWithAdjustment drives a Pool with two heavily used buffer
+// sizes (n and the larger adjN) plus a spread of much larger random sizes, and
+// checks that p.defaultSize ends up as the power of two covering adjN.
+//
+// NOTE(review): Pool.Put is not visible here; the scenario presumably relies on
+// the final 11 calls pushing the counter for size n past
+// calibrateCallsThreshold so that calibration runs — confirm against Put.
+func TestPoolCalibrateWithAdjustment(t *testing.T) {
+
+	var p Pool
+
+	const n = 510
+
+	// adjN is four times n (2040).
+	adjN := n << 2
+
+	// smaller buffer: used 10 times fewer than calibrateCallsThreshold
+	allocNBytesMtimes(&p, n, calibrateCallsThreshold-10)
+
+	// t.Log(p.calls)
+
+	// never trigger calibrate, never used as adjustment for defaultSize:
+	// calibrateCallsThreshold/2 allocations of random sizes, each at least
+	// adjN<<4 bytes
+	for i, s := 0, adjN<<4; i < calibrateCallsThreshold>>1; i++ {
+		v := s + rand.Intn(maxSize)
+		allocNBytesInP(&p, v)
+	}
+
+	// larger buffer: same number of uses as the smaller one above
+	allocNBytesMtimes(&p, adjN, calibrateCallsThreshold-10)
+
+	// t.Log(p.calls)
+
+	// now throw away the existing larger buf from the pool
+	_ = p.Get()
+
+	// ... and now finish with a new smaller buf (emulate a long process that uses it)
+	allocNBytesMtimes(&p, n, 11)
+
+	// t.Logf("%#v", p)
+
+	// The expected default size is the smallest power of two >= adjN.
+	if v := powOfTwo64(uint64(adjN)); v != p.defaultSize {
+		t.Fatalf("wrong pool final defaultSize: want %d, got %d", v, p.defaultSize)
+	}
+}
+
 func TestPoolVariousSizesSerial(t *testing.T) {
 	testPoolVariousSizes(t)
 }
@@ -62,6 +100,18 @@ func TestPoolVariousSizesConcurrent(t *testing.T) {
 	}
 }
 
+// TestIntArithmetic verifies that each simplified integer fraction
+// (numerator/denominator) reproduces the floating-point constant it stands in
+// for, i.e. that numer == constant * denom holds exactly.
+func TestIntArithmetic(t *testing.T) {
+	// maxPercentile expressed as maxPercentileNumer / maxPercentileDenom.
+	if float64(maxPercentileNumer) != (float64(maxPercentile) * float64(maxPercentileDenom)) {
+		t.Fatalf("wrong maxPercentile interpolation: want %f, got %f", maxPercentile, float64(maxPercentileNumer)/float64(maxPercentileDenom))
+	}
+
+	// calibrateDefaultSizeAdjustmentsFactor expressed as its own
+	// numerator/denominator pair; the failure message names this constant so a
+	// failure is not mistaken for a maxPercentile mismatch.
+	if float64(calibrateDefaultSizeAdjustmentsFactorNumer) != (float64(calibrateDefaultSizeAdjustmentsFactor) * float64(calibrateDefaultSizeAdjustmentsFactorDenom)) {
+		t.Fatalf("wrong calibrateDefaultSizeAdjustmentsFactor interpolation: want %f, got %f", calibrateDefaultSizeAdjustmentsFactor, float64(calibrateDefaultSizeAdjustmentsFactorNumer)/float64(calibrateDefaultSizeAdjustmentsFactorDenom))
+	}
+}
+
 func testPoolVariousSizes(t *testing.T) {
 	for i := 0; i < steps+1; i++ {
 		n := (1 << uint32(i))
@@ -90,5 +140,46 @@ func allocNBytes(dst []byte, n int) []byte {
 	if diff <= 0 {
 		return dst[:n]
 	}
-	return append(dst, make([]byte, diff)...)
+	// must return buffer with len == requested size n, not `n - cap(dst)`
+	return append(dst[:cap(dst)], make([]byte, diff)...)
+}
+
+// allocNBytesInP takes a buffer from p, replaces its byte slice with the
+// result of allocNBytes for size n, and puts the buffer back into p.
+func allocNBytesInP(p *Pool, n int) {
+	buf := p.Get()
+	resized := allocNBytes(buf.B, n)
+	buf.B = resized
+	p.Put(buf)
+}
+
+// allocNBytesMtimes calls allocNBytesInP(p, n) limit times; a non-positive
+// limit results in no calls.
+func allocNBytesMtimes(p *Pool, n, limit int) {
+	for remaining := limit; remaining > 0; remaining-- {
+		allocNBytesInP(p, n)
+	}
+}
+
+// powOfTwo64 returns the smallest power of two that is >= n, i.e. 2^z with
+// the minimal z such that 2^z >= n.
+//
+// powOfTwo64(0) is 1 (2^0). For n > 1<<63 no such power fits in a uint64 and
+// the shift yields 0.
+func powOfTwo64(n uint64) uint64 {
+	if n == 0 {
+		return 1
+	}
+	// bits.Len64(n-1) is the number of bits needed to represent n-1, which is
+	// exactly the exponent z: an exact power of two maps to itself, any other
+	// value rounds up to the next power of two.
+	return uint64(1) << uint(bits.Len64(n-1))
+}
+
+// allocNMBytesInP takes two buffers from p, replaces the first one's byte
+// slice with allocNBytes(.., n) and the second one's with allocNBytes(.., m),
+// then puts both back into p, first buffer first.
+func allocNMBytesInP(p *Pool, n, m int) {
+	// Attention: the sequence Get, Get, resize, resize, Put, Put is
+	// significant and must be kept as is.
+	first, second := p.Get(), p.Get()
+
+	first.B = allocNBytes(first.B, n)
+	second.B = allocNBytes(second.B, m)
+
+	p.Put(first)
+	p.Put(second)
+}
+
+// allocNMBytesXtimes calls allocNMBytesInP(p, n, m) limit times; a
+// non-positive limit results in no calls.
+func allocNMBytesXtimes(p *Pool, n, m int, limit int) {
+	for remaining := limit; remaining > 0; remaining-- {
+		allocNMBytesInP(p, n, m)
+	}
+}