fix: use standard linear interpolation for Percentile (issue #92)

montanaflynn · montanaflynn · commit e317bbbdc703 · 2026-03-11T16:50:06.000-05:00
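The previous algorithm used a non-standard formula that produced incorrect results. This switches to linear interpolation between closest ranks with rank = (p / 100) * (n - 1), the variant the NIST handbook notes is used by some software packages, matching Excel PERCENTILE, Google Sheets, and NumPy's default. Fixes #92.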
diff --git a/data_test.go b/data_test.go
@@ -166,7 +166,7 @@ func assertPercentiles(fn func(i float64) (float64, error), i float64, f float64
 }
 
 func TestPercentileMethods(t *testing.T) {
-	assertPercentiles(data1.Percentile, 75, 4.2, t)
+	assertPercentiles(data1.Percentile, 75, 4.4, t)
 	assertPercentiles(data1.PercentileNearestRank, 75, 4.2, t)
 
 }
diff --git a/describe_test.go b/describe_test.go
@@ -77,7 +77,7 @@ func TestDescribeValues(t *testing.T) {
 
 func TestDescribeString(t *testing.T) {
 	describe, _ := stats.Describe([]float64{1.0, 2.0, 3.0}, true, &[]float64{25.0, 50.0, 75.0})
-	if describe.String(2) != "count\t3\nmean\t2.00\nstd\t0.82\nmax\t3.00\nmin\t1.00\n25.00%\t1.50\n50.00%\t1.50\n75.00%\t2.50\nNaN OK\ttrue" {
+	if describe.String(2) != "count\t3\nmean\t2.00\nstd\t0.82\nmax\t3.00\nmin\t1.00\n25.00%\t1.50\n50.00%\t2.00\n75.00%\t2.50\nNaN OK\ttrue" {
 		t.Errorf("String output is not correct")
 	}
 }
diff --git a/percentile.go b/percentile.go
@@ -4,7 +4,19 @@ import (
 	"math"
 )
 
-// Percentile finds the relative standing in a slice of floats
+// Percentile finds the relative standing in a slice of floats.
+//
+// The function uses the Linear Interpolation Between Closest Ranks method,
+// in the (n - 1)-based rank variant noted by NIST [1] and used by Excel
+// (PERCENTILE), Google Sheets, NumPy (default), and other standard tools.
+//
+// Algorithm (for percent p and sorted data of length n):
+//
+//  1. Compute the rank: rank = (p / 100) * (n - 1)
+//  2. Split into integer part k and fractional part f
+//  3. Result = data[k] + f * (data[k+1] - data[k])
+//
+// [1] https://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm
 func Percentile(input Float64Data, percent float64) (percentile float64, err error) {
 	length := input.Len()
 	if length == 0 {
@@ -22,30 +34,17 @@ func Percentile(input Float64Data, percent float64) (percentile float64, err err
 	// Start by sorting a copy of the slice
 	c := sortedCopy(input)
 
-	// Multiply percent by length of input
-	index := (percent / 100) * float64(len(c))
-
-	// Check if the index is a whole number
-	if index == float64(int64(index)) {
-
-		// Convert float to int
-		i := int(index)
-
-		// Find the value at the index
-		percentile = c[i-1]
+	// Use the standard linear interpolation method:
+	// rank = (percent / 100) * (n - 1)
+	// result = c[k] + f * (c[k+1] - c[k])
+	rank := (percent / 100) * float64(length-1)
+	k := int(rank)
+	f := rank - float64(k)
 
+	if k+1 < length {
+		percentile = c[k] + f*(c[k+1]-c[k])
 	} else {
-		// Convert float to int via truncation
-		i := int(index)
-
-		// When 0 < index < 1, i == 0 and c[i-1] would underflow.
-		// In this boundary case, return the midpoint of the first two values.
-		if i == 0 {
-			percentile, _ = Mean(Float64Data{c[0], c[1]})
-		} else {
-			// Find the average of the index and following values
-			percentile, _ = Mean(Float64Data{c[i-1], c[i]})
-		}
+		percentile = c[k]
 	}
 
 	return percentile, nil
diff --git a/percentile_test.go b/percentile_test.go
@@ -17,12 +17,12 @@ func TestPercentile(t *testing.T) {
 		t.Errorf("%.1f != %.1f", m, 43.0)
 	}
 	m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 50)
-	if m != 5.0 {
-		t.Errorf("%.1f != %.1f", m, 5.0)
+	if m != 5.5 {
+		t.Errorf("%.1f != %.1f", m, 5.5)
 	}
 	m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 99.9)
-	if m != 9.5 {
-		t.Errorf("%.1f != %.1f", m, 9.5)
+	if !close(m, 9.991) {
+		t.Errorf("%v != %v", m, 9.991)
 	}
 	m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 100)
 	if m != 10.0 {
@@ -40,8 +40,8 @@ func TestPercentile(t *testing.T) {
 	if err != nil {
 		t.Errorf("Too low percent shouldn't return an error; got %v", err)
 	}
-	if m != 1.5 {
-		t.Errorf("%.1f != %.1f", m, 1.5)
+	if !close(m, 1.0052) {
+		t.Errorf("%v != %v", m, 1.0052)
 	}
 	_, err = stats.Percentile([]float64{1, 2, 3, 4, 5}, 101)
 	if err != stats.BoundsErr {
@@ -70,6 +70,25 @@ func TestPercentile_Issue88_ThreeValuesQ1(t *testing.T) {
 	}
 }
 
+func TestPercentile_Issue92(t *testing.T) {
+	data := []float64{20.737737800911837, 59.05787249563947, 16.547458636949685, 78.6771074284816}
+
+	m, _ := stats.Percentile(data, 50)
+	if !close(m, 39.89780514827565) {
+		t.Errorf("%v != %v", m, 39.89780514827565)
+	}
+
+	m, _ = stats.Percentile(data, 90)
+	if !close(m, 72.79133694862897) {
+		t.Errorf("%v != %v", m, 72.79133694862897)
+	}
+
+	m, _ = stats.Percentile(data, 95)
+	if !close(m, 75.7342221885553) {
+		t.Errorf("%v != %v", m, 75.7342221885553)
+	}
+}
+
 func TestPercentileSortSideEffects(t *testing.T) {
 	s := []float64{43, 54, 56, 44, 62, 66}
 	a := []float64{43, 54, 56, 44, 62, 66}

Original file line number	Diff line number	Diff line change
`@@ -166,7 +166,7 @@ func assertPercentiles(fn func(i float64) (float64, error), i float64, f float64`
`166`	`166`	`}`
`167`	`167`
`168`	`168`	`func TestPercentileMethods(t *testing.T) {`
`169`		`- assertPercentiles(data1.Percentile, 75, 4.2, t)`
	`169`	`+ assertPercentiles(data1.Percentile, 75, 4.4, t)`
`170`	`170`	`assertPercentiles(data1.PercentileNearestRank, 75, 4.2, t)`
`171`	`171`
`172`	`172`	`}`
Original file line number	Diff line number	Diff line change
`@@ -77,7 +77,7 @@ func TestDescribeValues(t *testing.T) {`
`77`	`77`
`78`	`78`	`func TestDescribeString(t *testing.T) {`
`79`	`79`	`describe, _ := stats.Describe([]float64{1.0, 2.0, 3.0}, true, &[]float64{25.0, 50.0, 75.0})`
`80`		`- if describe.String(2) != "count\t3\nmean\t2.00\nstd\t0.82\nmax\t3.00\nmin\t1.00\n25.00%\t1.50\n50.00%\t1.50\n75.00%\t2.50\nNaN OK\ttrue" {`
	`80`	`+ if describe.String(2) != "count\t3\nmean\t2.00\nstd\t0.82\nmax\t3.00\nmin\t1.00\n25.00%\t1.50\n50.00%\t2.00\n75.00%\t2.50\nNaN OK\ttrue" {`
`81`	`81`	`t.Errorf("String output is not correct")`
`82`	`82`	`}`
`83`	`83`	`}`