Skip to content

Commit e317bbb

Browse files
committed
fix: use standard linear interpolation for Percentile (issue #92)
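The previous algorithm used a non-standard formula that produced incorrect results. This switches to linear interpolation between closest ranks with rank = (p/100) * (n - 1) (Hyndman–Fan type 7), matching Excel PERCENTILE, Google Sheets, and NumPy's default. Note that the NIST handbook's primary definition uses a p(N+1) rank instead and mentions this formula as the variant used by Excel. Fixes #92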
1 parent 8743180 commit e317bbb

4 files changed

Lines changed: 49 additions & 31 deletions

File tree

data_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ func assertPercentiles(fn func(i float64) (float64, error), i float64, f float64
166166
}
167167

168168
func TestPercentileMethods(t *testing.T) {
169-
assertPercentiles(data1.Percentile, 75, 4.2, t)
169+
assertPercentiles(data1.Percentile, 75, 4.4, t)
170170
assertPercentiles(data1.PercentileNearestRank, 75, 4.2, t)
171171

172172
}

describe_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ func TestDescribeValues(t *testing.T) {
7777

7878
func TestDescribeString(t *testing.T) {
7979
describe, _ := stats.Describe([]float64{1.0, 2.0, 3.0}, true, &[]float64{25.0, 50.0, 75.0})
80-
if describe.String(2) != "count\t3\nmean\t2.00\nstd\t0.82\nmax\t3.00\nmin\t1.00\n25.00%\t1.50\n50.00%\t1.50\n75.00%\t2.50\nNaN OK\ttrue" {
80+
if describe.String(2) != "count\t3\nmean\t2.00\nstd\t0.82\nmax\t3.00\nmin\t1.00\n25.00%\t1.50\n50.00%\t2.00\n75.00%\t2.50\nNaN OK\ttrue" {
8181
t.Errorf("String output is not correct")
8282
}
8383
}

percentile.go

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,19 @@ import (
44
"math"
55
)
66

7-
// Percentile finds the relative standing in a slice of floats
7+
// Percentile finds the relative standing in a slice of floats.
8+
//
9+
// The function uses linear interpolation between closest ranks
10+
// (Hyndman-Fan type 7), the method used by Excel (PERCENTILE), Google Sheets,
11+
// and NumPy (default). NIST [1] lists it as an alternative to its p(N+1) rule.
12+
//
13+
// Algorithm (for percent p and sorted data of length n):
14+
//
15+
// 1. Compute the rank: rank = (p / 100) * (n - 1)
16+
// 2. Split into integer part k (a zero-based index) and fractional part f
17+
// 3. Result = data[k] + f * (data[k+1] - data[k]); when k is the last index (p = 100), Result = data[k]
18+
//
19+
// [1] https://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm
820
func Percentile(input Float64Data, percent float64) (percentile float64, err error) {
921
length := input.Len()
1022
if length == 0 {
@@ -22,30 +34,17 @@ func Percentile(input Float64Data, percent float64) (percentile float64, err err
2234
// Start by sorting a copy of the slice
2335
c := sortedCopy(input)
2436

25-
// Multiply percent by length of input
26-
index := (percent / 100) * float64(len(c))
27-
28-
// Check if the index is a whole number
29-
if index == float64(int64(index)) {
30-
31-
// Convert float to int
32-
i := int(index)
33-
34-
// Find the value at the index
35-
percentile = c[i-1]
37+
// Use the standard linear interpolation method:
38+
// rank = (percent / 100) * (n - 1)
39+
// result = c[k] + f * (c[k+1] - c[k])
40+
rank := (percent / 100) * float64(length-1)
41+
k := int(rank)
42+
f := rank - float64(k)
3643

44+
if k+1 < length {
45+
percentile = c[k] + f*(c[k+1]-c[k])
3746
} else {
38-
// Convert float to int via truncation
39-
i := int(index)
40-
41-
// When 0 < index < 1, i == 0 and c[i-1] would underflow.
42-
// In this boundary case, return the midpoint of the first two values.
43-
if i == 0 {
44-
percentile, _ = Mean(Float64Data{c[0], c[1]})
45-
} else {
46-
// Find the average of the index and following values
47-
percentile, _ = Mean(Float64Data{c[i-1], c[i]})
48-
}
47+
percentile = c[k]
4948
}
5049

5150
return percentile, nil

percentile_test.go

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ func TestPercentile(t *testing.T) {
1717
t.Errorf("%.1f != %.1f", m, 43.0)
1818
}
1919
m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 50)
20-
if m != 5.0 {
21-
t.Errorf("%.1f != %.1f", m, 5.0)
20+
if m != 5.5 {
21+
t.Errorf("%.1f != %.1f", m, 5.5)
2222
}
2323
m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 99.9)
24-
if m != 9.5 {
25-
t.Errorf("%.1f != %.1f", m, 9.5)
24+
if !close(m, 9.991) {
25+
t.Errorf("%v != %v", m, 9.991)
2626
}
2727
m, _ = stats.Percentile([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 100)
2828
if m != 10.0 {
@@ -40,8 +40,8 @@ func TestPercentile(t *testing.T) {
4040
if err != nil {
4141
t.Errorf("Too low percent shouldn't return an error; got %v", err)
4242
}
43-
if m != 1.5 {
44-
t.Errorf("%.1f != %.1f", m, 1.5)
43+
if !close(m, 1.0052) {
44+
t.Errorf("%v != %v", m, 1.0052)
4545
}
4646
_, err = stats.Percentile([]float64{1, 2, 3, 4, 5}, 101)
4747
if err != stats.BoundsErr {
@@ -70,6 +70,25 @@ func TestPercentile_Issue88_ThreeValuesQ1(t *testing.T) {
7070
}
7171
}
7272

73+
func TestPercentile_Issue92(t *testing.T) {
74+
data := []float64{20.737737800911837, 59.05787249563947, 16.547458636949685, 78.6771074284816}
75+
76+
m, _ := stats.Percentile(data, 50)
77+
if !close(m, 39.89780514827565) {
78+
t.Errorf("%v != %v", m, 39.89780514827565)
79+
}
80+
81+
m, _ = stats.Percentile(data, 90)
82+
if !close(m, 72.79133694862897) {
83+
t.Errorf("%v != %v", m, 72.79133694862897)
84+
}
85+
86+
m, _ = stats.Percentile(data, 95)
87+
if !close(m, 75.7342221885553) {
88+
t.Errorf("%v != %v", m, 75.7342221885553)
89+
}
90+
}
91+
7392
func TestPercentileSortSideEffects(t *testing.T) {
7493
s := []float64{43, 54, 56, 44, 62, 66}
7594
a := []float64{43, 54, 56, 44, 62, 66}

0 commit comments

Comments
 (0)