Skip to content

Commit 7432121

Browse files
authored
Merge pull request #8 from kgrygiel/master
Base Recommender model classes: Histogram and CircularBuffer.
2 parents 1358f0c + 491e643 commit 7432121

File tree

6 files changed

+563
-0
lines changed

6 files changed

+563
-0
lines changed
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/*
2+
Copyright 2017 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package util
18+
19+
// Histogram is an approximate, weighted distribution of a single variable.
// Samples can be added to it as well as taken back out of it.
type Histogram interface {
	// Percentile returns an approximation of the requested percentile of the
	// distribution. The percentile argument is a fraction between 0 and 1,
	// e.g. 0.5 asks for the median and 0.9 for the 90th percentile.
	// An empty histogram yields 0.0.
	Percentile(percentile float64) float64

	// AddSample records a sample with the given value and weight.
	AddSample(value float64, weight float64)

	// SubtractSample takes back a sample with the given value and weight.
	// Note that the total weight of samples with a given value cannot be
	// negative.
	SubtractSample(value float64, weight float64)

	// IsEmpty reports whether the histogram is empty.
	IsEmpty() bool
}
38+
39+
// NewHistogram returns a new Histogram instance using given options.
40+
func NewHistogram(options HistogramOptions) Histogram {
41+
return &histogram{
42+
&options, make([]float64, options.NumBuckets()), 0.0,
43+
options.NumBuckets() - 1, 0}
44+
}
45+
46+
// Simple bucket-based implementation of the Histogram interface. Each bucket
47+
// holds the total weight of samples that belong to it.
48+
// Percentile() returns the middle of the correspodning bucket.
49+
// Resolution (bucket boundaries) of the histogram depends on the options.
50+
// There's no interpolation within buckets (i.e. one sample falls to exactly one
51+
// bucket).
52+
// A bucket is considered empty if its weight is smaller than options.Epsilon().
53+
type histogram struct {
54+
// Bucketing scheme.
55+
options *HistogramOptions
56+
// Cumulative weight of samples in each bucket.
57+
bucketWeight []float64
58+
// Total cumulative weight of samples in all buckets.
59+
totalWeight float64
60+
// Index of the first non-empty bucket if there's any. Otherwise index
61+
// of the last bucket.
62+
minBucket int
63+
// Index of the last non-empty bucket if there's any. Otherwise 0.
64+
maxBucket int
65+
}
66+
67+
func (h *histogram) AddSample(value float64, weight float64) {
68+
if weight < 0.0 {
69+
panic("sample weight must be non-negative")
70+
}
71+
bucket := (*h.options).FindBucket(value)
72+
h.bucketWeight[bucket] += weight
73+
h.totalWeight += weight
74+
if bucket < h.minBucket {
75+
h.minBucket = bucket
76+
}
77+
if bucket > h.maxBucket {
78+
h.maxBucket = bucket
79+
}
80+
}
81+
func (h *histogram) SubtractSample(value float64, weight float64) {
82+
if weight < 0.0 {
83+
panic("sample weight must be non-negative")
84+
}
85+
bucket := (*h.options).FindBucket(value)
86+
epsilon := (*h.options).Epsilon()
87+
if weight > h.bucketWeight[bucket]-epsilon {
88+
weight = h.bucketWeight[bucket]
89+
}
90+
h.totalWeight -= weight
91+
h.bucketWeight[bucket] -= weight
92+
lastBucket := (*h.options).NumBuckets() - 1
93+
for h.bucketWeight[h.minBucket] < epsilon && h.minBucket < lastBucket {
94+
h.minBucket++
95+
}
96+
for h.bucketWeight[h.maxBucket] < epsilon && h.maxBucket > 0 {
97+
h.maxBucket--
98+
}
99+
}
100+
101+
func (h *histogram) Percentile(percentile float64) float64 {
102+
if h.IsEmpty() {
103+
return 0.0
104+
}
105+
partialSum := 0.0
106+
threshold := percentile * h.totalWeight
107+
bucket := h.minBucket
108+
for ; bucket < h.maxBucket; bucket++ {
109+
partialSum += h.bucketWeight[bucket]
110+
if partialSum >= threshold {
111+
break
112+
}
113+
}
114+
bucketStart := (*h.options).GetBucketStart(bucket)
115+
if bucket < (*h.options).NumBuckets()-1 {
116+
// Return the middle point between the bucket boundaries.
117+
bucketEnd := (*h.options).GetBucketStart(bucket + 1)
118+
return (bucketStart + bucketEnd) / 2.0
119+
}
120+
// Return the start of the last bucket (note that the last bucket
121+
// doesn't have an upper bound).
122+
return bucketStart
123+
}
124+
125+
func (h *histogram) IsEmpty() bool {
126+
return h.bucketWeight[h.minBucket] < (*h.options).Epsilon()
127+
}
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
/*
2+
Copyright 2017 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package util
18+
19+
import (
20+
"errors"
21+
"fmt"
22+
"math"
23+
)
24+
25+
// HistogramOptions describes the bucketing scheme of a histogram: the number
// of buckets and their size.
type HistogramOptions interface {
	// NumBuckets returns the number of buckets in the histogram.
	NumBuckets() int
	// FindBucket returns the index of the bucket the given value falls into.
	// A value outside of the range covered by the histogram is assigned to
	// the closest bucket, i.e. either the first or the last one.
	FindBucket(value float64) int
	// GetBucketStart returns the start of the bucket with the given index.
	// The result is undefined for an index outside of the
	// [0..NumBuckets() - 1] range.
	GetBucketStart(bucket int) float64
	// Epsilon returns the minimum weight a bucket needs in order to be
	// considered non-empty.
	Epsilon() float64
}
39+
40+
// NewLinearHistogramOptions returns HistogramOptions describing a histogram
41+
// with a given number of fixed-size buckets, with the first bucket start at 0.0
42+
// and the last bucket start larger or equal to maxValue.
43+
// Requires maxValue > 0, bucketSize > 0, epsilon > 0.
44+
func NewLinearHistogramOptions(
45+
maxValue float64, bucketSize float64, epsilon float64) (HistogramOptions, error) {
46+
if maxValue <= 0.0 || bucketSize <= 0.0 || epsilon <= 0.0 {
47+
return nil, errors.New("maxValue and bucketSize must both be positive")
48+
}
49+
numBuckets := int(math.Ceil(maxValue/bucketSize)) + 1
50+
return &linearHistogramOptions{numBuckets, bucketSize, epsilon}, nil
51+
}
52+
53+
// NewExponentialHistogramOptions returns HistogramOptions describing a
54+
// histogram with exponentially growing bucket boundaries. The first bucket
55+
// covers the range [0..firstBucketSize). Consecutive buckets are of the form
56+
// [x(n)..x(n) * ratio) for n = 1 .. numBuckets - 1.
57+
// The last bucket start is larger or equal to maxValue.
58+
// Requires maxValue > 0, firstBucketSize > 0, ratio > 1, epsilon > 0.
59+
func NewExponentialHistogramOptions(
60+
maxValue float64, firstBucketSize float64, ratio float64, epsilon float64) (HistogramOptions, error) {
61+
if maxValue <= 0.0 || firstBucketSize <= 0.0 || ratio <= 1.0 || epsilon <= 0.0 {
62+
return nil, errors.New(
63+
"maxValue, firstBucketSize and epsilon must be > 0.0, ratio must be > 1.0")
64+
}
65+
numBuckets := int(math.Ceil(math.Log(maxValue/firstBucketSize)/math.Log(ratio))) + 2
66+
return &exponentialHistogramOptions{numBuckets, firstBucketSize, ratio, epsilon}, nil
67+
}
68+
69+
// linearHistogramOptions is a bucketing scheme with fixed-size buckets, the
// first of which starts at 0.0.
type linearHistogramOptions struct {
	// numBuckets is the total number of buckets.
	numBuckets int
	// bucketSize is the width of every bucket; bucket i starts at
	// i * bucketSize.
	bucketSize float64
	// epsilon is the value reported by Epsilon().
	epsilon float64
}
74+
75+
// exponentialHistogramOptions is a bucketing scheme whose bucket boundaries
// grow exponentially.
type exponentialHistogramOptions struct {
	// numBuckets is the total number of buckets.
	numBuckets int
	// firstBucketSize is the width of bucket 0 and therefore the start of
	// bucket 1.
	firstBucketSize float64
	// ratio is the factor between the starts of consecutive buckets from
	// bucket 1 onwards.
	ratio float64
	// epsilon is the value reported by Epsilon().
	epsilon float64
}
81+
82+
func (o *linearHistogramOptions) NumBuckets() int {
83+
return o.numBuckets
84+
}
85+
86+
func (o *linearHistogramOptions) FindBucket(value float64) int {
87+
bucket := int(value / o.bucketSize)
88+
if bucket < 0 {
89+
return 0
90+
}
91+
if bucket >= o.numBuckets {
92+
return o.numBuckets - 1
93+
}
94+
return bucket
95+
}
96+
97+
func (o *linearHistogramOptions) GetBucketStart(bucket int) float64 {
98+
if bucket < 0 || bucket >= o.numBuckets {
99+
panic(fmt.Sprintf("index %d out of range [0..%d]", bucket, o.numBuckets-1))
100+
}
101+
return float64(bucket) * o.bucketSize
102+
}
103+
104+
func (o *linearHistogramOptions) Epsilon() float64 {
105+
return o.epsilon
106+
}
107+
108+
func (o *exponentialHistogramOptions) NumBuckets() int {
109+
return o.numBuckets
110+
}
111+
112+
func (o *exponentialHistogramOptions) FindBucket(value float64) int {
113+
if value < o.firstBucketSize {
114+
return 0
115+
}
116+
bucket := int(math.Log(value/o.firstBucketSize)/math.Log(o.ratio)) + 1
117+
if bucket >= o.numBuckets {
118+
return o.numBuckets - 1
119+
}
120+
return bucket
121+
}
122+
123+
func (o *exponentialHistogramOptions) GetBucketStart(bucket int) float64 {
124+
if bucket < 0 || bucket >= o.numBuckets {
125+
panic(fmt.Sprintf("index %d out of range [0..%d]", bucket, o.numBuckets-1))
126+
}
127+
if bucket == 0 {
128+
return 0.0
129+
}
130+
return o.firstBucketSize * math.Pow(o.ratio, float64(bucket-1))
131+
}
132+
133+
func (o *exponentialHistogramOptions) Epsilon() float64 {
134+
return o.epsilon
135+
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
Copyright 2017 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package util
18+
19+
import (
20+
"testing"
21+
22+
"github.com/stretchr/testify/assert"
23+
)
24+
25+
// epsilon is the minimum bucket weight passed to the options under test.
var epsilon = 0.001
28+
29+
// Test all methods of LinearHistogramOptions using a sample bucketing scheme.
30+
func TestLinearHistogramOptions(t *testing.T) {
31+
o, err := NewLinearHistogramOptions(5.0, 0.3, epsilon)
32+
assert.Nil(t, err)
33+
assert.Equal(t, epsilon, o.Epsilon())
34+
assert.Equal(t, 18, o.NumBuckets())
35+
36+
assert.Equal(t, 0.0, o.GetBucketStart(0))
37+
assert.Equal(t, 5.1, o.GetBucketStart(17))
38+
39+
assert.Equal(t, 0, o.FindBucket(-1.0))
40+
assert.Equal(t, 0, o.FindBucket(0.0))
41+
assert.Equal(t, 4, o.FindBucket(1.3))
42+
assert.Equal(t, 17, o.FindBucket(100.0))
43+
}
44+
45+
// Test all methods of ExponentialHistogramOptions using a sample bucketing scheme.
46+
func TestExponentialHistogramOptions(t *testing.T) {
47+
o, err := NewExponentialHistogramOptions(100.0, 10.0, 2.0, epsilon)
48+
assert.Nil(t, err)
49+
assert.Equal(t, epsilon, o.Epsilon())
50+
assert.Equal(t, 6, o.NumBuckets())
51+
52+
assert.Equal(t, 0.0, o.GetBucketStart(0))
53+
assert.Equal(t, 10.0, o.GetBucketStart(1))
54+
assert.Equal(t, 20.0, o.GetBucketStart(2))
55+
assert.Equal(t, 40.0, o.GetBucketStart(3))
56+
assert.Equal(t, 80.0, o.GetBucketStart(4))
57+
assert.Equal(t, 160.0, o.GetBucketStart(5))
58+
59+
assert.Equal(t, 0, o.FindBucket(-1.0))
60+
assert.Equal(t, 0, o.FindBucket(9.99))
61+
assert.Equal(t, 1, o.FindBucket(10.0))
62+
assert.Equal(t, 2, o.FindBucket(20.0))
63+
assert.Equal(t, 5, o.FindBucket(200.0))
64+
}

0 commit comments

Comments
 (0)