-
-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathbench_test.go
More file actions
100 lines (88 loc) · 2.33 KB
/
bench_test.go
File metadata and controls
100 lines (88 loc) · 2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
package contextlab
import (
	"math/rand"
	"strconv"
	"testing"

	"github.com/Siddhant-K-code/distill/pkg/types"
)
// deterministicEmbedding returns a vector of dims pseudo-random float32
// values drawn from a math/rand generator seeded with seed. Calling it again
// with the same seed reproduces the same values, which keeps benchmark inputs
// identical from run to run.
func deterministicEmbedding(seed int64, dims int) []float32 {
	source := rand.NewSource(seed)
	gen := rand.New(source)

	embedding := make([]float32, dims)
	for idx := 0; idx < dims; idx++ {
		embedding[idx] = gen.Float32()
	}
	return embedding
}
// makeBenchChunks builds n chunks with deterministic embeddings of the given
// dimensionality.
//
// Chunk i gets:
//   - ID: a letter 'A'..'Z' chosen by i%26 followed by the decimal value of
//     i/26. For i < 260 this is one letter plus one digit; larger indices get
//     more digits, so every chunk in the slice has a distinct ID. (Reducing
//     the numeric part modulo 10 would repeat IDs from index 260 onward,
//     which affects the 500-chunk benchmark.)
//   - Text: the same fixed sentence for every chunk.
//   - Embedding: deterministicEmbedding seeded with i.
func makeBenchChunks(n, dims int) []types.Chunk {
	chunks := make([]types.Chunk, n)
	for i := range chunks {
		chunks[i] = types.Chunk{
			ID:        string(rune('A'+i%26)) + strconv.Itoa(i/26),
			Text:      "benchmark chunk content for semantic deduplication testing",
			Embedding: deterministicEmbedding(int64(i), dims),
		}
	}
	return chunks
}
// BenchmarkCluster_10Chunks times ClusterByThreshold(chunks, 0.15) on 10
// chunks with 128-dimensional deterministic embeddings.
func BenchmarkCluster_10Chunks(b *testing.B) {
	const (
		numChunks = 10
		dims      = 128
	)
	input := makeBenchChunks(numChunks, dims)

	// Fixture construction above is excluded from the measurement.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = ClusterByThreshold(input, 0.15)
	}
}
// BenchmarkCluster_50Chunks times ClusterByThreshold(chunks, 0.15) on 50
// chunks with 128-dimensional deterministic embeddings.
func BenchmarkCluster_50Chunks(b *testing.B) {
	const (
		numChunks = 50
		dims      = 128
	)
	input := makeBenchChunks(numChunks, dims)

	// Fixture construction above is excluded from the measurement.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = ClusterByThreshold(input, 0.15)
	}
}
// BenchmarkCluster_100Chunks times ClusterByThreshold(chunks, 0.15) on 100
// chunks with 128-dimensional deterministic embeddings.
func BenchmarkCluster_100Chunks(b *testing.B) {
	const (
		numChunks = 100
		dims      = 128
	)
	input := makeBenchChunks(numChunks, dims)

	// Fixture construction above is excluded from the measurement.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = ClusterByThreshold(input, 0.15)
	}
}
// BenchmarkCluster_500Chunks times ClusterByThreshold(chunks, 0.15) on 500
// chunks with 128-dimensional deterministic embeddings.
func BenchmarkCluster_500Chunks(b *testing.B) {
	const (
		numChunks = 500
		dims      = 128
	)
	input := makeBenchChunks(numChunks, dims)

	// Fixture construction above is excluded from the measurement.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = ClusterByThreshold(input, 0.15)
	}
}
// BenchmarkMMR_10Chunks times MMRRerank(chunks, 0.7, 5) on 10 chunks with
// 128-dimensional deterministic embeddings.
func BenchmarkMMR_10Chunks(b *testing.B) {
	const (
		numChunks = 10
		dims      = 128
	)
	candidates := makeBenchChunks(numChunks, dims)

	// Only the rerank calls below are timed.
	b.ResetTimer()
	for run := 0; run < b.N; run++ {
		_ = MMRRerank(candidates, 0.7, 5)
	}
}
// BenchmarkMMR_50Chunks times MMRRerank(chunks, 0.7, 10) on 50 chunks with
// 128-dimensional deterministic embeddings.
func BenchmarkMMR_50Chunks(b *testing.B) {
	const (
		numChunks = 50
		dims      = 128
	)
	candidates := makeBenchChunks(numChunks, dims)

	// Only the rerank calls below are timed.
	b.ResetTimer()
	for run := 0; run < b.N; run++ {
		_ = MMRRerank(candidates, 0.7, 10)
	}
}
func BenchmarkSelector_10Clusters(b *testing.B) {
chunks := makeBenchChunks(10, 128)
result := ClusterByThreshold(chunks, 0.15)
sel := NewSelector(DefaultSelectorConfig())
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = sel.Select(result)
}
}
func BenchmarkSelector_50Clusters(b *testing.B) {
chunks := makeBenchChunks(50, 128)
result := ClusterByThreshold(chunks, 0.15)
sel := NewSelector(DefaultSelectorConfig())
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = sel.Select(result)
}
}