-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoperations.py
More file actions
110 lines (92 loc) · 3.01 KB
/
Copy pathoperations.py
File metadata and controls
110 lines (92 loc) · 3.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""Benchmark operations for CRUD methods."""
import statistics
import time
from sqlite_vec_client import SQLiteVecClient
from .utils import benchmark_operation
def benchmark_add(
    client: SQLiteVecClient,
    texts: list[str],
    embeddings: list[list[float]],
    metadata: list[dict],
) -> dict:
    """Time a single bulk ``client.add`` call and summarise it.

    Args:
        client: Client whose ``add`` method is measured.
        texts: Forwarded to ``client.add``; its length is reported as ``count``.
        embeddings: Forwarded to ``client.add`` unchanged.
        metadata: Forwarded to ``client.add`` unchanged.

    Returns:
        A dict with the keys ``operation``, ``count``, ``time`` and
        ``ops_per_sec`` (``count`` divided by ``time``).
    """
    # benchmark_operation yields a pair; only the first item (the timing) is
    # used here, the value returned by client.add is discarded.
    duration, _ = benchmark_operation(
        client.add, texts=texts, embeddings=embeddings, metadata=metadata
    )
    total = len(texts)
    summary = {"operation": "add", "count": total, "time": duration}
    summary["ops_per_sec"] = total / duration
    return summary
def benchmark_get_many(
    client: SQLiteVecClient, rowids: list[int], chunk_size: int = 500
) -> dict:
    """Benchmark get_many operations.

    The rowids are fetched in consecutive slices of at most ``chunk_size``
    ids, so a single ``client.get_many`` call stays below SQLite's
    bound-variable limit (999).

    Args:
        client: Client whose ``get_many`` method is measured.
        rowids: Row ids to fetch; its length is reported as ``count``.
        chunk_size: Maximum number of ids handed to one ``get_many`` call.
            Defaults to 500, the value that used to be hard-coded.

    Returns:
        A dict with the keys ``operation``, ``count``, ``time`` (seconds
        measured with ``time.perf_counter`` across all chunks) and
        ``ops_per_sec`` (``count`` divided by ``time``).

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        # A negative step would make range() empty and silently time nothing.
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    # Uses the module-level ``time`` import; no function-local import needed.
    start = time.perf_counter()
    for offset in range(0, len(rowids), chunk_size):
        client.get_many(rowids[offset : offset + chunk_size])
    elapsed = time.perf_counter() - start

    return {
        "operation": "get_many",
        "count": len(rowids),
        "time": elapsed,
        "ops_per_sec": len(rowids) / elapsed,
    }
def benchmark_similarity_search(
    client: SQLiteVecClient, embedding: list[float], top_k: int, iterations: int
) -> dict:
    """Time repeated ``client.similarity_search`` calls and aggregate them.

    Args:
        client: Client whose ``similarity_search`` method is measured.
        embedding: Query vector forwarded to every search call.
        top_k: Forwarded to every search call and echoed in the result.
        iterations: Number of searches to run; reported as ``count``.

    Returns:
        A dict with the keys ``operation``, ``top_k``, ``count``, ``time``
        (sum of all per-call timings), ``ops_per_sec``, ``avg_time``,
        ``min_time`` and ``max_time``.
    """
    # Lazily run one timed search per iteration; each run yields a pair whose
    # first item is the timing and whose second item is ignored.
    runs = (
        benchmark_operation(
            client.similarity_search, embedding=embedding, top_k=top_k
        )
        for _ in range(iterations)
    )
    durations = [duration for duration, _result in runs]

    # The mean is taken before any division, mirroring the order in which
    # an empty timing list would fail.
    mean_duration = statistics.mean(durations)
    overall = sum(durations)
    return {
        "operation": "similarity_search",
        "top_k": top_k,
        "count": iterations,
        "time": overall,
        "ops_per_sec": iterations / overall,
        "avg_time": mean_duration,
        "min_time": min(durations),
        "max_time": max(durations),
    }
def benchmark_update_many(
    client: SQLiteVecClient, rowids: list[int], texts: list[str]
) -> dict:
    """Time a single ``client.update_many`` call that rewrites texts.

    Args:
        client: Client whose ``update_many`` method is measured.
        rowids: Row ids to update, paired positionally with ``texts``.
        texts: Replacement texts; pairing stops at the shorter sequence.

    Returns:
        A dict with the keys ``operation``, ``count``, ``time`` and
        ``ops_per_sec``. ``count`` is the value produced by the timed call.
    """
    # Each update is a 4-tuple; the last two slots are always None.
    # NOTE(review): presumably None means "leave that field unchanged" --
    # confirm against client.update_many.
    payload = []
    for rid, text in zip(rowids, texts):
        payload.append((rid, text, None, None))

    duration, updated = benchmark_operation(client.update_many, payload)
    summary = {"operation": "update_many", "count": updated, "time": duration}
    summary["ops_per_sec"] = updated / duration
    return summary
def benchmark_delete_many(client: SQLiteVecClient, rowids: list[int]) -> dict:
    """Time a single ``client.delete_many`` call.

    Args:
        client: Client whose ``delete_many`` method is measured.
        rowids: Row ids handed to ``client.delete_many``.

    Returns:
        A dict with the keys ``operation``, ``count``, ``time`` and
        ``ops_per_sec``. ``count`` is the value produced by the timed call
        (presumably the number of rows removed -- confirm against the client).
    """
    duration, removed = benchmark_operation(client.delete_many, rowids)
    rate = removed / duration
    return dict(
        operation="delete_many",
        count=removed,
        time=duration,
        ops_per_sec=rate,
    )
def benchmark_get_all(
    client: SQLiteVecClient, expected_count: int, batch_size: int
) -> dict:
    """Time a full iteration over ``client.get_all``.

    Args:
        client: Client whose ``get_all`` method is measured.
        expected_count: Accepted for the caller's benefit; this function
            does not read it.
        batch_size: Forwarded to ``client.get_all`` as a keyword argument.

    Returns:
        A dict with the keys ``operation``, ``count`` (number of items the
        iteration produced), ``time`` (seconds measured with
        ``time.perf_counter``) and ``ops_per_sec``.
    """
    began = time.perf_counter()
    seen = 0
    # Drain the iterable completely; only the number of items matters.
    for _record in client.get_all(batch_size=batch_size):
        seen += 1
    duration = time.perf_counter() - began

    summary = {"operation": "get_all", "count": seen, "time": duration}
    summary["ops_per_sec"] = seen / duration
    return summary