-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathbench_s3vfs.py
More file actions
365 lines (315 loc) · 11.8 KB
/
Copy pathbench_s3vfs.py
File metadata and controls
365 lines (315 loc) · 11.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
#!/usr/bin/env python3
"""
Cold-latency benchmark for sqlite-s3vfs.
Mirrors the turbolite tiered-bench queries and parameter generation so the
numbers are comparable. sqlite-s3vfs stores each SQLite page as a separate S3
object, so this is the "naive one-object-per-page" baseline.
"""
import argparse
import os
import statistics
import time
from typing import Callable, List
import apsw
import boto3
import sqlite_s3vfs
# ---------------------------------------------------------------------------
# Parameter generation: exact port of Rust phash() used by tiered-bench.rs
# ---------------------------------------------------------------------------
def phash(seed: int) -> int:
    """Hash *seed* into an unsigned 64-bit integer.

    The seed is truncated to 64 bits, run through one multiply-add step, and
    then mixed with two xor-shifts around a second multiplication. Every
    product is masked to 64 bits so Python's unbounded integers wrap the way
    fixed-width arithmetic would.
    """
    mask = 0xFFFFFFFFFFFFFFFF
    state = ((seed & mask) * 6364136223846793005 + 1442695040888963407) & mask
    state ^= state >> 33
    state = (state * 0xFF51AFD7ED558CCD) & mask
    return state ^ (state >> 33)
def make_param_fn(n_posts: int, n_users: int, query: str) -> Callable[[int], tuple]:
    """Return the bind-parameter generator for the named benchmark query.

    The returned callable maps an iteration index ``i`` to the tuple of SQL
    parameters for that iteration. Each parameter is ``phash(i + offset)``
    reduced modulo a bound: the post count, the user count, or the constant 50
    used for the like-count threshold.

    Raises:
        KeyError: if *query* is not one of the known query names.
    """
    # query name -> one (seed offset, modulus) pair per bound parameter
    recipes = {
        "post": ((500, n_posts),),
        "profile": ((100, n_users),),
        "who-liked": ((200, n_posts),),
        "mutual": ((300, n_users), (400, n_users)),
        "idx-filter": ((600, n_users),),
        "scan-filter": ((700, 50),),
    }
    recipe = recipes[query]

    def generate(i: int) -> tuple:
        return tuple(phash(i + offset) % modulus for offset, modulus in recipe)

    return generate
# ---------------------------------------------------------------------------
# Queries (must match tiered-bench.rs)
# ---------------------------------------------------------------------------
# Benchmark SQL keyed by query name. The keys are the names accepted on the
# command line and by make_param_fn(); ?1 / ?2 are the positional parameters
# that make_param_fn() generates for the same key.
QUERIES = {
    # One post looked up by id, joined with its author's user row.
    "post": """
SELECT posts.id, posts.content, posts.created_at, posts.like_count,
users.first_name, users.last_name, users.school, users.city
FROM posts
JOIN users ON users.id = posts.user_id
WHERE posts.id = ?1
""",
    # One user's profile fields plus up to 10 of their posts, newest first.
    "profile": """
SELECT users.first_name, users.last_name, users.school, users.city, users.bio,
posts.id, posts.content, posts.created_at, posts.like_count
FROM users
JOIN posts ON posts.user_id = users.id
WHERE users.id = ?1
ORDER BY posts.created_at DESC
LIMIT 10
""",
    # Up to 50 users who liked a given post, most recent like first.
    "who-liked": """
SELECT users.first_name, users.last_name, users.school, likes.created_at
FROM likes
JOIN users ON users.id = likes.user_id
WHERE likes.post_id = ?1
ORDER BY likes.created_at DESC
LIMIT 50
""",
    # Up to 20 users that appear as user_b for both user_a = ?1 and
    # user_a = ?2 (self-join of friendships on user_b).
    "mutual": """
SELECT users.id, users.first_name, users.last_name, users.school
FROM friendships
JOIN friendships AS friendships_b ON friendships.user_b = friendships_b.user_b
JOIN users ON users.id = friendships.user_b
WHERE friendships.user_a = ?1 AND friendships_b.user_a = ?2
LIMIT 20
""",
    # Count of posts for one user_id.
    # NOTE(review): the name suggests an index on posts.user_id serves this;
    # the schema is not visible here -- confirm.
    "idx-filter": """
SELECT COUNT(*) FROM posts WHERE user_id = ?1
""",
    # Count of posts whose like_count exceeds a threshold.
    # NOTE(review): presumably a full scan of posts; confirm against the schema.
    "scan-filter": """
SELECT COUNT(*) FROM posts WHERE like_count > ?1
""",
}
# ---------------------------------------------------------------------------
# S3 helpers
# ---------------------------------------------------------------------------
def get_bucket():
    """Build the boto3 S3 ``Bucket`` resource the benchmark runs against.

    The bucket name comes from ``TIERED_TEST_BUCKET``, falling back to
    ``BUCKET_NAME``. ``AWS_REGION`` (default ``"auto"``) and, when set,
    ``AWS_ENDPOINT_URL`` are passed to the S3 resource constructor.

    Raises:
        RuntimeError: if neither bucket variable holds a non-empty value.
    """
    env = os.environ
    bucket_name = env.get("TIERED_TEST_BUCKET") or env.get("BUCKET_NAME")
    if not bucket_name:
        raise RuntimeError("Set TIERED_TEST_BUCKET or BUCKET_NAME")

    resource_options = {"region_name": env.get("AWS_REGION", "auto")}
    endpoint_url = env.get("AWS_ENDPOINT_URL")
    if endpoint_url:
        # Only override the endpoint when one is explicitly configured.
        resource_options["endpoint_url"] = endpoint_url

    s3 = boto3.Session().resource("s3", **resource_options)
    return s3.Bucket(bucket_name)
def upload_if_missing(local_path: str, key_prefix: str, bucket, block_size: int):
    """Populate ``key_prefix/`` from a local SQLite file unless it already has objects.

    If at least one object exists under ``key_prefix/`` the upload is skipped.
    Otherwise the file at *local_path* is streamed in 1 MiB chunks into
    ``S3VFS.deserialize_iter`` using the given *block_size*.
    """
    prefix_probe = bucket.objects.filter(Prefix=key_prefix + "/").limit(1)
    if list(prefix_probe):
        print(f" [s3vfs] found existing prefix {key_prefix}/")
        return

    def chunks(handle):
        # Yield successive 1 MiB reads until the file is exhausted.
        while chunk := handle.read(1024 * 1024):
            yield chunk

    print(f" [s3vfs] uploading {local_path} -> s3://{bucket.name}/{key_prefix}/")
    vfs = sqlite_s3vfs.S3VFS(bucket=bucket, block_size=block_size)
    with open(local_path, "rb") as db_file:
        vfs.deserialize_iter(key_prefix=key_prefix, bytes_iter=chunks(db_file))
    print(f" [s3vfs] upload complete")
# ---------------------------------------------------------------------------
# Benchmark runner
# ---------------------------------------------------------------------------
def run_query_cold(
    bucket,
    key_prefix: str,
    block_size: int,
    sql: str,
    params: tuple,
    per_query_timeout: float | None,
) -> dict:
    """Run a single query with a fresh S3VFS + connection (cold).

    A new ``S3VFS`` and a new APSW connection are created for every call so
    that nothing cached by a previous call is reused. The timed region starts
    just before the connection is opened and covers the page-size check and
    the query itself.

    Args:
        bucket: S3 bucket resource handed to ``sqlite_s3vfs.S3VFS``.
        key_prefix: Database name, i.e. the key prefix of the stored blocks.
        block_size: s3vfs block size; must equal the database page size.
        sql: Statement to execute.
        params: Bind parameters for *sql*.
        per_query_timeout: Seconds after which the query is interrupted with
            ``TimeoutError``. ``None`` or ``0`` disables the timeout. The
            timeout is delivered through ``SIGALRM``, so it is only usable
            where that signal exists and, per the ``signal`` module's rules,
            from the main thread.

    Returns:
        ``{"elapsed_ms", "gets", "bytes", "rows"}``: wall time in
        milliseconds, number of block fetches, total bytes those fetches
        returned, and the number of result rows.

    Raises:
        RuntimeError: if the database page size differs from *block_size*.
        TimeoutError: if the query runs longer than *per_query_timeout*.
    """
    get_count = 0
    bytes_read = 0

    # Monkey-patch sqlite_s3vfs's page fetch to count S3 GetObject calls and
    # bytes. Patching boto3's resource Object class does not work because
    # sqlite_s3vfs keeps a different bucket resource with its own Object class.
    orig_block_bytes = sqlite_s3vfs.S3VFSFile._block_bytes

    def counted_block_bytes(self, block):
        nonlocal get_count, bytes_read
        block_bytes = orig_block_bytes(self, block)
        get_count += 1
        bytes_read += len(block_bytes)
        return block_bytes

    sqlite_s3vfs.S3VFSFile._block_bytes = counted_block_bytes
    try:
        vfs = sqlite_s3vfs.S3VFS(bucket=bucket, block_size=block_size)
        start = time.perf_counter()
        with apsw.Connection(key_prefix, vfs=vfs.name) as conn:
            cursor = conn.cursor()
            # Ensure page-size matches block size (database header already has it,
            # but this prevents accidental mismatches).
            cursor.execute("PRAGMA page_size", ())
            actual_ps = cursor.fetchall()[0][0]
            if actual_ps != block_size:
                raise RuntimeError(
                    f"SQLite page_size {actual_ps} != s3vfs block_size {block_size}"
                )
            if per_query_timeout:
                # APSW supports busy timeout but not statement timeout directly.
                # We use Python's signal-based timeout as a safety net.
                import signal

                def _timeout_handler(signum, frame):
                    raise TimeoutError(f"query exceeded {per_query_timeout}s")

                # Remember whatever handler was installed before so this
                # function does not leave process-global signal state changed.
                previous_handler = signal.signal(signal.SIGALRM, _timeout_handler)
                try:
                    signal.setitimer(signal.ITIMER_REAL, per_query_timeout)
                    cursor.execute(sql, params)
                    rows = cursor.fetchall()
                finally:
                    # Disarm with the same API that armed the timer; mixing
                    # setitimer() with alarm() is not portable.
                    signal.setitimer(signal.ITIMER_REAL, 0)
                    # signal.signal() returns None for a handler that was not
                    # installed from Python; that value cannot be re-installed.
                    if previous_handler is not None:
                        signal.signal(signal.SIGALRM, previous_handler)
            else:
                cursor.execute(sql, params)
                rows = cursor.fetchall()
        elapsed = time.perf_counter() - start
        return {
            "elapsed_ms": elapsed * 1000.0,
            "gets": get_count,
            "bytes": bytes_read,
            "rows": len(rows),
        }
    finally:
        # Always undo the monkey-patch, even when the query fails or times out.
        sqlite_s3vfs.S3VFSFile._block_bytes = orig_block_bytes
def percentile(sorted_vals: List[float], p: float) -> float:
    """Return the *p*-th percentile of an ascending list, linearly interpolated.

    *sorted_vals* must already be sorted; it is not sorted here. The fractional
    rank ``(len - 1) * p / 100`` is split into its integer part and remainder,
    and the result is interpolated between the two neighbouring values.
    An empty list yields ``0.0``.
    """
    if not sorted_vals:
        return 0.0

    last_index = len(sorted_vals) - 1
    rank = last_index * p / 100.0
    lower = int(rank)
    # Clamp so that p == 100 interpolates the last element with itself.
    upper = min(lower + 1, last_index)

    below = sorted_vals[lower]
    above = sorted_vals[upper]
    return below + (rank - lower) * (above - below)
def bench_query(
    bucket,
    key_prefix: str,
    block_size: int,
    query: str,
    n_posts: int,
    n_users: int,
    iterations: int,
    warmup: int,
    per_query_timeout: float | None,
) -> dict:
    """Benchmark one named query and summarise its cold-run statistics.

    Runs *warmup* unmeasured cold executions (iteration indices
    ``0 .. warmup-1``) followed by *iterations* measured ones (indices
    ``warmup .. warmup+iterations-1``), each with parameters generated from
    its iteration index.

    Returns:
        A dict with the query name, the p50/p90/p99 latencies in milliseconds,
        the mean number of block fetches (``avg_gets``) and the mean bytes
        fetched expressed in MiB (``avg_bytes_mb``).
    """
    param_fn = make_param_fn(n_posts, n_users, query)
    sql = QUERIES[query]

    def run_iteration(i: int) -> dict:
        return run_query_cold(
            bucket, key_prefix, block_size, sql, param_fn(i), per_query_timeout
        )

    # Warmup iterations: executed for their side effects only, results dropped.
    for i in range(warmup):
        run_iteration(i)

    samples = [run_iteration(i) for i in range(warmup, warmup + iterations)]

    latencies = sorted(sample["elapsed_ms"] for sample in samples)
    mean_gets = statistics.mean([sample["gets"] for sample in samples])
    mean_bytes = statistics.mean([sample["bytes"] for sample in samples])

    return {
        "query": query,
        "p50": percentile(latencies, 50),
        "p90": percentile(latencies, 90),
        "p99": percentile(latencies, 99),
        "avg_gets": mean_gets,
        "avg_bytes_mb": mean_bytes / (1024.0 * 1024.0),
    }
def main():
    """Parse the command line, ensure the dataset is on S3, and run the benchmarks.

    The user count is derived from the post count as ``max(n_posts // 10, 100)``.
    One result row is printed per requested query; a query that times out or
    fails prints a TIMEOUT / ERROR row and the run continues with the next one.

    Raises:
        FileNotFoundError: if an upload may be needed (no ``--skip-upload``)
            and the local SQLite file does not exist.
    """
    parser = argparse.ArgumentParser(description="Benchmark sqlite-s3vfs cold latency")
    parser.add_argument("--sizes", type=int, default=100000, help="Number of posts")
    parser.add_argument(
        "--local-db",
        default=None,
        help="Path to local SQLite file (default: /data/social_<size>.db)",
    )
    parser.add_argument(
        "--page-size",
        type=int,
        default=65536,
        help="SQLite page size / s3vfs block size",
    )
    parser.add_argument(
        "--iterations", type=int, default=10, help="Measured iterations per query"
    )
    parser.add_argument(
        "--warmup", type=int, default=2, help="Warmup iterations before measuring"
    )
    parser.add_argument(
        "--queries",
        default="post,profile,who-liked,mutual,idx-filter,scan-filter",
        help="Comma-separated query names",
    )
    parser.add_argument(
        "--per-query-timeout",
        type=float,
        default=120.0,
        help="Abort a single query iteration after this many seconds",
    )
    parser.add_argument(
        "--skip-upload",
        action="store_true",
        help="Assume the sqlite-s3vfs dataset already exists; do not upload",
    )
    args = parser.parse_args()

    n_posts = args.sizes
    n_users = max(n_posts // 10, 100)
    local_db = args.local_db or f"/data/social_{n_posts}.db"
    key_prefix = f"s3vfs_social_{n_posts}"
    block_size = args.page_size
    # Drop blank entries so a trailing or doubled comma does not produce a
    # spurious ERROR row for the empty query name.
    queries = [name for name in (q.strip() for q in args.queries.split(",")) if name]

    # The local file is only ever read by the upload step, so it is required
    # only when an upload may actually happen.
    if not args.skip_upload and not os.path.exists(local_db):
        raise FileNotFoundError(f"Local SQLite file not found: {local_db}")

    bucket = get_bucket()
    if not args.skip_upload:
        upload_if_missing(local_db, key_prefix, bucket, block_size)

    print()
    print("=== sqlite-s3vfs cold benchmark ===")
    print(f" dataset: {n_posts} posts / {n_users} users")
    print(f" local: {local_db}")
    print(f" s3 prefix: s3://{bucket.name}/{key_prefix}/")
    print(f" page/block size: {block_size} bytes")
    print(f" iterations: {args.iterations}, warmup: {args.warmup}")
    print()
    print(" p50 p90 p99 s3 GETs s3 bytes")
    print(" ------------------------ ---------- ---------- ---------- ---------- ------------")

    for query in queries:
        try:
            result = bench_query(
                bucket,
                key_prefix,
                block_size,
                query,
                n_posts,
                n_users,
                args.iterations,
                args.warmup,
                args.per_query_timeout,
            )
            print(
                f" [s3vfs] {query:18} {result['p50']:8.1f}ms {result['p90']:8.1f}ms "
                f"{result['p99']:8.1f}ms {result['avg_gets']:10.1f} {result['avg_bytes_mb']:10.1f}MB"
            )
        except TimeoutError as e:
            print(f" [s3vfs] {query:18} TIMEOUT ({e})")
        except Exception as e:
            # Top-level boundary: report the failure and keep benchmarking the
            # remaining queries rather than aborting the whole run.
            print(f" [s3vfs] {query:18} ERROR: {e}")
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()