Skip to content

Commit 000c2f9

Browse files
committed
cache total stored size in the buffer class
1 parent 22ac2c2 commit 000c2f9

File tree

1 file changed

+39
-12
lines changed

1 file changed

+39
-12
lines changed

ops/_tracing/buffer.py

+39-12
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
# Must use isolation_level=None for consistency between Python 3.8 and 3.12
4242
# Can't use the STRICT keyword for tables, requires sqlite 3.37.0
4343
# Can't use the octet_length() either, requires sqlite 3.43.0
44+
# Can't use DELETE ... RETURNING, requires sqlite 3.35.0
4445
#
4546
# Ubuntu 20.04 Python 3.8.2 Sqlite 3.31.1 Adds UPSERT, window functions
4647
# Ubuntu 22.04 Python 3.10.x Sqlite 3.37.2 Adds STRICT tables, JSON ops
@@ -80,6 +81,7 @@ class Buffer:
8081
"""tracing data ids buffered during this dispatch invocation."""
8182
observed = False
8283
"""Marks that data from this dispatch invocation has been marked observed."""
84+
stored: int|None = None
8385

8486
def __init__(self, path: str):
8587
self.path = path
@@ -165,19 +167,17 @@ def pump(self, chunk: bytes | None = None) -> tuple[int, bytes] | None:
165167
# - or a read transaction later upgraded to write (check space, then delete some)
166168
# currently I've made `self.tx()` return a write transaction always
167169
# which is safer, but may incur a filesystem modification cost.
170+
collected_size = 0
171+
chunklen = 0
168172
with self.tx(readonly=not chunk) as conn:
169173
if chunk:
170174
# Ensure that there's enough space in the buffer
171175
chunklen = (len(chunk) + 4095) // 4096 * 4096
172-
stored: int | None = conn.execute(
173-
"""
174-
SELECT sum((length(data)+4095)/4096*4096)
175-
FROM tracing
176-
"""
177-
).fetchone()[0]
176+
178177
# TODO: expose `stored` in metrics, one day
179-
excess = (stored or 0) + chunklen - BUFFER_SIZE
180-
logging.debug(f'{excess=}')
178+
if self.stored is None:
179+
self.stored = self._stored_size(conn)
180+
excess = self.stored + chunklen - BUFFER_SIZE
181181

182182
if excess > 0:
183183
# Drop lower-priority, older data
@@ -190,15 +190,13 @@ def pump(self, chunk: bytes | None = None) -> tuple[int, bytes] | None:
190190
)
191191

192192
collected_ids: set[int] = set()
193-
collected_size: int = 0
194193
for id_, size in cursor:
195194
collected_ids.add(id_)
196195
collected_size += size
197196
if collected_size > excess:
198197
break
199198

200199
assert collected_ids
201-
logging.debug(f'{len(collected_ids)=}')
202200
conn.execute(
203201
f"""
204202
DELETE FROM tracing
@@ -222,7 +220,7 @@ def pump(self, chunk: bytes | None = None) -> tuple[int, bytes] | None:
222220
self.ids.add(cursor.lastrowid)
223221

224222
# Return oldest important data
225-
return conn.execute(
223+
rv = conn.execute(
226224
"""
227225
SELECT id, data
228226
FROM tracing
@@ -231,14 +229,43 @@ def pump(self, chunk: bytes | None = None) -> tuple[int, bytes] | None:
231229
"""
232230
).fetchone()
233231

232+
assert self.stored is not None
233+
self.stored += chunklen - collected_size
234+
return rv
235+
236+
def _stored_size(self, conn: sqlite3.Connection) -> int:
237+
"""Must be called in a transaction."""
238+
stored: int | None = conn.execute(
239+
"""
240+
SELECT sum((length(data)+4095)/4096*4096)
241+
FROM tracing
242+
"""
243+
).fetchone()[0]
244+
return stored or 0
245+
234246
@retry
235-
def remove(self, id_: int):
247+
def remove(self, id_: int) -> None:
236248
with self.tx() as conn:
249+
# NOTE: can't use the RETURNING clause
250+
row = conn.execute(
251+
"""
252+
SELECT (length(data)+4095)/4096*4096
253+
FROM tracing
254+
WHERE id = ?
255+
"""
256+
).fetchone()
257+
258+
if not row:
259+
return
260+
237261
conn.execute(
238262
"""
239263
DELETE FROM tracing
240264
WHERE id = ?
241265
""",
242266
(id_,),
243267
)
268+
244269
self.ids -= {id_}
270+
if self.stored is not None:
271+
self.stored -= row[0]

0 commit comments

Comments
 (0)