Skip to content

Commit 033b35b

Browse files
Explicitly free the temporary vector list and call gc.collect() after loading
On PyPy, gc.collect() helps release memory back to the OS after the temporary list of 1.4M vectors is replaced by the NumPy matrix.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d783832 commit 033b35b

1 file changed

Lines changed: 8 additions & 0 deletions

File tree

vectors/simserver.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@
6363

6464
from __future__ import annotations
6565

66+
import gc
6667
import json
6768
import time
6869
import sys
@@ -141,14 +142,21 @@ def _load_topics(self) -> None:
141142
dims = self._corpus.dimensions
142143
if vectors:
143144
self._matrix = np.array(vectors, dtype=np.float32)
145+
# Free the temporary list before computing norms
146+
del vectors
147+
gc.collect()
144148
# Precompute squared norms for cosine similarity
145149
self._norms_sq = np.einsum("ij,ij->i", self._matrix, self._matrix)
146150
else:
151+
del vectors
147152
self._matrix = np.empty((0, dims), dtype=np.float32)
148153
self._norms_sq = np.empty((0,), dtype=np.float32)
149154
self._ids = ids
150155
self._id_to_index = {aid: idx for idx, aid in enumerate(ids)}
151156

157+
# Release any remaining temporary memory back to the OS
158+
gc.collect()
159+
152160
print(
153161
"Loading of {0} topic vectors completed in {1:.2f} seconds".format(
154162
len(ids), t1 - t0

0 commit comments

Comments
 (0)