Skip to content

Commit 5d08f76

Browse files
OctoberChang and Wei-Cheng Chang authored
Enable Python API of memory-map IO for PECOS-HNSW (#212)
Co-authored-by: Wei-Cheng Chang <chanweic@amazon.com>
1 parent de4e335 commit 5d08f76

File tree

4 files changed

+18
-5
lines changed

4 files changed

+18
-5
lines changed

pecos/ann/hnsw/model.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# and limitations under the License.
1111
from ctypes import (
1212
POINTER,
13+
c_bool,
1314
c_float,
1415
c_uint32,
1516
c_char_p,
@@ -149,10 +150,11 @@ def train(cls, X, train_params=None, pred_params=None):
149150
return cls(model_ptr, pX.rows, pX.cols, fn_dict, pred_params)
150151

151152
@classmethod
152-
def load(cls, model_folder):
153+
def load(cls, model_folder, lazy_load=False):
153154
"""Load HNSW model from file
154155
Args:
155156
model_folder (str): model directory from which the model is loaded.
157+
lazy_load (bool): whether to lazily load the memory-mapped model files (default False).
156158
Returns:
157159
HNSWModel (pecos.ann.hnsw.HNSW): the loaded HNSW model
158160
"""
@@ -168,7 +170,7 @@ def load(cls, model_folder):
168170
c_model_dir = f"{model_folder}/c_model"
169171
if not os.path.isdir(c_model_dir):
170172
raise ValueError(f"c_model_dir did not exist: {c_model_dir}")
171-
model_ptr = fn_dict["load"](c_char_p(c_model_dir.encode("utf-8")))
173+
model_ptr = fn_dict["load"](c_char_p(c_model_dir.encode("utf-8")), c_bool(lazy_load))
172174
pred_params = cls.PredParams.from_dict(param["pred_kwargs"])
173175
return cls(model_ptr, param["num_item"], param["feat_dim"], fn_dict, pred_params)
174176

pecos/core/base.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1608,7 +1608,10 @@ def link_ann_hnsw_methods(self):
16081608
c_fn_name = f"c_ann_hnsw_{fn_name}_{data_type}_{metric_type}_f32"
16091609
local_fn_dict[fn_name] = getattr(self.clib_float32, c_fn_name)
16101610
res_list = c_void_p # pointer to C/C++ pecos::ann::HNSW
1611-
arg_list = [c_char_p] # pointer to char* model_dir
1611+
arg_list = [
1612+
c_char_p, # pointer to char* model_dir
1613+
c_bool, # bool for lazy_load of mmap files
1614+
]
16121615
corelib.fillprototype(local_fn_dict[fn_name], res_list, arg_list)
16131616

16141617
fn_name = "save"

pecos/core/libpecos.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -378,9 +378,9 @@ extern "C" {
378378
C_ANN_HNSW_TRAIN(_drm_l2_f32, ScipyDrmF32, pecos::drm_t, hnsw_drm_l2_t)
379379

380380
#define C_ANN_HNSW_LOAD(SUFFIX, HNSW_T) \
381-
void* c_ann_hnsw_load ## SUFFIX(const char* model_dir) { \
381+
void* c_ann_hnsw_load ## SUFFIX(const char* model_dir, const bool lazy_load) { \
382382
HNSW_T *model_ptr = new HNSW_T(); \
383-
model_ptr->load(model_dir); \
383+
model_ptr->load(model_dir, lazy_load); \
384384
return static_cast<void*>(model_ptr); \
385385
}
386386
C_ANN_HNSW_LOAD(_drm_ip_f32, hnsw_drm_ip_t)

test/pecos/ann/test_hnsw.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@ def test_save_and_load(tmpdir):
4545
assert Yp_from_mem == approx(
4646
Yp_from_file, abs=0.0
4747
), f"save and load failed: Yp_from_mem != Yp_from_file"
48+
del model
49+
50+
# test load memory-mapped files
51+
model = HNSW.load(model_folder, lazy_load=True)
52+
Yp_from_file, _ = model.predict(X_tst, pred_params=pred_params, ret_csr=False)
53+
assert Yp_from_mem == approx(
54+
Yp_from_file, abs=0.0
55+
), f"load mmap-file failed: Yp_from_mem != Yp_from_file"
4856

4957

5058
def test_predict_and_recall():

0 commit comments

Comments
 (0)