Skip to content

Commit d864e60

Browse files
hallogameboy and Jyun-Yu Jiang authored
Enhance mmap value store with dynamic pre-allocation and robust indexing (#260)
Co-authored-by: Jyun-Yu Jiang <jyunyu@amazon.com>
1 parent b4b817a commit d864e60

File tree

5 files changed: +142 -127 lines changed

pecos/core/base.py

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1800,7 +1800,7 @@ def mmap_hashmap_init(self, map_type):
18001800
raise NotImplementedError(f"map_type={map_type} is not implemented.")
18011801
return self.mmap_map_fn_dict[map_type]
18021802

1803-
def _get_num_f32_mmap_valstore_methods(self):
1803+
def _get_float32_mmap_valstore_methods(self):
18041804
"""
18051805
Specify C-lib's numerical float32 Memory-mappable store methods arguments and return types.
18061806
"""
@@ -1823,7 +1823,7 @@ def _get_num_f32_mmap_valstore_methods(self):
18231823

18241824
fn_name = "n_col"
18251825
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{store_type}")
1826-
corelib.fillprototype(local_fn_dict[fn_name], c_uint32, [c_void_p])
1826+
corelib.fillprototype(local_fn_dict[fn_name], c_uint64, [c_void_p])
18271827

18281828
fn_name = "save"
18291829
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{store_type}")
@@ -1839,30 +1839,30 @@ def _get_num_f32_mmap_valstore_methods(self):
18391839
local_fn_dict[fn_name], None, [c_void_p, c_uint64, c_uint32, POINTER(c_float)]
18401840
)
18411841

1842-
fn_name = "get_submatrix"
1842+
fn_name = "batch_get"
18431843
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{store_type}")
18441844
corelib.fillprototype(
18451845
local_fn_dict[fn_name],
18461846
None,
18471847
[
18481848
c_void_p,
1849-
c_uint32,
1850-
c_uint32,
1849+
c_uint64,
1850+
c_uint64,
1851+
POINTER(c_uint64),
18511852
POINTER(c_uint64),
1852-
POINTER(c_uint32),
18531853
POINTER(c_float),
18541854
c_uint32,
18551855
],
18561856
)
18571857

18581858
return local_fn_dict
18591859

1860-
def _get_str_mmap_valstore_methods(self):
1860+
def _get_bytes_mmap_valstore_methods(self):
18611861
"""
1862-
Specify C-lib's numerical Memory-mappable value store methods arguments and return types.
1862+
Specify C-lib's bytes Memory-mappable value store methods arguments and return types.
18631863
"""
18641864
fn_prefix = "mmap_valstore"
1865-
store_type = "str"
1865+
store_type = "bytes"
18661866

18671867
local_fn_dict = {}
18681868

@@ -1880,7 +1880,7 @@ def _get_str_mmap_valstore_methods(self):
18801880

18811881
fn_name = "n_col"
18821882
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{store_type}")
1883-
corelib.fillprototype(local_fn_dict[fn_name], c_uint32, [c_void_p])
1883+
corelib.fillprototype(local_fn_dict[fn_name], c_uint64, [c_void_p])
18841884

18851885
fn_name = "save"
18861886
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{store_type}")
@@ -1895,20 +1895,20 @@ def _get_str_mmap_valstore_methods(self):
18951895
corelib.fillprototype(
18961896
local_fn_dict[fn_name],
18971897
None,
1898-
[c_void_p, c_uint64, c_uint32, c_void_p, POINTER(c_uint32)],
1898+
[c_void_p, c_uint64, c_uint64, c_void_p, POINTER(c_uint32)],
18991899
)
19001900

1901-
fn_name = "get_submatrix"
1901+
fn_name = "batch_get"
19021902
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{store_type}")
19031903
corelib.fillprototype(
19041904
local_fn_dict[fn_name],
19051905
None,
19061906
[
19071907
c_void_p,
1908-
c_uint32, # n_sub_row
1909-
c_uint32, # n_sub_col
1908+
c_uint64, # n_sub_row
1909+
c_uint64, # n_sub_col
19101910
POINTER(c_uint64), # sub_rows
1911-
POINTER(c_uint32), # sub_cols
1911+
POINTER(c_uint64), # sub_cols
19121912
c_uint32, # trunc_val_len
19131913
c_char_p, # ret
19141914
POINTER(c_uint32), # ret_lens
@@ -1924,8 +1924,8 @@ def link_mmap_valstore_methods(self):
19241924
"""
19251925

19261926
self.mmap_valstore_fn_dict = {
1927-
"num_f32": self._get_num_f32_mmap_valstore_methods(),
1928-
"str": self._get_str_mmap_valstore_methods(),
1927+
"float32": self._get_float32_mmap_valstore_methods(),
1928+
"bytes": self._get_bytes_mmap_valstore_methods(),
19291929
}
19301930

19311931
def mmap_valstore_init(self, store_type):

pecos/core/libpecos.cpp

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,7 @@ extern "C" {
556556
// ==== C Interface of Memory-mappable Value Store ====
557557

558558
typedef pecos::mmap_valstore::Float32Store mmap_valstore_float32;
559-
typedef pecos::mmap_valstore::StringStore mmap_valstore_str;
559+
typedef pecos::mmap_valstore::BytesStore mmap_valstore_bytes;
560560
typedef pecos::mmap_valstore::row_type row_type;
561561
typedef pecos::mmap_valstore::col_type col_type;
562562

@@ -565,35 +565,35 @@ extern "C" {
565565
void* mmap_valstore_new_ ## SUFFIX () { \
566566
return static_cast<void*>(new mmap_valstore_ ## SUFFIX()); }
567567
MMAP_VALSTORE_NEW(float32)
568-
MMAP_VALSTORE_NEW(str)
568+
MMAP_VALSTORE_NEW(bytes)
569569

570570
// Destruct
571571
#define MMAP_VALSTORE_DESTRUCT(SUFFIX) \
572572
void mmap_valstore_destruct_ ## SUFFIX (void* map_ptr) { \
573573
delete static_cast<mmap_valstore_ ## SUFFIX *>(map_ptr); }
574574
MMAP_VALSTORE_DESTRUCT(float32)
575-
MMAP_VALSTORE_DESTRUCT(str)
575+
MMAP_VALSTORE_DESTRUCT(bytes)
576576

577577
// Number of rows
578578
#define MMAP_MAP_N_ROW(SUFFIX) \
579579
row_type mmap_valstore_n_row_ ## SUFFIX (void* map_ptr) { \
580580
return static_cast<mmap_valstore_ ## SUFFIX *>(map_ptr)->n_row(); }
581581
MMAP_MAP_N_ROW(float32)
582-
MMAP_MAP_N_ROW(str)
582+
MMAP_MAP_N_ROW(bytes)
583583

584584
// Number of columns
585585
#define MMAP_MAP_N_COL(SUFFIX) \
586586
col_type mmap_valstore_n_col_ ## SUFFIX (void* map_ptr) { \
587587
return static_cast<mmap_valstore_ ## SUFFIX *>(map_ptr)->n_col(); }
588588
MMAP_MAP_N_COL(float32)
589-
MMAP_MAP_N_COL(str)
589+
MMAP_MAP_N_COL(bytes)
590590

591591
// Save
592592
#define MMAP_VALSTORE_SAVE(SUFFIX) \
593593
void mmap_valstore_save_ ## SUFFIX (void* map_ptr, const char* map_dir) { \
594594
static_cast<mmap_valstore_ ## SUFFIX *>(map_ptr)->save(map_dir); }
595595
MMAP_VALSTORE_SAVE(float32)
596-
MMAP_VALSTORE_SAVE(str)
596+
MMAP_VALSTORE_SAVE(bytes)
597597

598598
// Load
599599
#define MMAP_VALSTORE_LOAD(SUFFIX) \
@@ -602,7 +602,7 @@ extern "C" {
602602
map_ptr->load(map_dir, lazy_load); \
603603
return static_cast<void *>(map_ptr); }
604604
MMAP_VALSTORE_LOAD(float32)
605-
MMAP_VALSTORE_LOAD(str)
605+
MMAP_VALSTORE_LOAD(bytes)
606606

607607
// Create view from external values pointer
608608
void mmap_valstore_from_vals_float32 (
@@ -614,41 +614,41 @@ extern "C" {
614614
static_cast<mmap_valstore_float32 *>(map_ptr)->from_vals(n_row, n_col, vals);
615615
}
616616
// Allocate and Init
617-
void mmap_valstore_from_vals_str (
617+
void mmap_valstore_from_vals_bytes (
618618
void* map_ptr,
619619
const row_type n_row,
620620
const col_type n_col,
621621
const char* const* vals,
622-
const mmap_valstore_str::str_len_type* vals_lens
622+
const mmap_valstore_bytes::bytes_len_type* vals_lens
623623
) {
624-
static_cast<mmap_valstore_str *>(map_ptr)->from_vals(n_row, n_col, vals, vals_lens);
624+
static_cast<mmap_valstore_bytes *>(map_ptr)->from_vals(n_row, n_col, vals, vals_lens);
625625
}
626626

627627
// Get sub-matrix
628-
void mmap_valstore_get_submatrix_float32 (
628+
void mmap_valstore_batch_get_float32 (
629629
void* map_ptr,
630-
const uint32_t n_sub_row,
631-
const uint32_t n_sub_col,
630+
const uint64_t n_sub_row,
631+
const uint64_t n_sub_col,
632632
const row_type* sub_rows,
633633
const col_type* sub_cols,
634634
mmap_valstore_float32::value_type* ret,
635635
const int threads
636636
) {
637-
static_cast<mmap_valstore_float32 *>(map_ptr)->get_submatrix(
637+
static_cast<mmap_valstore_float32 *>(map_ptr)->batch_get(
638638
n_sub_row, n_sub_col, sub_rows, sub_cols, ret, threads);
639639
}
640-
void mmap_valstore_get_submatrix_str (
640+
void mmap_valstore_batch_get_bytes (
641641
void* map_ptr,
642-
const uint32_t n_sub_row,
643-
const uint32_t n_sub_col,
642+
const uint64_t n_sub_row,
643+
const uint64_t n_sub_col,
644644
const row_type* sub_rows,
645645
const col_type* sub_cols,
646-
const mmap_valstore_str::str_len_type trunc_val_len,
646+
const mmap_valstore_bytes::bytes_len_type trunc_val_len,
647647
char* ret,
648-
mmap_valstore_str::str_len_type* ret_lens,
648+
mmap_valstore_bytes::bytes_len_type* ret_lens,
649649
const int threads
650650
) {
651-
static_cast<mmap_valstore_str *>(map_ptr)->get_submatrix(
651+
static_cast<mmap_valstore_bytes *>(map_ptr)->batch_get(
652652
n_sub_row, n_sub_col, sub_rows, sub_cols, trunc_val_len, ret, ret_lens, threads);
653653
}
654654
}

pecos/core/utils/mmap_valstore.hpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ namespace pecos {
2323
namespace mmap_valstore {
2424

2525
typedef uint64_t row_type;
26-
typedef uint32_t col_type;
26+
typedef uint64_t col_type;
2727

2828

2929
class Float32Store {
@@ -53,10 +53,10 @@ class Float32Store {
5353
vals_ = vals;
5454
}
5555

56-
void get_submatrix(const uint32_t n_sub_row, const uint32_t n_sub_col, const row_type* sub_rows, const col_type* sub_cols, value_type* ret, const int threads=1) {
56+
void batch_get(const uint64_t n_sub_row, const uint64_t n_sub_col, const row_type* sub_rows, const col_type* sub_cols, value_type* ret, const int threads=1) {
5757
#pragma omp parallel for schedule(static, 1) num_threads(threads)
58-
for (uint32_t i=0; i<n_sub_row; ++i) {
59-
for (uint32_t j=0; j<n_sub_col; ++j) {
58+
for (uint64_t i=0; i<n_sub_row; ++i) {
59+
for (uint64_t j=0; j<n_sub_col; ++j) {
6060
ret[i * n_sub_col + j] = vals_[sub_rows[i] * n_col_ + sub_cols[j]];
6161
}
6262
}
@@ -95,11 +95,11 @@ class Float32Store {
9595
};
9696

9797

98-
class StringStore {
98+
class BytesStore {
9999
public:
100-
typedef uint32_t str_len_type;
100+
typedef uint32_t bytes_len_type;
101101

102-
StringStore():
102+
BytesStore():
103103
n_row_(0),
104104
n_col_(0)
105105
{}
@@ -113,7 +113,7 @@ class StringStore {
113113
}
114114

115115
// In memory. Allocate and assign values
116-
void from_vals(const row_type n_row, const col_type n_col, const char* const* vals, const str_len_type* vals_lens) {
116+
void from_vals(const row_type n_row, const col_type n_col, const char* const* vals, const bytes_len_type* vals_lens) {
117117
n_row_ = n_row;
118118
n_col_ = n_col;
119119

@@ -137,15 +137,15 @@ class StringStore {
137137
}
138138
}
139139

140-
void get_submatrix(const uint32_t n_sub_row, const uint32_t n_sub_col, const row_type* sub_rows, const col_type* sub_cols,
141-
const str_len_type trunc_val_len, char* ret, str_len_type* ret_lens, const int threads=1) {
140+
void batch_get(const uint64_t n_sub_row, const uint64_t n_sub_col, const row_type* sub_rows, const col_type* sub_cols,
141+
const bytes_len_type trunc_val_len, char* ret, bytes_len_type* ret_lens, const int threads=1) {
142142
#pragma omp parallel for schedule(static, 1) num_threads(threads)
143-
for (uint32_t i=0; i<n_sub_row; ++i) {
144-
for (uint32_t j=0; j<n_sub_col; ++j) {
145-
uint32_t sub_idx = i * n_sub_col + j;
143+
for (uint64_t i=0; i<n_sub_row; ++i) {
144+
for (uint64_t j=0; j<n_sub_col; ++j) {
145+
uint64_t sub_idx = i * n_sub_col + j;
146146
row_type idx = sub_rows[i] * n_col_ + sub_cols[j];
147-
uint32_t ret_start_idx = sub_idx * trunc_val_len;
148-
str_len_type cur_ret_len = std::min(trunc_val_len, vals_lens_[idx]);
147+
uint64_t ret_start_idx = sub_idx * trunc_val_len;
148+
bytes_len_type cur_ret_len = std::min(trunc_val_len, vals_lens_[idx]);
149149
ret_lens[sub_idx] = cur_ret_len;
150150
std::memcpy(ret + ret_start_idx, vals_.data() + vals_starts_[idx], cur_ret_len);
151151
}
@@ -182,7 +182,7 @@ class StringStore {
182182
row_type n_row_;
183183
col_type n_col_;
184184
mmap_util::MmapableVector<char> vals_; // Concatenated big string
185-
mmap_util::MmapableVector<str_len_type> vals_lens_; // Length for each string
185+
mmap_util::MmapableVector<bytes_len_type> vals_lens_; // Length for each string
186186
mmap_util::MmapableVector<row_type> vals_starts_; // Start for each string in the concatenated big string
187187

188188
pecos::mmap_util::MmapStore mmap_store_;

0 commit comments

Comments
 (0)