Skip to content

Commit e8d3fae

Browse files
committed
Add: Add index_gt::merge()
This focuses only on index_gt; index_dense_gt is out of scope for this PR. If we reach consensus on the implementation approach, we'll be able to implement index_dense_gt::merge() too. This adds a mutable `memory_mapped_file_t`, which lets you create a mutable memory-mapped index. You can merge multiple indexes into the mutable memory-mapped index without allocating all the data in RAM.
1 parent 77516e2 commit e8d3fae

File tree

2 files changed

+356
-48
lines changed

2 files changed

+356
-48
lines changed

cpp/test.cpp

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1098,6 +1098,108 @@ template <typename key_at, typename slot_at> void test_replacing_update() {
10981098
expect_eq(final_search[2].member.key, 44);
10991099
}
11001100

1101+
/**
1102+
* @brief Tests merging.
1103+
*/
1104+
void test_merge() {
1105+
using index_t = index_gt<>;
1106+
using distance_t = typename index_t::distance_t;
1107+
using key_t = typename index_t::key_t;
1108+
using compressed_slot_t = typename index_t::compressed_slot_t;
1109+
using member_ref_t = typename index_t::member_ref_t;
1110+
using member_cref_t = typename index_t::member_cref_t;
1111+
using member_citerator_t = typename index_t::member_citerator_t;
1112+
using add_result_t = typename index_t::add_result_t;
1113+
1114+
using value_t = float;
1115+
1116+
auto create_index = []() {
1117+
auto index_result = index_t::make();
1118+
expect(index_result);
1119+
return std::move(index_result.index);
1120+
};
1121+
1122+
struct metric_t {
1123+
std::unordered_map<compressed_slot_t, value_t> values;
1124+
1125+
metric_t() : values() {}
1126+
distance_t compute(value_t const& a, value_t const& b) {
1127+
if (b > a) {
1128+
return b - a;
1129+
} else {
1130+
return a - b;
1131+
}
1132+
}
1133+
distance_t operator()(value_t const& a, member_cref_t const& b) { return compute(a, values.at(get_slot(b))); }
1134+
distance_t operator()(value_t const& a, member_citerator_t const& b) {
1135+
return compute(a, values.at(get_slot(b)));
1136+
}
1137+
distance_t operator()(member_citerator_t const& a, member_citerator_t const& b) {
1138+
return compute(values.at(get_slot(a)), values.at(get_slot(b)));
1139+
}
1140+
};
1141+
1142+
auto add = [](index_t& index, key_t const key, value_t const value, metric_t& metric) {
1143+
auto on_success = [&](member_ref_t member) { metric.values[member.slot] = value; };
1144+
add_result_t result = index.add(key, value, metric, {}, on_success);
1145+
expect(result);
1146+
};
1147+
1148+
// Prepare index 1
1149+
auto index1 = create_index();
1150+
metric_t metric1;
1151+
expect(index1.reserve(3));
1152+
add(index1, 11, 1.1f, metric1);
1153+
add(index1, 12, 2.1f, metric1);
1154+
add(index1, 13, 3.1f, metric1);
1155+
expect_eq(index1.size(), 3);
1156+
1157+
// Prepare index 2
1158+
auto index2 = create_index();
1159+
metric_t metric2;
1160+
expect(index2.reserve(4));
1161+
add(index2, 21, -1.1f, metric2);
1162+
add(index2, 22, -2.1f, metric2);
1163+
add(index2, 23, -3.1f, metric2);
1164+
add(index2, 24, -4.1f, metric2);
1165+
expect_eq(index2.size(), 4);
1166+
1167+
// Merge indexes
1168+
char const* merge_file_path = "merge.usearch";
1169+
auto merged_index = create_index();
1170+
expect(merged_index.save(merge_file_path));
1171+
memory_mapped_file_t file{merge_file_path, true};
1172+
expect(merged_index.load(std::move(file)));
1173+
metric_t merged_metric;
1174+
auto merge_on_success = [&](member_ref_t member, value_t const& value) {
1175+
merged_metric.values[member.slot] = value;
1176+
};
1177+
auto get_value1 = [&](member_cref_t member) -> value_t& { return metric1.values[member.slot]; };
1178+
expect(merged_index.merge(index1, get_value1, merged_metric, {}, merge_on_success));
1179+
auto get_value2 = [&](member_cref_t member) -> value_t& { return metric2.values[member.slot]; };
1180+
expect(merged_index.merge(index2, get_value2, merged_metric, {}, merge_on_success));
1181+
1182+
// Assert
1183+
expect_eq(merged_index.size(), 7);
1184+
auto search = merged_index.search(0.75f, 3, merged_metric);
1185+
expect_eq(search.size(), 3);
1186+
expect_eq(static_cast<key_t>(search[0].member.key), 11);
1187+
expect_eq(static_cast<key_t>(search[1].member.key), 12);
1188+
expect_eq(static_cast<key_t>(search[2].member.key), 21);
1189+
1190+
// Re-load merged indexes
1191+
merged_index.reset();
1192+
merged_index.load(merge_file_path);
1193+
1194+
// Assert
1195+
expect_eq(merged_index.size(), 7);
1196+
search = merged_index.search(0.75f, 3, merged_metric);
1197+
expect_eq(search.size(), 3);
1198+
expect_eq(static_cast<key_t>(search[0].member.key), 11);
1199+
expect_eq(static_cast<key_t>(search[1].member.key), 12);
1200+
expect_eq(static_cast<key_t>(search[2].member.key), 21);
1201+
}
1202+
11011203
int main(int, char**) {
11021204
test_uint40();
11031205
test_cosine<float, std::int64_t, uint40_t>(10, 10);
@@ -1174,5 +1276,9 @@ int main(int, char**) {
11741276
test_sets<std::int64_t, slot32_t>(set_size, 20, 30);
11751277
test_strings<std::int64_t, slot32_t>();
11761278

1279+
// Test merge
1280+
std::printf("Testing merge\n");
1281+
test_merge();
1282+
11771283
return 0;
11781284
}

0 commit comments

Comments
 (0)