-
Notifications
You must be signed in to change notification settings - Fork 34
fix: store data members & make faiss tests pass #209
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: rfsaliev/cpp-runtime-binding
Are you sure you want to change the base?
Changes from 2 commits
a3ecf1f
faf3d65
6db834b
7af7137
8d8b99a
32e882e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -171,13 +171,31 @@ Status IndexSVSVamanaLVQImpl::init_impl(size_t n, const float* x) noexcept { | |
| ); | ||
| } | ||
|
|
||
| Status IndexSVSVamanaLVQImpl::serialize_impl(std::ostream& out) const noexcept { | ||
| // Also store LVQ specific members | ||
| out.write(reinterpret_cast<const char*>(&lvq_level), sizeof(LVQLevel)); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So far, to make everything consistent, shouldn't we save There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, I guess it makes sense to add the public members too. |
||
|
|
||
| // This will also write whether or not we're initialized | ||
| return IndexSVSVamanaImpl::serialize_impl(out); | ||
|
|
||
| return Status_Ok; | ||
| } | ||
|
|
||
| Status IndexSVSVamanaLVQImpl::deserialize_impl(std::istream& in) noexcept { | ||
| if (impl) { | ||
| return Status{ | ||
| ErrorCode::INVALID_ARGUMENT, | ||
| "Cannot deserialize: SVS index already initialized."}; | ||
| } | ||
|
|
||
| in.read(reinterpret_cast<char*>(&lvq_level), sizeof(LVQLevel)); | ||
|
|
||
| bool initialized = false; | ||
| in.read(reinterpret_cast<char*>(&initialized), sizeof(bool)); | ||
| if (!initialized) { | ||
| return Status_Ok; | ||
| } | ||
|
|
||
| if (svs::detail::intel_enabled()) { | ||
| switch (lvq_level) { | ||
| case LVQLevel::LVQ4x0: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -135,7 +135,7 @@ IndexSVSVamanaLeanVecImpl::~IndexSVSVamanaLeanVecImpl() = default; | |
|
|
||
| void IndexSVSVamanaLeanVecImpl::reset() noexcept { | ||
| IndexSVSVamanaImpl::reset(); | ||
| leanvec_matrix.reset(); | ||
| // leanvec_matrix.reset(); | ||
| } | ||
|
|
||
| Status IndexSVSVamanaLeanVecImpl::train(size_t n, const float* x) noexcept { | ||
|
|
@@ -157,7 +157,7 @@ Status IndexSVSVamanaLeanVecImpl::init_impl(size_t n, const float* x) noexcept { | |
| return Status{ErrorCode::UNKNOWN_ERROR, "Index already initialized"}; | ||
| } | ||
|
|
||
| if (!is_trained()) { | ||
| if (!trained) { | ||
| return { | ||
| ErrorCode::NOT_INITIALIZED, | ||
| "Cannot initialize SVS LeanVec index without training first."}; | ||
|
|
@@ -237,13 +237,86 @@ Status IndexSVSVamanaLeanVecImpl::init_impl(size_t n, const float* x) noexcept { | |
| ); | ||
| } | ||
|
|
||
| Status IndexSVSVamanaLeanVecImpl::serialize_impl(std::ostream& out) const noexcept { | ||
| // TODO: try/catch around writes, report error codes --> macro? | ||
|
|
||
| // Store LeanVec specific members | ||
| out.write(reinterpret_cast<const char*>(&leanvec_d), sizeof(size_t)); | ||
| out.write(reinterpret_cast<const char*>(&leanvec_level), sizeof(LeanVecLevel)); | ||
| out.write(reinterpret_cast<const char*>(&trained), sizeof(bool)); | ||
|
|
||
| bool has_matrix = (leanvec_matrix != nullptr); | ||
| out.write(reinterpret_cast<const char*>(&has_matrix), sizeof(bool)); | ||
|
|
||
| if (has_matrix) { | ||
| // To avoid another temp file, stream the data directly | ||
| size_t num_rows = leanvec_matrix->num_rows(); | ||
| size_t num_cols = leanvec_matrix->num_cols(); | ||
| // check for overflow (guard against division by zero when num_cols == 0) | ||
| if (num_cols != 0 && num_rows > std::numeric_limits<size_t>::max() / num_cols) { | ||
| return Status{ | ||
| ErrorCode::IO_ERROR, "Failed to serialize leanvec matrix: size overflow"}; | ||
| } | ||
|
|
||
| // data and query matrices are the same, can use either | ||
| auto matrix = leanvec_matrix->view_data_matrix(); | ||
|
|
||
| size_t elements = num_rows * num_cols; | ||
|
|
||
| out.write(reinterpret_cast<const char*>(&num_rows), sizeof(size_t)); | ||
| out.write(reinterpret_cast<const char*>(&num_cols), sizeof(size_t)); | ||
| out.write(reinterpret_cast<const char*>(matrix.data()), elements * sizeof(float)); | ||
| } | ||
|
|
||
| // This will also write whether or not we're initialized | ||
| return IndexSVSVamanaImpl::serialize_impl(out); | ||
| } | ||
|
|
||
| Status IndexSVSVamanaLeanVecImpl::deserialize_impl(std::istream& in) noexcept { | ||
| if (impl) { | ||
| return Status{ | ||
| ErrorCode::INVALID_ARGUMENT, | ||
| "Cannot deserialize: SVS index already initialized."}; | ||
| } | ||
|
|
||
| in.read(reinterpret_cast<char*>(&leanvec_d), sizeof(size_t)); | ||
|
||
| in.read(reinterpret_cast<char*>(&leanvec_level), sizeof(LeanVecLevel)); | ||
| in.read(reinterpret_cast<char*>(&trained), sizeof(bool)); | ||
|
|
||
| bool has_matrix = false; | ||
| in.read(reinterpret_cast<char*>(&has_matrix), sizeof(bool)); | ||
|
|
||
| if (has_matrix) { | ||
| size_t num_rows = 0; | ||
| size_t num_cols = 0; | ||
|
|
||
| // TODO: read macro for error handling | ||
| in.read(reinterpret_cast<char*>(&num_rows), sizeof(size_t)); | ||
| in.read(reinterpret_cast<char*>(&num_cols), sizeof(size_t)); | ||
|
|
||
| std::vector<float> matrix_data(num_rows * num_cols); | ||
| in.read( | ||
| reinterpret_cast<char*>(matrix_data.data()), num_rows * num_cols * sizeof(float) | ||
| ); | ||
|
|
||
| auto matrix = svs::data::SimpleData<float, svs::Dynamic>(num_rows, num_cols); | ||
| for (size_t i = 0; i < num_rows; ++i) { | ||
| const auto datum = | ||
| std::span<const float>(matrix_data.data() + i * num_cols, num_cols); | ||
| matrix.set_datum(i, datum); | ||
| } | ||
|
|
||
| leanvec_matrix = | ||
|
||
| std::make_unique<svs::leanvec::LeanVecMatrices<svs::Dynamic>>(matrix, matrix); | ||
| } | ||
|
|
||
| bool initialized = false; | ||
| in.read(reinterpret_cast<char*>(&initialized), sizeof(bool)); | ||
|
|
||
| if (!initialized) { | ||
| return Status_Ok; | ||
| } | ||
|
|
||
| if (svs::detail::intel_enabled()) { | ||
| switch (leanvec_level) { | ||
| case LeanVecLevel::LeanVec4x4: | ||
|
|
@@ -264,7 +337,6 @@ Status IndexSVSVamanaLeanVecImpl::deserialize_impl(std::istream& in) noexcept { | |
| impl.reset(deserialize_impl_t<storage_type_sq>(in, metric_type_)); | ||
| } | ||
|
|
||
| trained = true; | ||
| return Status_Ok; | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please explain, why we have to serializae/deserialize empty inidices?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the use cases in this test
https://github.com/ahuber21/faiss/blob/b709fa114afc522b3d10ffd1356df1d9a9548951/tests/test_svs.cpp#L111
which is created to validate the scenario in this issue
ahuber21/faiss#37
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you @ahuber21 for the problem description
After some time of investigation ana analysis, I got understanding, that inside the implementation code in SVS side we do not need to call
DynamicVamana::save(std::ostream&)anymore, instead we can use filesystem-basedDynamicVamana::save(std::filesystem::path, ...)method which allows to manage serialization in a more controlled and simpler way.