Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions bindings/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,16 @@ set(SVS_RUNTIME_HEADERS
include/IndexSVSImplDefs.h
include/IndexSVSFlatImpl.h
include/IndexSVSVamanaImpl.h
include/IndexSVSTrainingInfo.h
include/detail/TrainingInfoImpl.h
)

set(SVS_RUNTIME_SOURCES
src/IndexSVSImplUtils.h
src/IndexSVSFlatImpl.cpp
src/IndexSVSVamanaImpl.cpp
src/IndexSVSTrainingInfo.cpp
src/detail/TrainingInfoImpl.cpp
)

option(SVS_RUNTIME_ENABLE_LVQ_LEANVEC "Enable compilation of SVS runtime with LVQ and LeanVec support" ON)
Expand Down Expand Up @@ -97,15 +101,15 @@ if ((SVS_RUNTIME_ENABLE_LVQ_LEANVEC))
)
else()
# Links to LTO-enabled static library, requires GCC/G++ 11.2
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL "11.2")
set(SVS_URL "https://github.com/intel/ScalableVectorSearch/releases/download/v1.0.0-dev/svs-shared-library-1.0.0-NIGHTLY-20251030-737-bindings.tar.gz"
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "11.2" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "11.3")
set(SVS_URL "file:///svs/docker-share/svs-shared-library-DEV.tar.gz"
CACHE STRING "URL to download SVS shared library")
else()
message(WARNING
"Pre-built LVQ/LeanVec SVS library requires GCC/G++ v.11.2 to apply LTO optimizations."
"Pre-built LVQ/LeanVec SVS library requires GCC/G++ v11.2 to apply LTO optimizations."
"Current compiler: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}"
)
set(SVS_URL "https://github.com/intel/ScalableVectorSearch/releases/download/v1.0.0-dev/svs-shared-library-1.0.0-NIGHTLY-20251017-faiss.tar.gz")
set(SVS_URL "file:///svs/docker-share/svs-shared-library-DEV.tar.gz")
endif()
include(FetchContent)
FetchContent_Declare(
Expand Down
3 changes: 2 additions & 1 deletion bindings/cpp/include/IndexSVSImplDefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ enum class ErrorCode {
UNKNOWN_ERROR = 1,
INVALID_ARGUMENT = 2,
NOT_IMPLEMENTED = 3,
NOT_INITIALIZED = 4
NOT_INITIALIZED = 4,
IO_ERROR = 5
};

struct Status {
Expand Down
47 changes: 47 additions & 0 deletions bindings/cpp/include/IndexSVSTrainingInfo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once
#include "IndexSVSImplDefs.h"

#include <iostream>
#include <memory>

namespace svs {
namespace runtime {

namespace detail {
struct TrainingInfoImpl;
}

// Handle that owns an implementation-defined training-information object
// (detail::TrainingInfoImpl, only forward-declared in this header) and exposes
// stream-based serialization that reports failures through Status.
struct SVS_RUNTIME_API IndexSVSTrainingInfo {
// Creates a handle with no implementation attached (impl_ stays null).
IndexSVSTrainingInfo() noexcept = default;

// Creates a handle that takes ownership of `impl`.
IndexSVSTrainingInfo(std::unique_ptr<svs::runtime::detail::TrainingInfoImpl> impl
) noexcept;

// Releases the handle pointed to by `impl` using `delete`.
static void destroy(IndexSVSTrainingInfo* impl) noexcept;
// NOTE(review): declared out-of-line, but no definition appears in
// IndexSVSTrainingInfo.cpp as shown in this change — confirm it is defined in
// a translation unit where detail::TrainingInfoImpl is a complete type.
virtual ~IndexSVSTrainingInfo();

// Writes the attached implementation's data to `out`. Returns a Status
// instead of throwing. With no implementation attached, nothing is written
// and an OK status is returned.
Status serialize(std::ostream& out) const noexcept;
// Reads the attached implementation's data from `in`. Returns a Status
// instead of throwing. With no implementation attached, nothing is read
// and an OK status is returned.
Status deserialize(std::istream& in) noexcept;

protected:
// Owned implementation object; null for a default-constructed handle.
std::unique_ptr<svs::runtime::detail::TrainingInfoImpl> impl_{nullptr};
};

} // namespace runtime
} // namespace svs
2 changes: 1 addition & 1 deletion bindings/cpp/include/IndexSVSVamanaImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ struct SVS_RUNTIME_API IndexSVSVamanaImpl {
virtual void reset() noexcept;

/* Serialization and deserialization helpers */
Status serialize_impl(std::ostream& out) const noexcept;
virtual Status serialize_impl(std::ostream& out) const noexcept;
virtual Status deserialize_impl(std::istream& in) noexcept;

MetricType metric_type_;
Expand Down
1 change: 1 addition & 0 deletions bindings/cpp/include/IndexSVSVamanaLVQImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ struct SVS_RUNTIME_API IndexSVSVamanaLVQImpl : IndexSVSVamanaImpl {
static IndexSVSVamanaLVQImpl*
build(size_t dim, MetricType metric, const BuildParams& params, LVQLevel lvq) noexcept;

Status serialize_impl(std::ostream& out) const noexcept override;
Status deserialize_impl(std::istream& in) noexcept override;

protected:
Expand Down
19 changes: 12 additions & 7 deletions bindings/cpp/include/IndexSVSVamanaLeanVecImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,25 +28,32 @@ template <size_t Extent> struct LeanVecMatrices;

namespace runtime {

struct IndexSVSTrainingInfo;

struct SVS_RUNTIME_API IndexSVSVamanaLeanVecImpl : IndexSVSVamanaImpl {
enum LeanVecLevel { LeanVec4x4, LeanVec4x8, LeanVec8x8 };

static IndexSVSVamanaLeanVecImpl* build(
static IndexSVSVamanaLeanVecImpl* build_leanvec(
size_t dim,
MetricType metric,
const BuildParams& params,
size_t leanvec_dims,
LeanVecLevel leanvec_level
LeanVecLevel leanvec_level,
size_t n,
const float* x,
const IndexSVSTrainingInfo* info
) noexcept;

static IndexSVSTrainingInfo* build_leanvec_training(
size_t n, const float* x, size_t dim, size_t leanvec_dims
) noexcept;

void reset() noexcept override;

Status train(size_t n, const float* x) noexcept;
Status serialize_impl(std::ostream& out) const noexcept override;

Status deserialize_impl(std::istream& in) noexcept override;

bool is_trained() const noexcept { return trained; }

protected:
IndexSVSVamanaLeanVecImpl();

Expand All @@ -64,8 +71,6 @@ struct SVS_RUNTIME_API IndexSVSVamanaLeanVecImpl : IndexSVSVamanaImpl {

size_t leanvec_d;
LeanVecLevel leanvec_level;
std::unique_ptr<svs::leanvec::LeanVecMatrices<std::dynamic_extent>> leanvec_matrix;
bool trained = false;
};

} // namespace runtime
Expand Down
48 changes: 48 additions & 0 deletions bindings/cpp/include/detail/TrainingInfoImpl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <svs/leanvec/leanvec.h>

#include <iostream>
#include <memory>
#include <span>

namespace svs {
namespace runtime {
namespace detail {

// Abstract interface for training-information payloads held by
// IndexSVSTrainingInfo. Concrete payloads implement both stream operations.
struct TrainingInfoImpl {
// Virtual so that derived payloads can be destroyed through a base pointer.
virtual ~TrainingInfoImpl() = default;

// Writes the payload to `out`. Not declared noexcept: the caller in
// IndexSVSTrainingInfo wraps this call in try/catch.
virtual void serialize(std::ostream& out) const = 0;
// Reads the payload from `in`. Not declared noexcept: the caller in
// IndexSVSTrainingInfo wraps this call in try/catch.
virtual void deserialize(std::istream& in) = 0;
};

// TrainingInfo wrapper around pre-computed leanvec matrix
// Implements the TrainingInfoImpl stream interface for a
// svs::leanvec::LeanVecMatrices<svs::Dynamic> value.
struct LeanVecTrainingInfoImpl : public TrainingInfoImpl {
// Takes the matrices by value.
// NOTE(review): presumably stored into `leanvec_matrix`; the definition
// is in src/detail/TrainingInfoImpl.cpp, which is not visible here — confirm.
LeanVecTrainingInfoImpl(svs::leanvec::LeanVecMatrices<svs::Dynamic> matrix);

// Overrides of the TrainingInfoImpl stream interface (defined out-of-line).
void serialize(std::ostream& out) const override;
void deserialize(std::istream& in) override;

// The wrapped LeanVec matrices; public data member.
svs::leanvec::LeanVecMatrices<svs::Dynamic> leanvec_matrix;
};

} // namespace detail
} // namespace runtime
} // namespace svs
59 changes: 59 additions & 0 deletions bindings/cpp/src/IndexSVSTrainingInfo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "IndexSVSTrainingInfo.h"
#include "detail/TrainingInfoImpl.h"

namespace svs {
namespace runtime {

IndexSVSTrainingInfo::IndexSVSTrainingInfo(
std::unique_ptr<svs::runtime::detail::TrainingInfoImpl> impl
) noexcept
: impl_(std::move(impl)) {}

// Destroys the handle with `delete`; passing nullptr is a no-op.
void IndexSVSTrainingInfo::destroy(IndexSVSTrainingInfo* impl) noexcept {
    delete impl;
}

// Writes the attached training information to `out`.
//
// With no implementation attached, nothing is written and Status_Ok is
// returned. Otherwise the call is forwarded to the implementation, and any
// exception it throws is translated into an ErrorCode::IO_ERROR status so that
// nothing escapes this noexcept function.
//
// Stream writes do not throw unless the caller enabled stream exceptions, so
// the stream state is also inspected after the write: a stream left in a
// failed state is reported as ErrorCode::IO_ERROR instead of silently
// returning success.
//
// @param out Destination stream.
// @return Status_Ok on success, otherwise a Status carrying ErrorCode::IO_ERROR.
Status IndexSVSTrainingInfo::serialize(std::ostream& out) const noexcept {
    if (!impl_) {
        return Status_Ok;
    }
    try {
        impl_->serialize(out);
    } catch (const std::exception& e) {
        return Status{ErrorCode::IO_ERROR, e.what()};
    } catch (...) {
        return Status{ErrorCode::IO_ERROR, "Failed to serialize IndexSVSTrainingInfo"};
    }
    // failbit/badbit are the only failure signal when stream exceptions are off.
    if (out.fail()) {
        return Status{ErrorCode::IO_ERROR, "Failed to serialize IndexSVSTrainingInfo"};
    }
    return Status_Ok;
}

// Reads training information from `in` into the attached implementation.
//
// With no implementation attached, nothing is read and Status_Ok is returned.
// Otherwise the call is forwarded to the implementation, and any exception it
// throws is translated into an ErrorCode::IO_ERROR status so that nothing
// escapes this noexcept function.
//
// Stream reads do not throw unless the caller enabled stream exceptions, so
// the stream state is also inspected after the read: a short or failed read
// (failbit/badbit set) is reported as ErrorCode::IO_ERROR instead of silently
// returning success.
//
// @param in Source stream.
// @return Status_Ok on success, otherwise a Status carrying ErrorCode::IO_ERROR.
Status IndexSVSTrainingInfo::deserialize(std::istream& in) noexcept {
    if (!impl_) {
        return Status_Ok;
    }
    try {
        impl_->deserialize(in);
    } catch (const std::exception& e) {
        return Status{ErrorCode::IO_ERROR, e.what()};
    } catch (...) {
        return Status{ErrorCode::IO_ERROR, "Failed to deserialize IndexSVSTrainingInfo"};
    }
    // A truncated or unreadable stream only shows up in the stream state.
    if (in.fail()) {
        return Status{ErrorCode::IO_ERROR, "Failed to deserialize IndexSVSTrainingInfo"};
    }
    return Status_Ok;
}

} // namespace runtime
} // namespace svs
32 changes: 27 additions & 5 deletions bindings/cpp/src/IndexSVSVamanaImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -466,12 +466,21 @@ Status IndexSVSVamanaImpl::init_impl(size_t n, const float* x) noexcept {
}

Status IndexSVSVamanaImpl::serialize_impl(std::ostream& out) const noexcept {
if (!impl) {
return Status{
ErrorCode::NOT_INITIALIZED, "Cannot serialize: SVS index not initialized."};
}
// store own members
out.write(reinterpret_cast<const char*>(&metric_type_), sizeof(metric_type_));
out.write(reinterpret_cast<const char*>(&dim_), sizeof(dim_));
out.write(
reinterpret_cast<const char*>(&default_search_params), sizeof(default_search_params)
);
out.write(reinterpret_cast<const char*>(&build_params), sizeof(build_params));
// no need to store ntotal_soft_deleted, index is always compacted on saving

bool initialized = impl != nullptr;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please explain why we have to serialize/deserialize empty indices?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the use cases in this test
https://github.com/ahuber21/faiss/blob/b709fa114afc522b3d10ffd1356df1d9a9548951/tests/test_svs.cpp#L111

which is created to validate the scenario in this issue
ahuber21/faiss#37

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you @ahuber21 for the problem description
After some investigation and analysis, I came to understand that, in the implementation code on the SVS side, we no longer need to call DynamicVamana::save(std::ostream&); instead, we can use the filesystem-based DynamicVamana::save(std::filesystem::path, ...) method, which allows us to manage serialization in a simpler and more controlled way.

out.write(reinterpret_cast<const char*>(&initialized), sizeof(bool));

impl->save(out);
if (initialized) {
impl->save(out);
}
return Status_Ok;
}

Expand All @@ -482,6 +491,19 @@ Status IndexSVSVamanaImpl::deserialize_impl(std::istream& in) noexcept {
"Cannot deserialize: SVS index already initialized."};
}

// load own members
in.read(reinterpret_cast<char*>(&metric_type_), sizeof(metric_type_));
in.read(reinterpret_cast<char*>(&dim_), sizeof(dim_));
in.read(reinterpret_cast<char*>(&default_search_params), sizeof(default_search_params));
in.read(reinterpret_cast<char*>(&build_params), sizeof(build_params));
ntotal_soft_deleted = 0; // index is always compacted on saving

bool initialized = false;
in.read(reinterpret_cast<char*>(&initialized), sizeof(bool));
if (!initialized) {
return Status_Ok;
}

impl.reset(std::visit(
[&](auto element) {
using ElementType = std::decay_t<decltype(element)>;
Expand Down
18 changes: 18 additions & 0 deletions bindings/cpp/src/IndexSVSVamanaLVQImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,13 +171,31 @@ Status IndexSVSVamanaLVQImpl::init_impl(size_t n, const float* x) noexcept {
);
}

Status IndexSVSVamanaLVQImpl::serialize_impl(std::ostream& out) const noexcept {
// Also store LVQ specific members
out.write(reinterpret_cast<const char*>(&lvq_level), sizeof(LVQLevel));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So far, to make everything consistent, shouldn't we save IndexSVSVamanaImpl members as well?

Copy link
Contributor Author

@ahuber21 ahuber21 Oct 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't there only ntotal_soft_deleted which is always 0 because we compact on save?

Yes, I guess it makes sense to add the public members too.


// This will also write whether or not we're initialized
return IndexSVSVamanaImpl::serialize_impl(out);

return Status_Ok;
}

Status IndexSVSVamanaLVQImpl::deserialize_impl(std::istream& in) noexcept {
if (impl) {
return Status{
ErrorCode::INVALID_ARGUMENT,
"Cannot deserialize: SVS index already initialized."};
}

in.read(reinterpret_cast<char*>(&lvq_level), sizeof(LVQLevel));

bool initialized = false;
in.read(reinterpret_cast<char*>(&initialized), sizeof(bool));
if (!initialized) {
return Status_Ok;
}

if (svs::detail::intel_enabled()) {
switch (lvq_level) {
case LVQLevel::LVQ4x0:
Expand Down
Loading