-
Notifications
You must be signed in to change notification settings - Fork 75
Support gno-imi partition strategy #612
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
43a8b9c
a28b5c7
75ebbaf
bb27dee
04ef4ab
a87e05c
779ac49
05ce90d
7fee097
93b7ff8
58400cf
dc6dd79
cbf999f
59afb03
75da847
39c19c2
caefbea
9dc7ef2
11207ba
4895841
e31dd99
7c02b73
50949d6
b74ba9c
2cd5dba
d07b324
8e190dd
9b7063f
0469640
1332312
daefc10
c57317f
6df9cbd
c8e99c2
07fc182
61b4a0f
e2d8263
fc6bd3e
391264c
7a291bc
05407d0
6037138
0b46590
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,107 @@ | ||
|
|
||
| // Copyright 2024-present the vsag project | ||
| // | ||
| // Licensed under the Apache License, Version 2.0 (the "License"); | ||
| // you may not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, software | ||
| // distributed under the License is distributed on an "AS IS" BASIS, | ||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| // See the License for the specific language governing permissions and | ||
| // limitations under the License. | ||
|
|
||
#include <vsag/vsag.h>

#include <algorithm>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <random>
#include <string>
#include <vector>
|
|
||
| int | ||
| main(int argc, char** argv) { | ||
| vsag::init(); | ||
|
|
||
| /******************* Prepare Base Dataset *****************/ | ||
| int64_t num_vectors = 10000; | ||
| int64_t dim = 128; | ||
| std::vector<int64_t> ids(num_vectors); | ||
| std::vector<float> datas(num_vectors * dim); | ||
| std::mt19937 rng(47); | ||
| std::uniform_real_distribution<float> distrib_real; | ||
| for (int64_t i = 0; i < num_vectors; ++i) { | ||
| ids[i] = i; | ||
| } | ||
| for (int64_t i = 0; i < dim * num_vectors; ++i) { | ||
| datas[i] = distrib_real(rng); | ||
| } | ||
|
|
||
| auto base = vsag::Dataset::Make(); | ||
| base->NumElements(num_vectors) | ||
| ->Dim(dim) | ||
| ->Ids(ids.data()) | ||
| ->Float32Vectors(datas.data()) | ||
| ->Owner(false); | ||
|
|
||
| /******************* Create IVF Index *****************/ | ||
| std::string ivf_build_params = R"( | ||
| { | ||
| "dtype": "float32", | ||
| "metric_type": "l2", | ||
| "dim": 128, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "dim": 960 ?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not push ? |
||
| "index_param": { | ||
| "base_quantization_type": "fp32", | ||
| "partition_strategy_type": "gno_imi", | ||
| "ivf_train_type": "kmeans", | ||
| "first_order_buckets_count": 10, | ||
| "second_order_buckets_count": 10 | ||
jiaweizone marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| })"; | ||
| auto index = vsag::Factory::CreateIndex("ivf", ivf_build_params).value(); | ||
|
|
||
| /******************* Build IVF Index *****************/ | ||
|
|
||
| auto path = "example_gno_imi.index"; | ||
| if (auto build_result = index->Build(base); build_result.has_value()) { | ||
| std::ofstream outfile(path, std::ios::out | std::ios::binary); | ||
| auto result = index->Serialize(outfile); | ||
| outfile.close(); | ||
| std::cout << "After Build(), Index IVF contains: " << index->GetNumElements() << std::endl; | ||
| } else if (build_result.error().type == vsag::ErrorType::INTERNAL_ERROR) { | ||
| std::cerr << "Failed to build index: internalError" << std::endl; | ||
| exit(-1); | ||
| } | ||
|
|
||
| std::ifstream infile(path, std::ios::binary); | ||
| auto index2 = vsag::Factory::CreateIndex("ivf", ivf_build_params).value(); | ||
| index2->Deserialize(infile); | ||
| infile.close(); | ||
|
|
||
| /******************* Prepare Query Dataset *****************/ | ||
| std::vector<float> query_vector(dim); | ||
| for (int64_t i = 0; i < dim; ++i) { | ||
| query_vector[i] = distrib_real(rng); | ||
| } | ||
|
|
||
| auto query = vsag::Dataset::Make(); | ||
| query->NumElements(1)->Dim(dim)->Float32Vectors(query_vector.data())->Owner(false); | ||
|
|
||
| /******************* KnnSearch For IVF Index *****************/ | ||
| auto ivf_search_parameters = R"( | ||
| { | ||
| "ivf": { | ||
| "scan_buckets_count": 20, | ||
| "first_order_scan_ratio": 0.8 | ||
| } | ||
| })"; | ||
| int64_t topk = 10; | ||
| auto result = index->KnnSearch(query, topk, ivf_search_parameters).value(); | ||
| auto result2 = index2->KnnSearch(query, topk, ivf_search_parameters).value(); | ||
| /******************* Print Search Result *****************/ | ||
| std::cout << "results: " << std::endl; | ||
| for (int64_t i = 0; i < result->GetDim(); ++i) { | ||
| std::cout << result->GetIds()[i] << ": " << result->GetDistances()[i] << std::endl; | ||
| std::cout << result2->GetIds()[i] << ": " << result2->GetDistances()[i] << std::endl; | ||
| } | ||
| return 0; | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.