// 101_index_hnsw.cpp
// Copyright 2024-present the vsag project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vsag/vsag.h>

#include <cstdint>
#include <iostream>
#include <random>
/// Example program: builds an HNSW index over randomly generated float32
/// vectors and runs a single k-nearest-neighbour query against it.
///
/// @param argc unused
/// @param argv unused
/// @return 0 when the index is created, built and searched successfully;
///         -1 when index creation, the build, or the search reports an error.
int
main(int argc, char** argv) {
    /******************* Prepare Base Dataset *****************/
    const int64_t num_vectors = 1000;
    const int64_t dim = 128;
    // Raw arrays are used on purpose: they are handed to the dataset below.
    auto ids = new int64_t[num_vectors];
    auto vectors = new float[dim * num_vectors];

    // Fixed seed so every run produces the same data.
    std::mt19937 rng;
    rng.seed(47);
    std::uniform_real_distribution<> distrib_real;
    for (int64_t i = 0; i < num_vectors; ++i) {
        ids[i] = i;
    }
    for (int64_t i = 0; i < dim * num_vectors; ++i) {
        // The distribution yields double; narrow explicitly to the float storage type.
        vectors[i] = static_cast<float>(distrib_real(rng));
    }

    auto base = vsag::Dataset::Make();
    // Transfer the ownership of the data (ids, vectors) to the base.
    base->NumElements(num_vectors)->Dim(dim)->Ids(ids)->Float32Vectors(vectors);

    /******************* Create HNSW Index *****************/
    // hnsw_build_parameters is the configuration for building an HNSW index.
    // The "dtype" specifies the data type, which supports float32 and int8.
    // The "metric_type" indicates the distance metric type (e.g., cosine, inner product, and L2).
    // The "dim" represents the dimensionality of the vectors, indicating the number of features
    // for each data point. It is hard-coded in the JSON, so keep it equal to `dim` above.
    // The "hnsw" section contains parameters specific to HNSW:
    // - "max_degree": The maximum number of connections for each node in the graph.
    // - "ef_construction": The size used for nearest neighbor search during graph construction,
    //   which affects both speed and the quality of the graph.
    auto hnsw_build_parameters = R"(
    {
        "dtype": "float32",
        "metric_type": "l2",
        "dim": 128,
        "hnsw": {
            "max_degree": 16,
            "ef_construction": 100
        }
    }
    )";
    // Check the result before unwrapping it so that a rejected configuration is
    // reported as an error message instead of an uncaught exception.
    auto index_result = vsag::Factory::CreateIndex("hnsw", hnsw_build_parameters);
    if (!index_result.has_value()) {
        std::cerr << "Failed to create index: " << index_result.error().message << '\n';
        return -1;
    }
    auto index = index_result.value();

    /******************* Build HNSW Index *****************/
    if (auto build_result = index->Build(base); build_result.has_value()) {
        std::cout << "After Build(), Index HNSW contains: " << index->GetNumElements() << '\n';
    } else {
        std::cerr << "Failed to build index: " << build_result.error().message << '\n';
        // Return rather than exit() so that the locals above are destroyed normally.
        return -1;
    }

    /******************* KnnSearch For HNSW Index *****************/
    auto query_vector = new float[dim];
    for (int64_t i = 0; i < dim; ++i) {
        query_vector[i] = static_cast<float>(distrib_real(rng));
    }

    // hnsw_search_parameters is the configuration for searching in an HNSW index.
    // The "hnsw" section contains parameters specific to the search operation:
    // - "ef_search": The size of the dynamic list used for nearest neighbor search, which
    //   influences both recall and search speed.
    auto hnsw_search_parameters = R"(
    {
        "hnsw": {
            "ef_search": 100
        }
    }
    )";
    const int64_t topk = 10;
    auto query = vsag::Dataset::Make();
    // Owner(true) marks the query dataset as the owner of query_vector.
    query->NumElements(1)->Dim(dim)->Float32Vectors(query_vector)->Owner(true);
    auto knn_result = index->KnnSearch(query, topk, hnsw_search_parameters);

    /******************* Print Search Result *****************/
    if (!knn_result.has_value()) {
        std::cerr << "Search Error: " << knn_result.error().message << '\n';
        // A failed search is a failed run; report it through the exit status.
        return -1;
    }

    auto result = knn_result.value();
    std::cout << "results: " << '\n';
    // NOTE(review): the loop bound is the result dataset's Dim(); presumably vsag stores the
    // number of returned neighbours there for a single-query search - confirm against the API.
    for (int64_t i = 0; i < result->GetDim(); ++i) {
        std::cout << result->GetIds()[i] << ": " << result->GetDistances()[i] << '\n';
    }
    return 0;
}