Skip to content

Commit 33a87dd

Browse files
authored
Merge branch 'main' into dev/eglaser-divbyzero-resolution
2 parents 53da15d + 9b84433 commit 33a87dd

6 files changed

Lines changed: 26 additions & 21 deletions

File tree

bindings/python/src/svs/common.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,10 @@ def read_vecs(filename: str):
8181
8282
* `bvecs`: 8-bit unsigned integers.
8383
* `fvecs`: 32-bit floating point numbers.
84-
* `ivecs`: 32-bit signed integers.
84+
* `ivecs`: 32-bit unsigned integers.
85+
86+
*Note*: The format differs from the IRISA format.
87+
Both vector dimensionality and `ivecs` values are unsigned.
8588
8689
Args:
8790
filename: The file to read.
@@ -93,24 +96,21 @@ def read_vecs(filename: str):
9396
file_type = filename[-5:]
9497
if file_type == 'bvecs':
9598
dtype = np.uint8
96-
struct_format = 'B'
9799
n_bytes = 1
98100
padding = 4
99101
elif file_type == 'fvecs':
100102
dtype = np.float32
101-
struct_format = 'f'
102103
n_bytes = 4
103104
padding = 1
104105
elif file_type == 'ivecs':
105106
dtype = np.uint32
106-
struct_format = 'i'
107107
n_bytes = 4
108108
padding = 1
109109
else:
110110
raise ValueError('Can only open bvecs, fvecs, and ivecs.')
111111

112112
with open(filename, 'rb') as fin:
113-
vec_size = struct.unpack('i', fin.read(4))[0]
113+
vec_size = struct.unpack('I', fin.read(4))[0]
114114

115115
X = np.fromfile(filename, dtype=dtype)
116116
X = X.reshape((-1, vec_size + padding))

examples/cpp/shared/example_vamana_with_compression.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include "svs/orchestrators/dynamic_vamana.h"
2424
#include "svs/orchestrators/exhaustive.h"
2525
#include "svs/orchestrators/vamana.h"
26+
#include <cstdint>
2627

2728
int main() {
2829
// STEP 1: Compress Data with LeanVec, reducing dimensionality to leanvec_dim dimensions
@@ -69,7 +70,7 @@ int main() {
6970
//! [Perform Queries]
7071

7172
//! [Recall]
72-
auto groundtruth = svs::load_data<int>(
73+
auto groundtruth = svs::load_data<uint32_t>(
7374
std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs"
7475
);
7576
double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);

examples/cpp/shared/example_vamana_with_compression_dynamic.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include "svs/orchestrators/dynamic_vamana.h"
2424
#include "svs/orchestrators/exhaustive.h"
2525
#include "svs/orchestrators/vamana.h"
26+
#include <cstdint>
2627

2728
// Alias for blocked Lean dataset that supports resize/compact
2829
using BlockedLean = svs::leanvec::LeanDataset<
@@ -113,7 +114,7 @@ int main() {
113114
//! [Perform Queries]
114115

115116
//! [Recall]
116-
auto groundtruth = svs::load_data<int>(
117+
auto groundtruth = svs::load_data<uint32_t>(
117118
std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs"
118119
);
119120
double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);

examples/cpp/shared/example_vamana_with_compression_lvq.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include "svs/orchestrators/dynamic_vamana.h"
2424
#include "svs/orchestrators/exhaustive.h"
2525
#include "svs/orchestrators/vamana.h"
26+
#include <cstdint>
2627

2728
int main() {
2829
// STEP 1: Compress Data with LVQ
@@ -57,7 +58,7 @@ int main() {
5758
//! [Perform Queries]
5859

5960
//! [Recall]
60-
auto groundtruth = svs::load_data<int>(
61+
auto groundtruth = svs::load_data<uint32_t>(
6162
std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs"
6263
);
6364
double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);

examples/cpp/shared/shared.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include "svs/orchestrators/dynamic_vamana.h"
2424
#include "svs/orchestrators/exhaustive.h"
2525
#include "svs/orchestrators/vamana.h"
26+
#include <cstdint>
2627

2728
#include "utils.h"
2829

@@ -96,7 +97,7 @@ void vamana_search(Data& data, Distance distance) {
9697

9798
index.set_search_window_size(search_window_size);
9899
const auto query_data = svs::load_data<float>(qfname);
99-
const auto groundtruth = svs::load_data<int>(gtfname);
100+
const auto groundtruth = svs::load_data<uint32_t>(gtfname);
100101

101102
auto tic = svs::lib::now();
102103
auto query_result = index.search(query_data, n_neighbors);

examples/cpp/shared/utils.h

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,15 @@
1818
* I/O functions for fvecs, ivecs and xVecs
1919
*****************************************************/
2020

21+
#include <cstdint>
2122
#include <random>
2223
#include <sys/stat.h>
2324
#include <sys/types.h>
2425
#include <unistd.h>
2526

26-
int fvec_fwrite(FILE* fo, const float* v, int d) {
27+
int fvec_fwrite(FILE* fo, const float* v, uint32_t d) {
2728
int ret;
28-
ret = fwrite(&d, sizeof(int), 1, fo);
29+
ret = fwrite(&d, sizeof(uint32_t), 1, fo);
2930
if (ret != 1) {
3031
perror("fvec_fwrite: write error 1");
3132
return -1;
@@ -38,7 +39,7 @@ int fvec_fwrite(FILE* fo, const float* v, int d) {
3839
return 0;
3940
}
4041

41-
int fvecs_write(const char* fname, int d, int n, const float* vf) {
42+
int fvecs_write(const char* fname, uint32_t d, int n, const float* vf) {
4243
FILE* fo = fopen(fname, "w");
4344
if (!fo) {
4445
perror("fvecs_write: cannot open file");
@@ -55,22 +56,22 @@ int fvecs_write(const char* fname, int d, int n, const float* vf) {
5556
return n;
5657
}
5758

// Writes a single `ivecs` record to `fo`: the element count `d` as a raw
// 32-bit unsigned integer, followed by the `d` raw 32-bit unsigned values
// stored at `v`. Bytes are written exactly as they are laid out in memory.
//
// Parameters:
//   fo - open output stream the record is appended to.
//   v  - pointer to `d` values; not read when `d` is 0.
//   d  - number of values in the record.
//
// Returns 0 on success. On a short write, reports the failure through
// perror() and returns -1.
int ivec_iwrite(FILE* fo, const uint32_t* v, uint32_t d) {
    // fwrite returns the number of elements written as size_t. Keeping that
    // type avoids narrowing to int and a signed/unsigned comparison with `d`.
    const size_t header_written = fwrite(&d, sizeof(uint32_t), 1, fo);
    if (header_written != 1) {
        perror("ivec_iwrite: write error 1");
        return -1;
    }

    // An empty record consists of the header only; do not hand a possibly
    // null `v` to fwrite.
    if (d == 0) {
        return 0;
    }

    const size_t values_written = fwrite(v, sizeof(uint32_t), d, fo);
    if (values_written != d) {
        perror("ivec_iwrite: write error 2");
        return -1;
    }
    return 0;
}
7273

73-
int ivecs_write(const char* fname, int d, int n, const int* vf) {
74+
int ivecs_write(const char* fname, uint32_t d, int n, const uint32_t* vf) {
7475
FILE* fo = fopen(fname, "w");
7576
if (!fo) {
7677
perror("fvecs_write: cannot open file");
@@ -93,7 +94,7 @@ void generate_random_data(size_t data_dim, size_t dataset_size, size_t query_siz
9394
std::default_random_engine generator;
9495
std::normal_distribution<float> dataset_dist(0.0f, dataset_std);
9596
std::normal_distribution<float> query_dist(0.0f, query_std);
96-
std::uniform_int_distribution<> uni_dist(0, dataset_size - 1);
97+
std::uniform_int_distribution<uint32_t> uni_dist(0, dataset_size - 1);
9798

9899
generator.seed(100);
99100
std::vector<float> dataset(dataset_size * data_dim);
@@ -102,9 +103,9 @@ void generate_random_data(size_t data_dim, size_t dataset_size, size_t query_siz
102103
}
103104

104105
std::vector<float> queries(query_size * data_dim);
105-
std::vector<int> gt(query_size);
106+
std::vector<uint32_t> gt(query_size);
106107
for (size_t i = 0; i < query_size; ++i) {
107-
int e = uni_dist(generator);
108+
uint32_t e = uni_dist(generator);
108109
for (size_t j = 0; j < data_dim; ++j) {
109110
queries[i * data_dim + j] = dataset[e * data_dim + j] + query_dist(generator);
110111
}

0 commit comments

Comments
 (0)