Skip to content

Add other data storage types to Python bindings. #364

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 79 additions & 24 deletions hnswlib/space_ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,44 @@
#include "hnswlib.h"

namespace hnswlib {

static float
/**
* For a given loop unrolling factor K, distance type dist_t, and data type data_t,
* calculate the inner product distance between two vectors.
* The compiler should automatically do the loop unrolling for us here and vectorize as appropriate.
*/
template<typename dist_t, typename data_t = dist_t, int K = 1>
static dist_t
InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
size_t qty = *((size_t *) qty_ptr);
float res = 0;
for (unsigned i = 0; i < qty; i++) {
res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
dist_t res = 0;
data_t *a = (data_t *) pVect1;
data_t *b = (data_t *) pVect2;

qty = qty / K;

for (size_t i = 0; i < qty; i++) {
for (size_t j = 0; j < K; j++) {
const size_t index = (i * K) + j;
const dist_t _a = a[index];
const dist_t _b = b[index];
res += _a * _b;
}
}
return (1.0f - res);

return (static_cast<dist_t>(1.0f) - res);
}

/**
 * Inner-product distance (1 - <a, b>) for vectors whose length is at least K
 * but is not a multiple of K.
 *
 * The first K elements are handled by a loop with a compile-time trip count;
 * the remaining (qty - K) elements are handled one at a time.
 *
 * The two partial dot products are added together before the single
 * "1 - dot" conversion. Adding two partial *distances* instead would apply
 * the "1 -" offset twice and yield 2 - <a, b>.
 *
 * @tparam dist_t  accumulator type and type of the returned distance
 * @tparam data_t  element type stored in both vectors
 * @tparam K       number of leading elements handled by the fixed-size loop
 * @param pVect1   first vector, read as an array of data_t
 * @param pVect2   second vector, read as an array of data_t
 * @param qty_ptr  pointer to a size_t holding the element count; the caller
 *                 must guarantee that the count is >= K
 * @return 1 - sum(a[i] * b[i]), converted to dist_t
 */
template<typename dist_t, typename data_t = dist_t, int K>
static dist_t
InnerProductAtLeast(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
    constexpr size_t head_len = static_cast<size_t>(K);
    const size_t qty = *static_cast<const size_t *>(qty_ptr);

    const data_t *a = static_cast<const data_t *>(pVect1);
    const data_t *b = static_cast<const data_t *>(pVect2);

    // Fixed trip count: lets the compiler unroll/vectorize the first K terms.
    dist_t head = 0;
    for (size_t j = 0; j < head_len; j++) {
        const dist_t _a = a[j];
        const dist_t _b = b[j];
        head += _a * _b;
    }

    // Leftover elements past the first K.
    dist_t tail = 0;
    for (size_t i = head_len; i < qty; i++) {
        const dist_t _a = a[i];
        const dist_t _b = b[i];
        tail += _a * _b;
    }

    // Convert the full dot product to a distance exactly once.
    return static_cast<dist_t>(1.0f) - (head + tail);
}

#if defined(USE_AVX)
Expand Down Expand Up @@ -254,7 +282,7 @@ namespace hnswlib {
float *pVect2 = (float *) pVect2v + qty16;

size_t qty_left = qty - qty16;
float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
float res_tail = InnerProduct<float, float>(pVect1, pVect2, &qty_left);
return res + res_tail - 1.0f;
}

Expand All @@ -268,48 +296,75 @@ namespace hnswlib {

float *pVect1 = (float *) pVect1v + qty4;
float *pVect2 = (float *) pVect2v + qty4;
float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
float res_tail = InnerProduct<float, float>(pVect1, pVect2, &qty_left);

return res + res_tail - 1.0f;
}
#endif

class InnerProductSpace : public SpaceInterface<float> {
template<typename dist_t, typename data_t = dist_t>
class InnerProductSpace : public SpaceInterface<dist_t> {

DISTFUNC<float> fstdistfunc_;
DISTFUNC<dist_t> fstdistfunc_;
size_t data_size_;
size_t dim_;
public:
InnerProductSpace(size_t dim) {
fstdistfunc_ = InnerProduct;
#if defined(USE_AVX) || defined(USE_SSE) || defined(USE_AVX512)
if (dim % 16 == 0)
fstdistfunc_ = InnerProductSIMD16Ext;
InnerProductSpace(size_t dim) : dim_(dim), data_size_(dim * sizeof(data_t)) {
if (dim % 128 == 0)
fstdistfunc_ = InnerProduct<dist_t, data_t, 128>;
else if (dim % 64 == 0)
fstdistfunc_ = InnerProduct<dist_t, data_t, 64>;
else if (dim % 32 == 0)
fstdistfunc_ = InnerProduct<dist_t, data_t, 32>;
else if (dim % 16 == 0)
fstdistfunc_ = InnerProduct<dist_t, data_t, 16>;
else if (dim % 8 == 0)
fstdistfunc_ = InnerProduct<dist_t, data_t, 8>;
else if (dim % 4 == 0)
fstdistfunc_ = InnerProductSIMD4Ext;
fstdistfunc_ = InnerProduct<dist_t, data_t, 4>;

else if (dim > 128)
fstdistfunc_ = InnerProductAtLeast<dist_t, data_t, 128>;
else if (dim > 64)
fstdistfunc_ = InnerProductAtLeast<dist_t, data_t, 64>;
else if (dim > 32)
fstdistfunc_ = InnerProductAtLeast<dist_t, data_t, 32>;
else if (dim > 16)
fstdistfunc_ = InnerProductSIMD16ExtResiduals;
fstdistfunc_ = InnerProductAtLeast<dist_t, data_t, 16>;
else if (dim > 8)
fstdistfunc_ = InnerProductAtLeast<dist_t, data_t, 8>;
else if (dim > 4)
fstdistfunc_ = InnerProductSIMD4ExtResiduals;
#endif
dim_ = dim;
data_size_ = dim * sizeof(float);
fstdistfunc_ = InnerProductAtLeast<dist_t, data_t, 4>;
else
fstdistfunc_ = InnerProduct<dist_t, data_t>;
}

size_t get_data_size() {
return data_size_;
}

DISTFUNC<float> get_dist_func() {
DISTFUNC<dist_t> get_dist_func() {
return fstdistfunc_;
}

void *get_dist_func_param() {
return &dim_;
}

~InnerProductSpace() {}
~InnerProductSpace() {}
};

/**
 * Constructor specialization for float data with float distances.
 *
 * Starts from the scalar InnerProduct<float, float> kernel and, when any of
 * USE_SSE / USE_AVX / USE_AVX512 is defined, replaces it according to dim:
 *   - dim % 16 == 0 -> InnerProductSIMD16Ext
 *   - dim % 4 == 0  -> InnerProductSIMD4Ext
 *   - dim > 16      -> InnerProductSIMD16ExtResiduals
 *   - dim > 4       -> InnerProductSIMD4ExtResiduals
 *
 * Declared inline because a full explicit specialization is an ordinary
 * function: without it, including this header from more than one translation
 * unit produces multiple definitions. Members are initialized in declaration
 * order (data_size_, then dim_).
 *
 * @param dim number of elements per vector
 */
template<> inline InnerProductSpace<float, float>::InnerProductSpace(size_t dim)
    : data_size_(dim * sizeof(float)), dim_(dim) {
    fstdistfunc_ = InnerProduct<float, float>;
#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
    if (dim % 16 == 0)
        fstdistfunc_ = InnerProductSIMD16Ext;
    else if (dim % 4 == 0)
        fstdistfunc_ = InnerProductSIMD4Ext;
    else if (dim > 16)
        fstdistfunc_ = InnerProductSIMD16ExtResiduals;
    else if (dim > 4)
        fstdistfunc_ = InnerProductSIMD4ExtResiduals;
#endif
}

}
175 changes: 75 additions & 100 deletions hnswlib/space_l2.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,45 @@
#include "hnswlib.h"

namespace hnswlib {

static float
L2Sqr(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
/**
* For a given loop unrolling factor K, distance type dist_t, and data type data_t,
* calculate the L2 squared distance between two vectors.
* The compiler should automatically do the loop unrolling for us here and vectorize as appropriate.
*/
template<typename dist_t, typename data_t = dist_t, int K = 1>
static dist_t
L2Sqr(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
size_t qty = *((size_t *) qty_ptr);
dist_t res = 0;
data_t *a = (data_t *) pVect1;
data_t *b = (data_t *) pVect2;

qty = qty / K;

float res = 0;
for (size_t i = 0; i < qty; i++) {
float t = *pVect1 - *pVect2;
pVect1++;
pVect2++;
res += t * t;
for (size_t j = 0; j < K; j++) {
const size_t index = (i * K) + j;
const dist_t _a = a[index];
const dist_t _b = b[index];
res += (_a - _b) * (_a - _b);
}
}
return (res);
}

/**
 * Squared L2 distance for vectors whose length is at least K but is not a
 * multiple of K.
 *
 * Splits the work into two L2Sqr calls: one over the first K elements using
 * unrolling factor K, and one over the remaining (count - K) elements using
 * unrolling factor 1. The two partial sums are added.
 *
 * @tparam dist_t  type of the returned distance
 * @tparam data_t  element type stored in both vectors
 * @tparam K       number of leading elements handled by the first call
 * @param pVect1   first vector, read as an array of data_t
 * @param pVect2   second vector, read as an array of data_t
 * @param qty_ptr  pointer to a size_t holding the element count; K is
 *                 subtracted from it without a range check
 * @return sum of the two partial squared distances
 */
template<typename dist_t, typename data_t = dist_t, int K>
static dist_t
L2SqrAtLeast(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
    const data_t *lhs = static_cast<const data_t *>(pVect1);
    const data_t *rhs = static_cast<const data_t *>(pVect2);

    size_t head_len = K;
    size_t tail_len = *static_cast<const size_t *>(qty_ptr) - K;

    const dist_t head = L2Sqr<dist_t, data_t, K>(lhs, rhs, &head_len);
    const dist_t tail = L2Sqr<dist_t, data_t, 1>(lhs + K, rhs + K, &tail_len);
    return head + tail;
}

#if defined(USE_AVX512)

// Favor using AVX512 if available.
Expand Down Expand Up @@ -150,7 +172,7 @@ namespace hnswlib {
float *pVect2 = (float *) pVect2v + qty16;

size_t qty_left = qty - qty16;
float res_tail = L2Sqr(pVect1, pVect2, &qty_left);
float res_tail = L2Sqr<float, float>(pVect1, pVect2, &qty_left);
return (res + res_tail);
}
#endif
Expand Down Expand Up @@ -194,39 +216,54 @@ namespace hnswlib {

float *pVect1 = (float *) pVect1v + qty4;
float *pVect2 = (float *) pVect2v + qty4;
float res_tail = L2Sqr(pVect1, pVect2, &qty_left);
float res_tail = L2Sqr<float, float>(pVect1, pVect2, &qty_left);

return (res + res_tail);
}
#endif

class L2Space : public SpaceInterface<float> {
template <typename dist_t, typename data_t = dist_t>
class L2Space : public SpaceInterface<dist_t> {

DISTFUNC<float> fstdistfunc_;
DISTFUNC<dist_t> fstdistfunc_;
size_t data_size_;
size_t dim_;
public:
L2Space(size_t dim) {
fstdistfunc_ = L2Sqr;
#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
if (dim % 16 == 0)
fstdistfunc_ = L2SqrSIMD16Ext;
L2Space(size_t dim) : dim_(dim), data_size_(dim * sizeof(data_t)) {
if (dim % 128 == 0)
fstdistfunc_ = L2Sqr<dist_t, data_t, 128>;
else if (dim % 64 == 0)
fstdistfunc_ = L2Sqr<dist_t, data_t, 64>;
else if (dim % 32 == 0)
fstdistfunc_ = L2Sqr<dist_t, data_t, 32>;
else if (dim % 16 == 0)
fstdistfunc_ = L2Sqr<dist_t, data_t, 16>;
else if (dim % 8 == 0)
fstdistfunc_ = L2Sqr<dist_t, data_t, 8>;
else if (dim % 4 == 0)
fstdistfunc_ = L2SqrSIMD4Ext;
fstdistfunc_ = L2Sqr<dist_t, data_t, 4>;

else if (dim > 128)
fstdistfunc_ = L2SqrAtLeast<dist_t, data_t, 128>;
else if (dim > 64)
fstdistfunc_ = L2SqrAtLeast<dist_t, data_t, 64>;
else if (dim > 32)
fstdistfunc_ = L2SqrAtLeast<dist_t, data_t, 32>;
else if (dim > 16)
fstdistfunc_ = L2SqrSIMD16ExtResiduals;
fstdistfunc_ = L2SqrAtLeast<dist_t, data_t, 16>;
else if (dim > 8)
fstdistfunc_ = L2SqrAtLeast<dist_t, data_t, 8>;
else if (dim > 4)
fstdistfunc_ = L2SqrSIMD4ExtResiduals;
#endif
dim_ = dim;
data_size_ = dim * sizeof(float);
fstdistfunc_ = L2SqrAtLeast<dist_t, data_t, 4>;
else
fstdistfunc_ = L2Sqr<dist_t, data_t>;
}

size_t get_data_size() {
return data_size_;
}

DISTFUNC<float> get_dist_func() {
DISTFUNC<dist_t> get_dist_func() {
return fstdistfunc_;
}

Expand All @@ -237,79 +274,17 @@ namespace hnswlib {
~L2Space() {}
};

static int
L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {

size_t qty = *((size_t *) qty_ptr);
int res = 0;
unsigned char *a = (unsigned char *) pVect1;
unsigned char *b = (unsigned char *) pVect2;

qty = qty >> 2;
for (size_t i = 0; i < qty; i++) {

res += ((*a) - (*b)) * ((*a) - (*b));
a++;
b++;
res += ((*a) - (*b)) * ((*a) - (*b));
a++;
b++;
res += ((*a) - (*b)) * ((*a) - (*b));
a++;
b++;
res += ((*a) - (*b)) * ((*a) - (*b));
a++;
b++;
}
return (res);
/**
 * Constructor specialization for float data with float distances.
 *
 * Starts from the scalar L2Sqr<float, float> kernel and, when any of
 * USE_SSE / USE_AVX / USE_AVX512 is defined, replaces it according to dim:
 *   - dim % 16 == 0 -> L2SqrSIMD16Ext
 *   - dim % 4 == 0  -> L2SqrSIMD4Ext
 *   - dim > 16      -> L2SqrSIMD16ExtResiduals
 *   - dim > 4       -> L2SqrSIMD4ExtResiduals
 *
 * Declared inline because a full explicit specialization is an ordinary
 * function: without it, including this header from more than one translation
 * unit produces multiple definitions. Members are initialized in declaration
 * order (data_size_, then dim_).
 *
 * @param dim number of elements per vector
 */
template<> inline L2Space<float, float>::L2Space(size_t dim)
    : data_size_(dim * sizeof(float)), dim_(dim) {
    fstdistfunc_ = L2Sqr<float, float>;
#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
    if (dim % 16 == 0)
        fstdistfunc_ = L2SqrSIMD16Ext;
    else if (dim % 4 == 0)
        fstdistfunc_ = L2SqrSIMD4Ext;
    else if (dim > 16)
        fstdistfunc_ = L2SqrSIMD16ExtResiduals;
    else if (dim > 4)
        fstdistfunc_ = L2SqrSIMD4ExtResiduals;
#endif
}

/**
 * Squared L2 distance between two byte vectors.
 *
 * @param pVect1   first vector, read as an array of unsigned char
 * @param pVect2   second vector, read as an array of unsigned char
 * @param qty_ptr  pointer to a size_t holding the number of elements
 * @return sum over all elements of (a[i] - b[i])^2, accumulated as int
 */
static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, const void* __restrict qty_ptr) {
    const unsigned char* lhs = static_cast<const unsigned char*>(pVect1);
    const unsigned char* rhs = static_cast<const unsigned char*>(pVect2);
    const size_t count = *static_cast<const size_t*>(qty_ptr);

    int sum = 0;
    for (size_t idx = 0; idx < count; ++idx) {
        // Operands promote to int, so the difference may be negative.
        const int diff = lhs[idx] - rhs[idx];
        sum += diff * diff;
    }
    return sum;
}

/**
 * Space with int distances over vectors of unsigned char elements.
 *
 * The constructor picks L2SqrI4x when dim is a multiple of 4 and L2SqrI
 * otherwise; the accessors hand back the chosen function, the per-vector
 * byte size, and a pointer to the stored dimension.
 */
class L2SpaceI : public SpaceInterface<int> {

    DISTFUNC<int> fstdistfunc_;
    size_t data_size_;
    size_t dim_;

public:
    /** @param dim number of elements per vector */
    L2SpaceI(size_t dim)
        : fstdistfunc_((dim % 4 == 0) ? L2SqrI4x : L2SqrI),
          data_size_(dim * sizeof(unsigned char)),
          dim_(dim) {}

    /** @return size in bytes of one vector (dim * sizeof(unsigned char)) */
    size_t get_data_size() { return data_size_; }

    /** @return the distance function selected in the constructor */
    DISTFUNC<int> get_dist_func() { return fstdistfunc_; }

    /** @return pointer to the stored dimension */
    void *get_dist_func_param() { return &dim_; }

    ~L2SpaceI() {}
};


}
Loading