Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 23 additions & 7 deletions benchs/bench_polysemous_sift1m.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,39 @@

from __future__ import print_function

import time
import numpy as np
import faiss
from datasets import load_sift1M, evaluate

NUM_TRAIN_RUNS = 5

print("load data")
xb, xq, xt, gt = load_sift1M()
nq, d = xq.shape

# index with 16 subquantizers, 8 bits each
index = faiss.IndexPQ(d, 16, 8)
index.do_polysemous_training = True
index.verbose = True
train_times = []
for run in range(NUM_TRAIN_RUNS):
index = faiss.IndexPQ(d, 16, 8)
index.do_polysemous_training = True
index.verbose = (run == 0)

print("train")
print("train run %d/%d" % (run + 1, NUM_TRAIN_RUNS))

index.train(xt)
t0 = time.time()
index.train(xt)
t1 = time.time()
elapsed = t1 - t0
train_times.append(elapsed)
print(" Training time: %.2f s" % elapsed)

print("add vectors to index")
times = np.array(train_times)
print("\nTraining time over %d runs: "
"median %.2f s, mean %.2f s, std %.2f s, min %.2f s, max %.2f s"
% (NUM_TRAIN_RUNS, np.median(times), np.mean(times),
np.std(times), np.min(times), np.max(times)))

print("\nadd vectors to index")

index.add(xb)

Expand All @@ -42,3 +57,4 @@
index.polysemous_ht = ht
t, r = evaluate(index, xq, gt, 1)
print("\t %7.3f ms per query, R@1 %.4f" % (t, r[1]))

2 changes: 2 additions & 0 deletions faiss/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ set(FAISS_SIMD_AVX512_SRC
impl/fast_scan/impl-avx512.cpp
impl/hnsw/avx512.cpp
impl/pq_code_distance/avx512.cpp
impl/polysemous_training/avx512.cpp
impl/scalar_quantizer/sq-avx512.cpp
impl/binary_hamming/avx512.cpp
utils/simd_impl/distances_avx512.cpp
Expand Down Expand Up @@ -282,6 +283,7 @@ set(FAISS_HEADERS
impl/PanoramaStats.h
impl/PdxLayout.h
impl/PolysemousTraining.h
impl/polysemous_training/avx512.h
impl/ProductQuantizer-inl.h
impl/ProductQuantizer.h
impl/Quantizer.h
Expand Down
36 changes: 32 additions & 4 deletions faiss/impl/PolysemousTraining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cstring>
#include <memory>

#include <faiss/impl/polysemous_training/avx512.h>
#include <faiss/impl/simd_dispatch.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/hamming.h>
Expand Down Expand Up @@ -168,17 +169,17 @@ static inline int hamming_dis(uint64_t a, uint64_t b) {
return popcount64(a ^ b);
}

/// Return the square of x, i.e. x multiplied by itself.
static inline double sqr(double x) {
    const double squared = x * x;
    return squared;
}

namespace {

/// optimize permutation to reproduce a distance table with Hamming distances
struct ReproduceWithHammingObjective : PermutationObjective {
int nbits;
double dis_weight_factor;

/// Return x squared (x * x).
static double sqr(double x) {
    const double product = x * x;
    return product;
}

// weighting of distances: it is more important to reproduce small
// distances well
double dis_weight(double x) const {
Expand All @@ -190,6 +191,13 @@ struct ReproduceWithHammingObjective : PermutationObjective {

// cost = quadratic difference between actual distance and Hamming distance
double compute_cost(const int* perm) const override {
#ifdef COMPILE_SIMD_AVX512
if (SIMDConfig::level == SIMDLevel::AVX512 ||
SIMDConfig::level == SIMDLevel::AVX512_SPR) {
return polysemous_avx512::hamming_compute_cost_avx512(
n, perm, target_dis.data(), weights.data());
}
#endif
double cost = 0;
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
Expand All @@ -205,6 +213,13 @@ struct ReproduceWithHammingObjective : PermutationObjective {
// what would the cost update be if iw and jw were swapped?
// computed in O(n) instead of O(n^2) for the full re-computation
double cost_update(const int* perm, int iw, int jw) const override {
#ifdef COMPILE_SIMD_AVX512
if (SIMDConfig::level == SIMDLevel::AVX512 ||
SIMDConfig::level == SIMDLevel::AVX512_SPR) {
return polysemous_avx512::hamming_cost_update_avx512(
n, perm, iw, jw, target_dis.data(), weights.data());
}
#endif
double delta_cost = 0;

for (int i = 0; i < n; i++) {
Expand Down Expand Up @@ -308,6 +323,12 @@ double ReproduceDistancesObjective::get_source_dis(int i, int j) const {

// cost = quadratic difference between actual distance and Hamming distance
double ReproduceDistancesObjective::compute_cost(const int* perm) const {
#ifdef COMPILE_SIMD_AVX512
if (SIMDConfig::level == SIMDLevel::AVX512 ||
SIMDConfig::level == SIMDLevel::AVX512_SPR) {
return polysemous_avx512::distances_compute_cost_avx512(*this, perm);
}
#endif
double cost = 0;
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
Expand All @@ -324,6 +345,13 @@ double ReproduceDistancesObjective::compute_cost(const int* perm) const {
// computed in O(n) instead of O(n^2) for the full re-computation
double ReproduceDistancesObjective::cost_update(const int* perm, int iw, int jw)
const {
#ifdef COMPILE_SIMD_AVX512
if (SIMDConfig::level == SIMDLevel::AVX512 ||
SIMDConfig::level == SIMDLevel::AVX512_SPR) {
return polysemous_avx512::distances_cost_update_avx512(
*this, perm, iw, jw);
}
#endif
double delta_cost = 0;
for (int i = 0; i < n; i++) {
if (i == iw) {
Expand Down
Loading
Loading