diff --git a/faiss/IVFlib.cpp b/faiss/IVFlib.cpp index a39fb83f23..03a9755125 100644 --- a/faiss/IVFlib.cpp +++ b/faiss/IVFlib.cpp @@ -601,10 +601,14 @@ void handle_ivf( sharded_centroids[i].data()); } char fname[256]; +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif snprintf(fname, 256, filename_template.c_str(), i); +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop +#endif faiss::write_index(sharded_index, fname); delete sharded_index; } @@ -655,10 +659,14 @@ void handle_binary_ivf( sharded_centroids[i].data()); } char fname[256]; +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif snprintf(fname, 256, filename_template.c_str(), i); +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop +#endif faiss::write_index_binary(sharded_index, fname); delete sharded_index; } diff --git a/faiss/IndexIVFFlat.h b/faiss/IndexIVFFlat.h index 711a663296..ae74794e5a 100644 --- a/faiss/IndexIVFFlat.h +++ b/faiss/IndexIVFFlat.h @@ -91,7 +91,7 @@ struct IVFFlatScanner : InvertedListScanner { size_t list_size, const uint8_t* codes, const idx_t* ids, - ResultHandler& handler) const; + ResultHandler& handler) const override; }; struct IndexIVFFlatDedup : IndexIVFFlat { diff --git a/faiss/IndexIVFSpectralHash.cpp b/faiss/IndexIVFSpectralHash.cpp index fc35675214..f0e53d36ea 100644 --- a/faiss/IndexIVFSpectralHash.cpp +++ b/faiss/IndexIVFSpectralHash.cpp @@ -38,7 +38,8 @@ IndexIVFSpectralHash::IndexIVFSpectralHash( own_invlists_in), nbit(nbit_in), period(period_in) { - auto rr = std::make_unique(d_in, nbit_in); + auto rr = std::make_unique( + static_cast(d_in), nbit_in); rr->init(1234); vt = rr.release(); own_fields = true; diff --git a/faiss/IndexRaBitQFastScan.h b/faiss/IndexRaBitQFastScan.h index 988a5c26b7..eeda760e4c 100644 --- a/faiss/IndexRaBitQFastScan.h +++ b/faiss/IndexRaBitQFastScan.h @@ -279,11 +279,11 @@ struct RaBitQHeapHandler rabitq_stats.n_multibit_evaluations += local_multibit_evaluations; } - void begin(const float* norms) { + void begin(const float* norms) override { normalizers = norms; } - void end() { + void end() override { #pragma omp parallel for if (nq > 100) for (int64_t q = 0; q < static_cast(nq); q++) { float* heap_dis = heap_distances + q * k; diff --git a/faiss/impl/PolysemousTraining.cpp b/faiss/impl/PolysemousTraining.cpp index 9d61a6452c..512ecadf76 100644 --- a/faiss/impl/PolysemousTraining.cpp +++ b/faiss/impl/PolysemousTraining.cpp @@ -823,10 +823,14 @@ void PolysemousTraining::optimize_reproduce_distances( if (log_pattern.size()) { char fname[256]; +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif snprintf(fname, 256, log_pattern.c_str(), m); +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop +#endif printf("opening log file %s\n", fname); optim.logfile = fopen(fname, "w"); FAISS_THROW_IF_NOT_MSG(optim.logfile, "could not open logfile"); @@ -938,10 +942,14 @@ void PolysemousTraining::optimize_ranking( if (log_pattern.size()) { char fname[256]; +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif snprintf(fname, 256, log_pattern.c_str(), m); +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop +#endif printf("opening log file %s\n", fname); optim.logfile = fopen(fname, "w"); FAISS_THROW_IF_NOT_FMT( diff --git a/faiss/impl/simdlib/simdlib_avx2.h b/faiss/impl/simdlib/simdlib_avx2.h index d89bd3f832..734c4095aa 100644 --- a/faiss/impl/simdlib/simdlib_avx2.h +++ b/faiss/impl/simdlib/simdlib_avx2.h @@ -141,12 +141,17 @@ struct simd16uint16_tpl : simd256bit_tpl { storeu((void*)bytes); char res[1000]; char* ptr = res; +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif for (int idx = 0; idx < 16; idx++) { - ptr += sprintf(ptr, fmt, bytes[idx]); + ptr += snprintf( + ptr, (size_t)(res + sizeof(res) - ptr), fmt, bytes[idx]); } +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop +#endif // strip last , ptr[-1] = 0; return std::string(res); @@ -462,12 +467,17 @@ struct simd32uint8_tpl : simd256bit_tpl { storeu((void*)bytes); char res[1000]; char* ptr = res; +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif for (int idx = 0; idx < 32; idx++) { - ptr += sprintf(ptr, fmt, bytes[idx]); + ptr += snprintf( + ptr, (size_t)(res + sizeof(res) - ptr), fmt, bytes[idx]); } +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop +#endif // strip last , ptr[-1] = 0; return std::string(res); @@ -601,12 +611,17 @@ struct simd8uint32_tpl : simd256bit_tpl { storeu((void*)bytes); char res[1000]; char* ptr = res; +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif for (int idx = 0; idx < 8; idx++) { - ptr += sprintf(ptr, fmt, bytes[idx]); + ptr += snprintf( + ptr, (size_t)(res + sizeof(res) - ptr), fmt, bytes[idx]); } +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop +#endif // strip last , ptr[-1] = 0; return std::string(res); @@ -727,7 +742,8 @@ struct simd8float32_tpl : simd256bit_tpl { char res[1000]; char* ptr = res; for (int idx = 0; idx < 8; idx++) { - ptr += sprintf(ptr, "%g,", tab[idx]); + ptr += snprintf( + ptr, (size_t)(res + sizeof(res) - ptr), "%g,", tab[idx]); } // strip last , ptr[-1] = 0; diff --git a/faiss/impl/simdlib/simdlib_avx512.h b/faiss/impl/simdlib/simdlib_avx512.h index c92190c727..1968b8ae23 100644 --- a/faiss/impl/simdlib/simdlib_avx512.h +++ b/faiss/impl/simdlib/simdlib_avx512.h @@ -127,9 +127,17 @@ struct simd32uint16_tpl : simd512bit_tpl { storeu((void*)bytes); char res[2000]; char* ptr = res; +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif for (int i = 0; i < 32; i++) { - ptr += sprintf(ptr, fmt, bytes[i]); + ptr += snprintf( + ptr, (size_t)(res + sizeof(res) - ptr), fmt, bytes[i]); } +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif // strip last , ptr[-1] = 0; return std::string(res); @@ -263,9 +271,17 @@ struct simd64uint8_tpl : simd512bit_tpl { storeu((void*)bytes); char res[2000]; char* ptr = res; +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif for (int i = 0; i < 64; i++) { - ptr += sprintf(ptr, fmt, bytes[i]); + ptr += snprintf( + ptr, (size_t)(res + sizeof(res) - ptr), fmt, bytes[i]); } +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif // strip last , ptr[-1] = 0; return std::string(res); @@ -374,7 +390,8 @@ struct simd16float32_tpl char res[1000]; char* ptr = res; for (int i = 0; i < 16; i++) { - ptr += sprintf(ptr, "%g,", tab[i]); + ptr += snprintf( + ptr, (size_t)(res + sizeof(res) - ptr), "%g,", tab[i]); } ptr[-1] = 0; return std::string(res); diff --git a/faiss/impl/simdlib/simdlib_emulated.h b/faiss/impl/simdlib/simdlib_emulated.h index 69fbaf80b3..6d56d97097 100644 --- a/faiss/impl/simdlib/simdlib_emulated.h +++ b/faiss/impl/simdlib/simdlib_emulated.h @@ -129,12 +129,17 @@ struct simd16uint16_tpl : simd256bit_tpl { std::string elements_to_string(const char* fmt) const { char res[1000], *ptr = res; +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif for (int i = 0; i < 16; i++) { - ptr += sprintf(ptr, fmt, u16[i]); + ptr += snprintf( + ptr, (size_t)(res + sizeof(res) - ptr), fmt, u16[i]); } +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop +#endif // strip last , ptr[-1] = 0; return std::string(res); @@ -507,12 +512,16 @@ struct simd32uint8_tpl : simd256bit_tpl { std::string elements_to_string(const char* fmt) const { char res[1000], *ptr = res; +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif for (int i = 0; i < 32; i++) { - ptr += sprintf(ptr, fmt, u8[i]); + ptr += snprintf(ptr, (size_t)(res + sizeof(res) - ptr), fmt, u8[i]); } +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop +#endif // strip last , ptr[-1] = 0; return std::string(res); @@ -707,12 +716,17 @@ struct simd8uint32_tpl : simd256bit_tpl { std::string elements_to_string(const char* fmt) const { char res[1000], *ptr = res; +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif for (int i = 0; i < 8; i++) { - ptr += sprintf(ptr, fmt, u32[i]); + ptr += snprintf( + ptr, (size_t)(res + sizeof(res) - ptr), fmt, u32[i]); } +#if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop +#endif // strip last , ptr[-1] = 0; return std::string(res); @@ -863,7 +877,8 @@ struct simd8float32_tpl : simd256bit_tpl { std::string tostring() const { char res[1000], *ptr = res; for (int i = 0; i < 8; i++) { - ptr += sprintf(ptr, "%g,", f32[i]); + ptr += snprintf( + ptr, (size_t)(res + sizeof(res) - ptr), "%g,", f32[i]); } // strip last , ptr[-1] = 0; diff --git a/faiss/python/faiss_example_external_module.swig b/faiss/python/faiss_example_external_module.swig index e1a21e9731..4eec0e4a1c 100644 --- a/faiss/python/faiss_example_external_module.swig +++ b/faiss/python/faiss_example_external_module.swig @@ -77,7 +77,7 @@ struct IDSelectorModulo : faiss::IDSelector { IDSelectorModulo(int mod): mod(mod) {} - bool is_member(faiss::idx_t id) const { + bool is_member(faiss::idx_t id) const override { return id % mod == 0; } diff --git a/faiss/utils/distances.h b/faiss/utils/distances.h index f44772366b..7dc8896454 100644 --- a/faiss/utils/distances.h +++ b/faiss/utils/distances.h @@ -600,4 +600,67 @@ int fvec_madd_and_argmin( const float* b, float* c); +/* Explicit specialization declarations for all SIMD-templated distance + functions. C++ [temp.expl.spec]/7 requires that these appear before any + translation unit that might implicitly instantiate them. GCC/Clang are + lenient about this, but MSVC is not — without these declarations the + linker emits LNK2001 for the specializations defined in the _avx2 + translation units. */ + +// clang-format off +#define FAISS_DECLARE_DISTANCES_SPECIALIZATIONS(SL) \ + template <> float fvec_L2sqr( \ + const float* x, const float* y, size_t d); \ + template <> float fvec_inner_product( \ + const float* x, const float* y, size_t d); \ + template <> float fvec_L1( \ + const float* x, const float* y, size_t d); \ + template <> float fvec_Linf( \ + const float* x, const float* y, size_t d); \ + template <> void fvec_inner_product_batch_4( \ + const float* x, const float* y0, const float* y1, \ + const float* y2, const float* y3, const size_t d, \ + float& dis0, float& dis1, float& dis2, float& dis3); \ + template <> void fvec_L2sqr_batch_4( \ + const float* x, const float* y0, const float* y1, \ + const float* y2, const float* y3, const size_t d, \ + float& dis0, float& dis1, float& dis2, float& dis3); \ + template <> void fvec_inner_products_ny( \ + float* ip, const float* x, const float* y, \ + size_t d, size_t ny); \ + template <> void fvec_L2sqr_ny( \ + float* dis, const float* x, const float* y, \ + size_t d, size_t ny); \ + template <> void fvec_L2sqr_ny_transposed( \ + float* dis, const float* x, const float* y, \ + const float* y_sqlen, size_t d, size_t d_offset, size_t ny); \ + template <> size_t fvec_L2sqr_ny_nearest( \ + float* distances_tmp_buffer, const float* x, \ + const float* y, size_t d, size_t ny); \ + template <> size_t fvec_L2sqr_ny_nearest_y_transposed( \ + float* distances_tmp_buffer, const float* x, \ + const float* y, const float* y_sqlen, \ + size_t d, size_t d_offset, size_t ny); \ + template <> float fvec_norm_L2sqr(const float* x, size_t d); \ + template <> void fvec_add( \ + size_t d, const float* a, const float* b, float* c); \ + template <> void fvec_add( \ + size_t d, const float* a, float b, float* c); \ + template <> void fvec_sub( \ + size_t d, const float* a, const float* b, float* c); \ + template <> void compute_PQ_dis_tables_dsub2( \ + size_t d, size_t ksub, const float* centroids, \ + size_t nx, const float* x, bool is_inner_product, \ + float* dis_tables); \ + template <> void fvec_madd( \ + size_t n, const float* a, float bf, const float* b, float* c); \ + template <> int fvec_madd_and_argmin( \ + size_t n, const float* a, float bf, const float* b, float* c); + +FAISS_DECLARE_DISTANCES_SPECIALIZATIONS(SIMDLevel::NONE) +FAISS_DECLARE_DISTANCES_SPECIALIZATIONS(SIMDLevel::AVX2) + +#undef FAISS_DECLARE_DISTANCES_SPECIALIZATIONS +// clang-format on + } // namespace faiss diff --git a/faiss/utils/partitioning.cpp b/faiss/utils/partitioning.cpp index 7c2decfe9d..fd2c6d7703 100644 --- a/faiss/utils/partitioning.cpp +++ b/faiss/utils/partitioning.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -432,7 +433,7 @@ uint16_t simd_partition_fuzzy_with_bounds( // lower bound inclusive, upper exclusive size_t s0 = s0i, s1 = s1i + 1; - IFV printf("bounds: %ld %ld\n", s0, s1 - 1); + IFV printf("bounds: %zu %zu\n", s0, s1 - 1); int thresh; size_t n_eq = 0, n_lt = 0; @@ -444,7 +445,7 @@ uint16_t simd_partition_fuzzy_with_bounds( count_lt_and_eq(vals, n, thresh, n_lt, n_eq); IFV printf( - " [%ld %ld] thresh=%d n_lt=%ld n_eq=%ld, q=%ld:%ld/%ld\n", + " [%zu %zu] thresh=%d n_lt=%zu n_eq=%zu, q=%zu:%zu/%zu\n", s0, s1, thresh, @@ -481,7 +482,8 @@ uint16_t simd_partition_fuzzy_with_bounds( // number of equal values to keep int64_t n_eq_1 = q - n_lt; - IFV printf("shrink: thresh=%d q=%ld n_eq_1=%ld\n", thresh, q, n_eq_1); + IFV printf( + "shrink: thresh=%d q=%zu n_eq_1=%" PRId64 "\n", thresh, q, n_eq_1); if (n_eq_1 < 0) { // happens when > q elements are at lower bound assert(s0 + 1 == s1); q = q_min; @@ -491,14 +493,15 @@ uint16_t simd_partition_fuzzy_with_bounds( thresh++; } n_eq_1 = q; - IFV printf(" override: thresh=%d n_eq_1=%ld\n", thresh, n_eq_1); + IFV printf( + " override: thresh=%d n_eq_1=%" PRId64 "\n", thresh, n_eq_1); } else { assert(n_eq_1 <= n_eq); } size_t wp = simd_compress_array(vals, ids, n, thresh, n_eq_1); - IFV printf("wp=%ld\n", wp); + IFV printf("wp=%zu\n", wp); assert(wp == q); if (q_out) { *q_out = q; diff --git a/faiss/utils/simd_impl/distances_sse-inl.h b/faiss/utils/simd_impl/distances_sse-inl.h index 5b69dc48be..38b748dc04 100644 --- a/faiss/utils/simd_impl/distances_sse-inl.h +++ b/faiss/utils/simd_impl/distances_sse-inl.h @@ -360,7 +360,8 @@ inline int fvec_madd_and_argmin_sse( float bf, const float* b, float* c) { - if ((n & 3) == 0 && ((((long)a) | ((long)b) | ((long)c)) & 15) == 0) { + if ((n & 3) == 0 && + ((((uintptr_t)a) | ((uintptr_t)b) | ((uintptr_t)c)) & 15) == 0) { return fvec_madd_and_argmin_sse_ref(n, a, bf, b, c); } else { return fvec_madd_and_argmin(n, a, bf, b, c);