99
1010#include < faiss/impl/simdlib/simdlib_avx2.h>
1111
12+ #include < cstring>
13+
1214#include < faiss/impl/scalar_quantizer/codecs.h>
1315#include < faiss/impl/scalar_quantizer/distance_computers.h>
1416#include < faiss/impl/scalar_quantizer/quantizers.h>
@@ -21,6 +23,61 @@ namespace scalar_quantizer {
2123
2224using simd8float32 = faiss::simd8float32_tpl<SIMDLevel::AVX2>;
2325
26+ namespace {
27+
28+ FAISS_ALWAYS_INLINE uint16_t load_u16 (const uint8_t * ptr) {
29+ uint16_t value;
30+ std::memcpy (&value, ptr, sizeof (value));
31+ return value;
32+ }
33+
34+ FAISS_ALWAYS_INLINE uint32_t load_u32 (const uint8_t * ptr) {
35+ uint32_t value;
36+ std::memcpy (&value, ptr, sizeof (value));
37+ return value;
38+ }
39+
40+ FAISS_ALWAYS_INLINE uint32_t load_u24 (const uint8_t * ptr) {
41+ return static_cast <uint32_t >(ptr[0 ]) |
42+ (static_cast <uint32_t >(ptr[1 ]) << 8 ) |
43+ (static_cast <uint32_t >(ptr[2 ]) << 16 );
44+ }
45+
46+ FAISS_ALWAYS_INLINE __m256i unpack_8x1bit_to_u32 (const uint8_t * code, int i) {
47+ const uint32_t packed = code[static_cast <size_t >(i) >> 3 ];
48+ const __m256i shifts = _mm256_setr_epi32 (0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 );
49+ const __m256i indices =
50+ _mm256_srlv_epi32 (_mm256_set1_epi32 (packed), shifts);
51+ return _mm256_and_si256 (indices, _mm256_set1_epi32 (0x1 ));
52+ }
53+
54+ FAISS_ALWAYS_INLINE __m256i unpack_8x2bit_to_u32 (const uint8_t * code, int i) {
55+ const uint32_t packed = load_u16 (code + (static_cast <size_t >(i) >> 2 ));
56+ const __m256i shifts = _mm256_setr_epi32 (0 , 2 , 4 , 6 , 8 , 10 , 12 , 14 );
57+ const __m256i indices =
58+ _mm256_srlv_epi32 (_mm256_set1_epi32 (packed), shifts);
59+ return _mm256_and_si256 (indices, _mm256_set1_epi32 (0x3 ));
60+ }
61+
62+ FAISS_ALWAYS_INLINE __m256i unpack_8x3bit_to_u32 (const uint8_t * code, int i) {
63+ const uint32_t packed =
64+ load_u24 (code + ((static_cast <size_t >(i) >> 3 ) * 3 ));
65+ const __m256i shifts = _mm256_setr_epi32 (0 , 3 , 6 , 9 , 12 , 15 , 18 , 21 );
66+ const __m256i indices =
67+ _mm256_srlv_epi32 (_mm256_set1_epi32 (packed), shifts);
68+ return _mm256_and_si256 (indices, _mm256_set1_epi32 (0x7 ));
69+ }
70+
71+ FAISS_ALWAYS_INLINE __m256i unpack_8x4bit_to_u32 (const uint8_t * code, int i) {
72+ const uint32_t packed = load_u32 (code + (static_cast <size_t >(i) >> 1 ));
73+ const __m256i shifts = _mm256_setr_epi32 (0 , 4 , 8 , 12 , 16 , 20 , 24 , 28 );
74+ const __m256i indices =
75+ _mm256_srlv_epi32 (_mm256_set1_epi32 (packed), shifts);
76+ return _mm256_and_si256 (indices, _mm256_set1_epi32 (0xf ));
77+ }
78+
79+ } // namespace
80+
2481/* *********************************************************
2582 * Codecs
2683 **********************************************************/
@@ -168,6 +225,56 @@ struct QuantizerTemplate<
168225 }
169226};
170227
228+ /* *********************************************************
229+ * TurboQuant MSE quantizer
230+ **********************************************************/
231+
232+ #define DEFINE_TQMSE_AVX2_SPECIALIZATION (NBITS, INDEX_EXPR ) \
233+ template <> \
234+ struct QuantizerTurboQuantMSE <NBITS, SIMDLevel::AVX2> \
235+ : QuantizerTurboQuantMSE<NBITS, SIMDLevel::NONE> { \
236+ using Base = QuantizerTurboQuantMSE<NBITS, SIMDLevel::NONE>; \
237+ \
238+ QuantizerTurboQuantMSE (size_t d, const std::vector<float >& trained) \
239+ : Base(d, trained) { \
240+ assert (d % 8 == 0 ); \
241+ } \
242+ \
243+ FAISS_ALWAYS_INLINE simd8float32 \
244+ reconstruct_8_components (const uint8_t * code, int i) const { \
245+ const __m256i indices = (INDEX_EXPR); \
246+ return simd8float32 (_mm256_i32gather_ps ( \
247+ this ->centroids , indices, sizeof (float ))); \
248+ } \
249+ }
250+
251+ DEFINE_TQMSE_AVX2_SPECIALIZATION (1 , unpack_8x1bit_to_u32(code, i));
252+ DEFINE_TQMSE_AVX2_SPECIALIZATION (2 , unpack_8x2bit_to_u32(code, i));
253+ DEFINE_TQMSE_AVX2_SPECIALIZATION (3 , unpack_8x3bit_to_u32(code, i));
254+ DEFINE_TQMSE_AVX2_SPECIALIZATION (4 , unpack_8x4bit_to_u32(code, i));
255+
256+ #undef DEFINE_TQMSE_AVX2_SPECIALIZATION
257+
258+ template <>
259+ struct QuantizerTurboQuantMSE <8 , SIMDLevel::AVX2>
260+ : QuantizerTurboQuantMSE<8 , SIMDLevel::NONE> {
261+ using Base = QuantizerTurboQuantMSE<8 , SIMDLevel::NONE>;
262+
263+ QuantizerTurboQuantMSE (size_t d, const std::vector<float >& trained)
264+ : Base(d, trained) {
265+ assert (d % 8 == 0 );
266+ }
267+
268+ FAISS_ALWAYS_INLINE simd8float32
269+ reconstruct_8_components (const uint8_t * code, int i) const {
270+ const __m128i packed = _mm_loadl_epi64 (
271+ (const __m128i*)(code + static_cast <size_t >(i)));
272+ const __m256i indices = _mm256_cvtepu8_epi32 (packed);
273+ return simd8float32 (
274+ _mm256_i32gather_ps (this ->centroids , indices, sizeof (float )));
275+ }
276+ };
277+
171278/* *********************************************************
172279 * FP16 Quantizer
173280 **********************************************************/