Skip to content

Commit a2dfd91

Browse files
algoriddle authored and meta-codesync[bot] committed
Delete old pq4_accumulate_loop files (replaced by dispatching.h) (facebookresearch#4905)
Summary: Pull Request resolved: facebookresearch#4905. Now that all search paths use FastScanCodeScanner (via dispatching.h), the old pq4_accumulate_loop and pq4_accumulate_loop_qbs free functions have no callers. Delete pq4_fast_scan_search_1.cpp and pq4_fast_scan_search_qbs.cpp entirely. Relocate the remaining utility functions (pq4_qbs_to_nq, pq4_preferred_qbs, accumulate_to_mem) to fast_scan.cpp. Remove dead declarations from fast_scan.h. This completes the code deduplication: dispatching.h uses shared helpers from accumulate_loops.h and decompose_qbs.h, and no duplicate implementations remain. Reviewed By: mdouze. Differential Revision: D96131176.
1 parent 2cecedd commit a2dfd91

5 files changed

Lines changed: 49 additions & 224 deletions

File tree

faiss/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -121,8 +121,6 @@ set(FAISS_SRC
121121
impl/lattice_Zn.cpp
122122
impl/mapped_io.cpp
123123
impl/fast_scan/fast_scan.cpp
124-
impl/fast_scan/pq4_fast_scan_search_1.cpp
125-
impl/fast_scan/pq4_fast_scan_search_qbs.cpp
126124
impl/residual_quantizer_encode_steps.cpp
127125
impl/zerocopy_io.cpp
128126
impl/NNDescent.cpp

faiss/impl/fast_scan/fast_scan.cpp

Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66
*/
77

88
#include <faiss/impl/FaissAssert.h>
9+
#include <faiss/impl/fast_scan/LookupTableScaler.h>
10+
#include <faiss/impl/fast_scan/decompose_qbs.h>
911
#include <faiss/impl/fast_scan/fast_scan.h>
1012
#include <faiss/impl/fast_scan/simd_result_handlers.h>
1113
#include <faiss/impl/simd_dispatch.h>
@@ -351,6 +353,52 @@ int pq4_pack_LUT_qbs_q_map(
351353
return i0;
352354
}
353355

356+
// declared in simd_result_handlers.h
357+
bool simd_result_handlers_accept_virtual = true;
358+
359+
/* Compute the number of queries encoded by a base-16 decomposition.
 *
 * Each 4-bit digit of qbs holds the size of one query block; the result is
 * the sum of all digits (e.g. 0x1223 -> 3 + 2 + 2 + 1 = 8).
 *
 * @param qbs  block sizes encoded as base-16 digits
 * @return     sum of the hexadecimal digits of qbs
 */
int pq4_qbs_to_nq(int qbs) {
    // Walk the digits on an unsigned copy: right-shifting a negative int
    // keeps the sign bit set, so a signed loop never reaches 0 and would
    // spin forever on a negative qbs.
    unsigned int digits = (unsigned int)qbs;
    int nq = 0;
    while (digits != 0) {
        nq += (int)(digits & 15u);
        digits >>= 4;
    }
    return nq;
}
369+
370+
/* Return the preferred base-16 decomposition (qbs) for a block of n queries.
 *
 * Up to 11 queries the decomposition comes from a table of measured values.
 * From 12 to 24 queries it is built as floor(n / 3) blocks of 3 queries in
 * the low digits, followed by one block holding the remaining n % 3 queries.
 *
 * @param n  number of queries, 0 <= n <= 24
 * @return   qbs value whose hexadecimal digits sum to n
 * @throws   FaissException (via FAISS_THROW_FMT) if n is negative or > 24
 */
int pq4_preferred_qbs(int n) {
    // from timings in P141901742, P141902828
    static const int small_qbs[12] = {
            0, 1, 2, 3, 0x13, 0x23, 0x33, 0x223, 0x233, 0x333, 0x2233, 0x2333};

    if (n < 0) {
        // a negative n would index the table out of bounds
        FAISS_THROW_FMT("invalid number of queries %d", n);
    }
    if (n <= 11) {
        return small_qbs[n];
    }
    if (n > 24) {
        FAISS_THROW_FMT("number of queries %d too large", n);
    }

    // override qbs: all first stages with 3 steps
    // then 1 stage with the rest
    const int nbit = 4 * (n / 3); // number of bits holding only 3s (16..32)
    const unsigned long long rest = (unsigned long long)(n % 3);

    // nbit reaches 32 when n == 24; shifting a 32-bit int by 32 is undefined
    // behaviour, so the mask and the trailing digit are built in 64 bits.
    unsigned long long qbs = 0x33333333ULL & ((1ULL << nbit) - 1);
    qbs |= rest << nbit;

    // fits in a positive int: the largest value is 0x33333333 (n == 24)
    return (int)qbs;
}
387+
388+
void accumulate_to_mem(
389+
int nq,
390+
size_t ntotal2,
391+
int nsq,
392+
const uint8_t* codes,
393+
const uint8_t* LUT,
394+
uint16_t* accu) {
395+
using namespace simd_result_handlers;
396+
FAISS_THROW_IF_NOT(ntotal2 % 32 == 0);
397+
StoreResultHandler<> handler(accu, ntotal2);
398+
DummyScaler<> scaler;
399+
accumulate(nq, ntotal2, nsq, codes, LUT, handler, scaler, 32 * nsq / 2);
400+
}
401+
354402
} // namespace faiss
355403

356404
/***************************************************************

faiss/impl/fast_scan/fast_scan.h

Lines changed: 1 addition & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,6 @@ namespace faiss {
2929
struct IDSelector;
3030
struct RangeSearchResult;
3131
struct RangeSearchPartialResult;
32-
struct SIMDResultHandler;
3332
struct SIMDResultHandlerToFloat;
3433

3534
/** Pack codes for consumption by the SIMD kernels.
@@ -125,42 +124,6 @@ struct CodePackerPQ4 : CodePacker {
125124
*/
126125
void pq4_pack_LUT(int nq, int nsq, const uint8_t* src, uint8_t* dest);
127126

128-
/** Loop over database elements and accumulate results into result handler
129-
*
130-
* @param nq number of queries
131-
* @param nb number of database elements
132-
* @param bbs size of database blocks (multiple of 32)
133-
* @param nsq number of sub-quantizers (multiple of 2)
134-
* @param codes packed codes array
135-
* @param LUT packed look-up table
136-
* @param scaler scaler to scale the encoded norm
137-
* @param block_stride stride in bytes between consecutive blocks.
138-
*/
139-
void pq4_accumulate_loop(
140-
int nq,
141-
size_t nb,
142-
int bbs,
143-
int nsq,
144-
const uint8_t* codes,
145-
const uint8_t* LUT,
146-
SIMDResultHandler& res,
147-
int pq2x4_scale,
148-
size_t block_stride);
149-
150-
/* qbs versions, supported only for bbs=32.
151-
*
152-
* The kernel function runs the kernel for *several* query blocks
153-
* and bbs database vectors. The sizes of the blocks are encoded in qbs as
154-
* base-16 digits.
155-
*
156-
* For example, qbs = 0x1223 means that the kernel will be run 4 times, the
157-
* first time with 3 query vectors, second time with 2 query vectors, then 2
158-
* vectors again and finally with 1 query vector. The output block will thus be
159-
* nq = 3 + 2 + 2 + 1 = 6 queries. For a given total block size, the optimal
160-
* decomposition into sub-blocks (measured empirically) is given by
161-
* preferred_qbs().
162-
*/
163-
164127
/* compute the number of queries from a base-16 decomposition */
165128
int pq4_qbs_to_nq(int qbs);
166129

@@ -187,28 +150,7 @@ int pq4_pack_LUT_qbs_q_map(
187150
const int* q_map,
188151
uint8_t* dest);
189152

190-
/** Run accumulation loop.
191-
*
192-
* @param qbs 4-bit encoded number of queries
193-
* @param nb number of database codes (multiple of bbs)
194-
* @param nsq number of sub-quantizers
195-
* @param codes encoded database vectors (packed)
196-
* @param LUT look-up table (packed)
197-
* @param res call-back for the results
198-
* @param pq2x4_scale scaler to scale the encoded norm
199-
* @param block_stride stride in bytes between consecutive blocks.
200-
*/
201-
void pq4_accumulate_loop_qbs(
202-
int qbs,
203-
size_t nb,
204-
int nsq,
205-
const uint8_t* codes,
206-
const uint8_t* LUT,
207-
SIMDResultHandler& res,
208-
int pq2x4_scale,
209-
size_t block_stride);
210-
211-
/** Wrapper of pq4_accumulate_loop_qbs using simple StoreResultHandler
153+
/** Wrapper using simple StoreResultHandler
212154
* and DummyScaler
213155
*
214156
* @param nq number of queries

faiss/impl/fast_scan/pq4_fast_scan_search_1.cpp

Lines changed: 0 additions & 71 deletions
This file was deleted.

faiss/impl/fast_scan/pq4_fast_scan_search_qbs.cpp

Lines changed: 0 additions & 92 deletions
This file was deleted.

0 commit comments

Comments
 (0)