-
Notifications
You must be signed in to change notification settings - Fork 98
/
Copy pathivf_pq.h
442 lines (420 loc) · 15 KB
/
ivf_pq.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cuvs/core/c_api.h>
#include <cuvs/distance/distance.h>
#include <dlpack/dlpack.h>
#include <stdbool.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* @defgroup ivf_pq_c_index_params IVF-PQ index build parameters
* @{
*/
/**
* @brief A type for specifying how PQ codebooks are created
*
*/
enum codebook_gen { // NOLINT
PER_SUBSPACE = 0, // NOLINT
PER_CLUSTER = 1, // NOLINT
};
/**
* @brief Supplemental parameters to build IVF-PQ Index
*
*/
struct cuvsIvfPqIndexParams {
/** Distance type. */
cuvsDistanceType metric;
/** The argument used by some distance metrics. */
float metric_arg;
/**
* Whether to add the dataset content to the index, i.e.:
*
* - `true` means the index is filled with the dataset vectors and ready to search after calling
* `build`.
* - `false` means `build` only trains the underlying model (e.g. quantizer or clustering), but
* the index is left empty; you'd need to call `extend` on the index afterwards to populate it.
*/
bool add_data_on_build;
/**
* The number of inverted lists (clusters)
*
* Hint: the number of vectors per cluster (`n_rows/n_lists`) should be approximately 1,000 to
* 10,000.
*/
uint32_t n_lists;
/** The number of iterations searching for kmeans centers (index building). */
uint32_t kmeans_n_iters;
/** The fraction of data to use during iterative kmeans building. */
double kmeans_trainset_fraction;
/**
* The bit length of the vector element after compression by PQ.
*
* Possible values: [4, 5, 6, 7, 8].
*
* Hint: the smaller the 'pq_bits', the smaller the index size and the better the search
* performance, but the lower the recall.
*/
uint32_t pq_bits;
/**
* The dimensionality of the vector after compression by PQ. When zero, an optimal value is
* selected using a heuristic.
*
* NB: `pq_dim * pq_bits` must be a multiple of 8.
*
* Hint: a smaller 'pq_dim' results in a smaller index size and better search performance, but
* lower recall. If 'pq_bits' is 8, 'pq_dim' can be set to any number, but multiple of 8 are
* desirable for good performance. If 'pq_bits' is not 8, 'pq_dim' should be a multiple of 8.
* For good performance, it is desirable that 'pq_dim' is a multiple of 32. Ideally, 'pq_dim'
* should be also a divisor of the dataset dim.
*/
uint32_t pq_dim;
/** How PQ codebooks are created. */
enum codebook_gen codebook_kind;
/**
* Apply a random rotation matrix on the input data and queries even if `dim % pq_dim == 0`.
*
* Note: if `dim` is not multiple of `pq_dim`, a random rotation is always applied to the input
* data and queries to transform the working space from `dim` to `rot_dim`, which may be slightly
* larger than the original space and and is a multiple of `pq_dim` (`rot_dim % pq_dim == 0`).
* However, this transform is not necessary when `dim` is multiple of `pq_dim`
* (`dim == rot_dim`, hence no need in adding "extra" data columns / features).
*
* By default, if `dim == rot_dim`, the rotation transform is initialized with the identity
* matrix. When `force_random_rotation == true`, a random orthogonal transform matrix is generated
* regardless of the values of `dim` and `pq_dim`.
*/
bool force_random_rotation;
/**
* By default, the algorithm allocates more space than necessary for individual clusters
* (`list_data`). This allows to amortize the cost of memory allocation and reduce the number of
* data copies during repeated calls to `extend` (extending the database).
*
* The alternative is the conservative allocation behavior; when enabled, the algorithm always
* allocates the minimum amount of memory required to store the given number of records. Set this
* flag to `true` if you prefer to use as little GPU memory for the database as possible.
*/
bool conservative_memory_allocation;
/**
* The max number of data points to use per PQ code during PQ codebook training. Using more data
* points per PQ code may increase the quality of PQ codebook but may also increase the build
* time. The parameter is applied to both PQ codebook generation methods, i.e., PER_SUBSPACE and
* PER_CLUSTER. In both cases, we will use `pq_book_size * max_train_points_per_pq_code` training
* points to train each codebook.
*/
uint32_t max_train_points_per_pq_code;
};
typedef struct cuvsIvfPqIndexParams* cuvsIvfPqIndexParams_t;
/**
* @brief Allocate IVF-PQ Index params, and populate with default values
*
* @param[in] index_params cuvsIvfPqIndexParams_t to allocate
* @return cuvsError_t
*/
cuvsError_t cuvsIvfPqIndexParamsCreate(cuvsIvfPqIndexParams_t* index_params);
/**
* @brief De-allocate IVF-PQ Index params
*
* @param[in] index_params
* @return cuvsError_t
*/
cuvsError_t cuvsIvfPqIndexParamsDestroy(cuvsIvfPqIndexParams_t index_params);
/**
* @}
*/
/**
* @defgroup ivf_pq_c_search_params IVF-PQ index search parameters
* @{
*/
/**
* @brief Supplemental parameters to search IVF-PQ index
*
*/
struct cuvsIvfPqSearchParams {
/** The number of clusters to search. */
uint32_t n_probes;
/**
* Data type of look up table to be created dynamically at search time.
*
* Possible values: [CUDA_R_32F, CUDA_R_16F, CUDA_R_8U]
*
* The use of low-precision types reduces the amount of shared memory required at search time, so
* fast shared memory kernels can be used even for datasets with large dimansionality. Note that
* the recall is slightly degraded when low-precision type is selected.
*/
cudaDataType_t lut_dtype;
/**
* Storage data type for distance/similarity computed at search time.
*
* Possible values: [CUDA_R_16F, CUDA_R_32F]
*
* If the performance limiter at search time is device memory access, selecting FP16 will improve
* performance slightly.
*/
cudaDataType_t internal_distance_dtype;
/**
* The data type to use as the GEMM element type when searching the clusters to probe.
*
* Possible values: [CUDA_R_8I, CUDA_R_16F, CUDA_R_32F].
*
* - Legacy default: CUDA_R_32F (float)
* - Recommended for performance: CUDA_R_16F (half)
* - Experimental/low-precision: CUDA_R_8I (int8_t)
* (WARNING: int8_t variant degrades recall unless data is normalized and low-dimensional)
*/
cudaDataType_t coarse_search_dtype;
/**
* Set the internal batch size to improve GPU utilization at the cost of larger memory footprint.
*/
uint32_t max_internal_batch_size;
/**
* Preferred fraction of SM's unified memory / L1 cache to be used as shared memory.
*
* Possible values: [0.0 - 1.0] as a fraction of the `sharedMemPerMultiprocessor`.
*
* One wants to increase the carveout to make sure a good GPU occupancy for the main search
* kernel, but not to keep it too high to leave some memory to be used as L1 cache. Note, this
* value is interpreted only as a hint. Moreover, a GPU usually allows only a fixed set of cache
* configurations, so the provided value is rounded up to the nearest configuration. Refer to the
* NVIDIA tuning guide for the target GPU architecture.
*
* Note, this is a low-level tuning parameter that can have drastic negative effects on the search
* performance if tweaked incorrectly.
*/
double preferred_shmem_carveout;
};
typedef struct cuvsIvfPqSearchParams* cuvsIvfPqSearchParams_t;
/**
* @brief Allocate IVF-PQ search params, and populate with default values
*
* @param[in] params cuvsIvfPqSearchParams_t to allocate
* @return cuvsError_t
*/
cuvsError_t cuvsIvfPqSearchParamsCreate(cuvsIvfPqSearchParams_t* params);
/**
* @brief De-allocate IVF-PQ search params
*
* @param[in] params
* @return cuvsError_t
*/
cuvsError_t cuvsIvfPqSearchParamsDestroy(cuvsIvfPqSearchParams_t params);
/**
* @}
*/
/**
* @defgroup ivf_pq_c_index IVF-PQ index
* @{
*/
/**
* @brief Struct to hold address of cuvs::neighbors::ivf_pq::index and its active trained dtype
*
*/
typedef struct {
uintptr_t addr;
DLDataType dtype;
} cuvsIvfPqIndex;
typedef cuvsIvfPqIndex* cuvsIvfPqIndex_t;
/**
* @brief Allocate IVF-PQ index
*
* @param[in] index cuvsIvfPqIndex_t to allocate
* @return cuvsError_t
*/
cuvsError_t cuvsIvfPqIndexCreate(cuvsIvfPqIndex_t* index);
/**
* @brief De-allocate IVF-PQ index
*
* @param[in] index cuvsIvfPqIndex_t to de-allocate
*/
cuvsError_t cuvsIvfPqIndexDestroy(cuvsIvfPqIndex_t index);
/**
* @}
*/
/**
* @defgroup ivf_pq_c_index_build IVF-PQ index build
* @{
*/
/**
* @brief Build a IVF-PQ index with a `DLManagedTensor` which has underlying
* `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
* or `kDLCPU`. Also, acceptable underlying types are:
* 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
* 2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
* 3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
* 4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
*
* @code {.c}
* #include <cuvs/core/c_api.h>
* #include <cuvs/neighbors/ivf_pq.h>
*
* // Create cuvsResources_t
* cuvsResources_t res;
* cuvsError_t res_create_status = cuvsResourcesCreate(&res);
*
* // Assume a populated `DLManagedTensor` type here
* DLManagedTensor dataset;
*
* // Create default index params
* cuvsIvfPqIndexParams_t index_params;
* cuvsError_t params_create_status = cuvsIvfPqIndexParamsCreate(&index_params);
*
* // Create IVF-PQ index
* cuvsIvfPqIndex_t index;
* cuvsError_t index_create_status = cuvsIvfPqIndexCreate(&index);
*
* // Build the IVF-PQ Index
* cuvsError_t build_status = cuvsIvfPqBuild(res, index_params, &dataset, index);
*
* // de-allocate `index_params`, `index` and `res`
* cuvsError_t params_destroy_status = cuvsIvfPqIndexParamsDestroy(index_params);
* cuvsError_t index_destroy_status = cuvsIvfPqIndexDestroy(index);
* cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
* @endcode
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] params cuvsIvfPqIndexParams_t used to build IVF-PQ index
* @param[in] dataset DLManagedTensor* training dataset
* @param[out] index cuvsIvfPqIndex_t Newly built IVF-PQ index
* @return cuvsError_t
*/
cuvsError_t cuvsIvfPqBuild(cuvsResources_t res,
cuvsIvfPqIndexParams_t params,
DLManagedTensor* dataset,
cuvsIvfPqIndex_t index);
/**
* @}
*/
/**
* @defgroup ivf_pq_c_index_search IVF-PQ index search
* @{
*/
/**
* @brief Search a IVF-PQ index with a `DLManagedTensor` which has underlying
* `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`.
* It is also important to note that the IVF-PQ Index must have been built
* with the same type of `queries`, such that `index.dtype.code ==
* queries.dl_tensor.dtype.code` Types for input are:
* 1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
* or `kDLDataType.bits = 16`
* 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
* 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
*
* @code {.c}
* #include <cuvs/core/c_api.h>
* #include <cuvs/neighbors/ivf_pq.h>
*
* // Create cuvsResources_t
* cuvsResources_t res;
* cuvsError_t res_create_status = cuvsResourcesCreate(&res);
*
* // Assume a populated `DLManagedTensor` type here
* DLManagedTensor dataset;
* DLManagedTensor queries;
* DLManagedTensor neighbors;
*
* // Create default search params
* cuvsIvfPqSearchParams_t search_params;
* cuvsError_t params_create_status = cuvsIvfPqSearchParamsCreate(&search_params);
*
* // Search the `index` built using `cuvsIvfPqBuild`
* cuvsError_t search_status = cuvsIvfPqSearch(res, search_params, index, &queries, &neighbors,
* &distances);
*
* // de-allocate `search_params` and `res`
* cuvsError_t params_destroy_status = cuvsIvfPqSearchParamsDestroy(search_params);
* cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
* @endcode
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] search_params cuvsIvfPqSearchParams_t used to search IVF-PQ index
* @param[in] index cuvsIvfPqIndex which has been returned by `cuvsIvfPqBuild`
* @param[in] queries DLManagedTensor* queries dataset to search
* @param[out] neighbors DLManagedTensor* output `k` neighbors for queries
* @param[out] distances DLManagedTensor* output `k` distances for queries
*/
cuvsError_t cuvsIvfPqSearch(cuvsResources_t res,
cuvsIvfPqSearchParams_t search_params,
cuvsIvfPqIndex_t index,
DLManagedTensor* queries,
DLManagedTensor* neighbors,
DLManagedTensor* distances);
/**
* @}
*/
/**
* @defgroup ivf_pq_c_serialize IVF-PQ C-API serialize functions
* @{
*/
/**
* Save the index to file.
*
* Experimental, both the API and the serialization format are subject to change.
*
* @code{.cpp}
* #include <cuvs/neighbors/ivf_pq.h>
*
* // Create cuvsResources_t
* cuvsResources_t res;
* cuvsError_t res_create_status = cuvsResourcesCreate(&res);
*
* // create an index with `cuvsIvfPqBuild`
* cuvsIvfPqSerialize(res, "/path/to/index", index, true);
* @endcode
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] filename the file name for saving the index
* @param[in] index IVF-PQ index
*/
cuvsError_t cuvsIvfPqSerialize(cuvsResources_t res, const char* filename, cuvsIvfPqIndex_t index);
/**
* Load index from file.
*
* Experimental, both the API and the serialization format are subject to change.
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] filename the name of the file that stores the index
* @param[out] index IVF-PQ index loaded disk
*/
cuvsError_t cuvsIvfPqDeserialize(cuvsResources_t res, const char* filename, cuvsIvfPqIndex_t index);
/**
* @}
*/
/**
* @defgroup ivf_pq_c_index_extend IVF-PQ index extend
* @{
*/
/**
* @brief Extend the index with the new data.
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] new_vectors DLManagedTensor* the new vectors to add to the index
* @param[in] new_indices DLManagedTensor* vector of new indices for the new vectors
* @param[inout] index IVF-PQ index to be extended
* @return cuvsError_t
*/
cuvsError_t cuvsIvfPqExtend(cuvsResources_t res,
DLManagedTensor* new_vectors,
DLManagedTensor* new_indices,
cuvsIvfPqIndex_t index);
/**
* @}
*/
#ifdef __cplusplus
}
#endif