Skip to content

Commit 95daa58

Browse files
committed
more review suggestions
1 parent 5d52dc4 commit 95daa58

6 files changed

Lines changed: 104 additions & 63 deletions

File tree

cpp/include/cuvs/neighbors/cagra.hpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3266,16 +3266,15 @@ std::tuple<size_t, size_t, size_t, size_t> optimize_workspace_size(size_t n_rows
32663266
*
32673267
* @param[in] res raft resource
32683268
* @param[in] dataset shape of the dataset
3269-
* @param[in] dtype_size size of dataset datatype in bytes
3270-
* @param[in] input_is_float whether the dataset element type is `float`
3269+
* @param[in] dtype element type of the dataset
3270+
* (e.g. `CUDA_R_32F`, `CUDA_R_16F`, `CUDA_R_8I`, `CUDA_R_8U`)
32713271
* @param[in] cparams CAGRA index building parameters
32723272
*
32733273
* @return pair of [host_size, device_size] memory sizes in bytes
32743274
*/
32753275
std::pair<size_t, size_t> cagra_build_mem_usage(raft::resources const& res,
32763276
raft::matrix_extent<int64_t> dataset,
3277-
size_t dtype_size,
3278-
bool input_is_float,
3277+
cudaDataType_t dtype,
32793278
cuvs::neighbors::cagra::index_params cparams);
32803279

32813280
/**

cpp/src/neighbors/detail/cagra/cagra_build.cuh

Lines changed: 35 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -809,17 +809,19 @@ struct ace_memory_requirements {
809809
size_t available_gpu_memory;
810810
};
811811

812-
// TODO: Adjust overhead factor if needed. Very conservative for now.
812+
// Amount of host memory that can be used for the build
813813
constexpr double usable_cpu_memory_fraction = 0.8;
814-
constexpr double usable_gpu_memory_fraction = 1.0;
815-
constexpr double imbalance_factor = 3.0;
816-
constexpr double vector_expansion_factor = 2.0;
817-
constexpr size_t extra_cpu_workspace_size = 0;
818-
constexpr size_t extra_gpu_workspace_size = 2e9;
814+
815+
// Factor to account for imbalances in the partitions (maximum allowed is 3x the average)
816+
constexpr double imbalance_factor = 3.0;
817+
818+
// Current partitioning adds each vector into 2 partitions (core and augmented)
819+
constexpr double vector_expansion_factor = 2.0;
819820

820821
// Check if disk mode should be used for ACE based on memory constraints
821822
template <typename T, typename IdxT>
822-
bool ace_check_use_disk_mode(bool use_disk,
823+
bool ace_check_use_disk_mode(raft::resources const& res,
824+
bool use_disk,
823825
std::string& build_dir,
824826
size_t dataset_size,
825827
size_t dataset_dim,
@@ -851,7 +853,8 @@ bool ace_check_use_disk_mode(bool use_disk,
851853
mem.available_host_memory = cuvs::util::get_free_host_memory();
852854
}
853855
size_t sub_partition_size =
854-
static_cast<size_t>(imbalance_factor * vector_expansion_factor * (dataset_size / n_partitions));
856+
static_cast<size_t>(imbalance_factor * vector_expansion_factor *
857+
raft::div_rounding_up_safe(dataset_size, n_partitions));
855858
auto [opt_host_ws_total, opt_dev_ws_total, opt_host_ws_fixed, opt_dev_ws_fixed] =
856859
helpers::optimize_workspace_size(
857860
sub_partition_size, graph_degree, intermediate_degree, sizeof(IdxT), guarantee_connectivity);
@@ -872,8 +875,7 @@ bool ace_check_use_disk_mode(bool use_disk,
872875
mem.sub_graph_size = sub_partition_size * (intermediate_degree + graph_degree) * sizeof(IdxT);
873876
mem.cagra_graph_size = dataset_size * graph_degree * sizeof(IdxT);
874877
mem.total_size = mem.partition_labels_size + mem.id_mapping_size + mem.sub_dataset_size +
875-
mem.sub_graph_size + mem.cagra_graph_size + opt_host_ws_total +
876-
extra_cpu_workspace_size;
878+
mem.sub_graph_size + mem.cagra_graph_size + opt_host_ws_total;
877879

878880
RAFT_LOG_INFO("ACE: Estimated host memory required: %.2f GiB, available: %.2f GiB",
879881
to_gib(mem.total_size),
@@ -907,11 +909,11 @@ bool ace_check_use_disk_mode(bool use_disk,
907909
// * IVF-PQ on partition (sub_dataset_size, uncompressed upper bound)
908910
// * optimize workspace (opt_dev_ws_total)
909911
// + some extra workspace (IVF-PQ search, ...)
912+
size_t extra_gpu_workspace_size = raft::resource::get_workspace_total_bytes(res);
910913
size_t gpu_memory_required =
911914
std::max(mem.sub_dataset_size, opt_dev_ws_total) + extra_gpu_workspace_size;
912915

913-
bool gpu_memory_limited = static_cast<size_t>(usable_gpu_memory_fraction *
914-
mem.available_gpu_memory) < gpu_memory_required;
916+
bool gpu_memory_limited = mem.available_gpu_memory < gpu_memory_required;
915917

916918
RAFT_LOG_INFO("ACE: Estimated GPU memory required: %.2f GiB, available: %.2f GiB",
917919
to_gib(gpu_memory_required),
@@ -958,7 +960,8 @@ bool ace_check_use_disk_mode(bool use_disk,
958960

959961
// Validate and adjust partitions for disk mode memory requirements
960962
template <typename T, typename IdxT>
961-
void ace_validate_disk_mode_partitions(size_t& n_partitions,
963+
void ace_validate_disk_mode_partitions(raft::resources const& res,
964+
size_t& n_partitions,
962965
size_t dataset_size,
963966
size_t dataset_dim,
964967
size_t intermediate_degree,
@@ -983,7 +986,8 @@ void ace_validate_disk_mode_partitions(size_t& n_partitions,
983986

984987
// Compute optimize workspace requirements
985988
size_t sub_partition_size =
986-
static_cast<size_t>(imbalance_factor * vector_expansion_factor * (dataset_size / n_partitions));
989+
static_cast<size_t>(imbalance_factor * vector_expansion_factor *
990+
raft::div_rounding_up_safe(dataset_size, n_partitions));
987991
auto [host_workspace_size_total,
988992
gpu_workspace_size_total,
989993
host_workspace_size_fixed,
@@ -994,7 +998,7 @@ void ace_validate_disk_mode_partitions(size_t& n_partitions,
994998
// Check host memory requirements
995999
size_t disk_mode_host_required = mem.partition_labels_size + mem.id_mapping_size +
9961000
mem.sub_dataset_size + mem.sub_graph_size +
997-
host_workspace_size_total + extra_cpu_workspace_size;
1001+
host_workspace_size_total;
9981002

9991003
if (static_cast<size_t>(usable_cpu_memory_fraction * mem.available_host_memory) <
10001004
disk_mode_host_required) {
@@ -1006,11 +1010,11 @@ void ace_validate_disk_mode_partitions(size_t& n_partitions,
10061010
to_gib(mem.available_host_memory),
10071011
to_gib(mem.sub_dataset_size),
10081012
to_gib(mem.sub_graph_size),
1009-
to_gib(host_workspace_size_total + extra_cpu_workspace_size));
1013+
to_gib(host_workspace_size_total));
10101014

10111015
// Calculate suggested number of partitions for host memory
1012-
size_t disk_mode_host_static = mem.partition_labels_size + mem.id_mapping_size +
1013-
host_workspace_size_fixed + extra_cpu_workspace_size;
1016+
size_t disk_mode_host_static =
1017+
mem.partition_labels_size + mem.id_mapping_size + host_workspace_size_fixed;
10141018
size_t disk_mode_host_dynamic = disk_mode_host_required - disk_mode_host_static;
10151019
double available_for_scaling =
10161020
usable_cpu_memory_fraction * mem.available_host_memory - disk_mode_host_static;
@@ -1032,11 +1036,11 @@ void ace_validate_disk_mode_partitions(size_t& n_partitions,
10321036
// * IVF-PQ on partition (mem.sub_dataset_size) (compressed?)
10331037
// * optimize workspace (gpu_workspace_size_total)
10341038
// + some extra workspace (IVF-PQ search, ...)
1039+
size_t extra_gpu_workspace_size = raft::resource::get_workspace_total_bytes(res);
10351040
size_t disk_mode_gpu_required =
10361041
std::max(mem.sub_dataset_size, gpu_workspace_size_total) + extra_gpu_workspace_size;
10371042

1038-
if (static_cast<size_t>(usable_gpu_memory_fraction * mem.available_gpu_memory) <
1039-
disk_mode_gpu_required) {
1043+
if (mem.available_gpu_memory < disk_mode_gpu_required) {
10401044
gpu_memory_insufficient = true;
10411045
RAFT_LOG_WARN(
10421046
"ACE: GPU memory insufficient for per-partition processing. Required: %.2f GiB, "
@@ -1048,14 +1052,13 @@ void ace_validate_disk_mode_partitions(size_t& n_partitions,
10481052

10491053
size_t disk_mode_gpu_static = gpu_workspace_size_fixed + extra_gpu_workspace_size;
10501054
size_t disk_mode_gpu_dynamic = disk_mode_gpu_required - disk_mode_gpu_static;
1051-
double available_for_scaling =
1052-
usable_gpu_memory_fraction * mem.available_gpu_memory - disk_mode_gpu_static;
1055+
// Subtract in floating point. Both operands are size_t, so an unsigned subtraction would wrap
// around to a huge positive value whenever the fixed overhead exceeds the available GPU memory,
// and the `available_for_scaling > 0` check that follows could never detect that case.
double available_for_scaling = static_cast<double>(mem.available_gpu_memory) -
static_cast<double>(disk_mode_gpu_static);
10531056

10541057
RAFT_EXPECTS(available_for_scaling > 0,
10551058
"ACE: GPU memory insufficient even for constant overhead. Required: %.2f GiB, "
10561059
"available: %.2f GiB",
10571060
to_gib(disk_mode_gpu_static),
1058-
to_gib(usable_gpu_memory_fraction * mem.available_gpu_memory));
1061+
to_gib(mem.available_gpu_memory));
10591062

10601063
gpu_suggested_partitions =
10611064
static_cast<size_t>(std::ceil(disk_mode_gpu_dynamic * n_partitions / available_for_scaling));
@@ -1080,8 +1083,9 @@ void ace_validate_disk_mode_partitions(size_t& n_partitions,
10801083

10811084
n_partitions = new_n_partitions;
10821085

1083-
size_t new_sub_partition_size = static_cast<size_t>(imbalance_factor * vector_expansion_factor *
1084-
(dataset_size / n_partitions));
1086+
size_t new_sub_partition_size =
1087+
static_cast<size_t>(imbalance_factor * vector_expansion_factor *
1088+
raft::div_rounding_up_safe(dataset_size, n_partitions));
10851089
auto [new_opt_host_ws, new_opt_dev_ws, new_opt_host_ws_fixed, new_opt_dev_ws_fixed] =
10861090
helpers::optimize_workspace_size(new_sub_partition_size,
10871091
graph_degree,
@@ -1093,15 +1097,14 @@ void ace_validate_disk_mode_partitions(size_t& n_partitions,
10931097
mem.sub_graph_size =
10941098
new_sub_partition_size * (intermediate_degree + graph_degree) * sizeof(IdxT);
10951099
mem.total_size = mem.partition_labels_size + mem.id_mapping_size + mem.sub_dataset_size +
1096-
mem.sub_graph_size + mem.cagra_graph_size + new_opt_host_ws +
1097-
extra_cpu_workspace_size;
1100+
mem.sub_graph_size + mem.cagra_graph_size + new_opt_host_ws;
10981101

10991102
RAFT_LOG_INFO(
11001103
"ACE: Updated per-partition memory estimates: dataset %.2f GiB, graph %.2f GiB, "
11011104
"host workspace %.2f GiB, GPU workspace %.2f GiB",
11021105
to_gib(mem.sub_dataset_size),
11031106
to_gib(mem.sub_graph_size),
1104-
to_gib(new_opt_host_ws + extra_cpu_workspace_size),
1107+
to_gib(new_opt_host_ws),
11051108
to_gib(new_opt_dev_ws + extra_gpu_workspace_size));
11061109
}
11071110
}
@@ -1184,7 +1187,8 @@ index<T, IdxT> build_ace(raft::resources const& res,
11841187

11851188
// Check if disk mode should be used based on memory constraints
11861189
ace_memory_requirements mem;
1187-
bool use_disk_mode = ace_check_use_disk_mode<T, IdxT>(use_disk,
1190+
bool use_disk_mode = ace_check_use_disk_mode<T, IdxT>(res,
1191+
use_disk,
11881192
build_dir,
11891193
dataset_size,
11901194
dataset_dim,
@@ -1198,7 +1202,8 @@ index<T, IdxT> build_ace(raft::resources const& res,
11981202

11991203
// Validate and adjust partitions if disk mode is enabled
12001204
if (use_disk_mode) {
1201-
ace_validate_disk_mode_partitions<T, IdxT>(n_partitions,
1205+
ace_validate_disk_mode_partitions<T, IdxT>(res,
1206+
n_partitions,
12021207
dataset_size,
12031208
dataset_dim,
12041209
intermediate_degree,

cpp/src/neighbors/detail/cagra/cagra_helpers.cpp

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,22 @@
1111
#include <utility>
1212

1313
namespace cuvs::neighbors::cagra::helpers {
14+
namespace {
15+
// Size in bytes of a single element of the given CUDA data type.
//
// Maps the element types accepted by the memory-usage helpers to their storage size:
//   CUDA_R_32F -> 4 bytes, CUDA_R_16F -> 2 bytes, CUDA_R_8I / CUDA_R_8U -> 1 byte.
// Any other value is rejected through RAFT_FAIL, with the numeric enum value included in the
// message. The function has no return statement after that call, so it relies on RAFT_FAIL not
// returning normally (presumably it throws -- confirm against the RAFT error macros).
16+
size_t cuda_data_type_size(cudaDataType_t dtype)
17+
{
18+
switch (dtype) {
19+
case CUDA_R_32F: return 4;
20+
case CUDA_R_16F: return 2;
21+
case CUDA_R_8I:  // intentionally shares the 1-byte result with CUDA_R_8U below
22+
case CUDA_R_8U: return 1;
23+
default:
24+
RAFT_FAIL("cagra_build_mem_usage: unsupported dataset element type %d",
25+
static_cast<int>(dtype));
26+
}
27+
}
28+
} // namespace
29+
1430
// Calculate CAGRA optimize workspace memory requirements.
1531
// This is the working memory on top of the input/output memory usage.
1632
std::tuple<size_t, size_t, size_t, size_t> optimize_workspace_size(size_t n_rows,
@@ -30,7 +46,8 @@ std::tuple<size_t, size_t, size_t, size_t> optimize_workspace_size(size_t n_rows
3046
mst_host = n_rows * index_size; // mst_graph_num_edges
3147
mst_host += n_rows * graph_degree * index_size; // mst_graph allocated in optimize
3248
mst_host += n_rows * graph_degree * index_size; // mst_graph allocated in mst_optimize
33-
mst_host += n_rows * index_size * 7; // Five vectors _edges suffix, and label, cluster_size vectors.
49+
mst_host +=
50+
n_rows * index_size * 7; // Five vectors _edges suffix, and label, cluster_size vectors.
3451
mst_host_fixed += (graph_degree - 1) * (graph_degree - 1) * index_size; // iB_candidates
3552
mst_host += mst_host_fixed;
3653
}
@@ -77,13 +94,15 @@ std::tuple<size_t, size_t, size_t, size_t> optimize_workspace_size(size_t n_rows
7794
inline std::pair<size_t, size_t> ivf_pq_build_mem_usage(
7895
raft::resources const& res,
7996
raft::matrix_extent<int64_t> dataset,
80-
size_t dtype_size,
81-
bool input_is_float,
97+
cudaDataType_t dtype,
8298
cuvs::neighbors::graph_build_params::ivf_pq_params params,
8399
size_t graph_degree,
84100
size_t intermediate_graph_degree,
85101
bool guarantee_connectivity)
86102
{
103+
size_t dtype_size = cuda_data_type_size(dtype);
104+
bool input_is_float = (dtype == CUDA_R_32F);
105+
87106
size_t n_rows = dataset.extent(0);
88107
size_t dim = dataset.extent(1);
89108

@@ -170,8 +189,7 @@ inline std::pair<size_t, size_t> nn_descent_build_mem_usage(raft::resources cons
170189

171190
std::pair<size_t, size_t> cagra_build_mem_usage(raft::resources const& res,
172191
raft::matrix_extent<int64_t> dataset,
173-
size_t dtype_size,
174-
bool input_is_float,
192+
cudaDataType_t dtype,
175193
cuvs::neighbors::cagra::index_params cparams)
176194
{
177195
using namespace cuvs::neighbors;
@@ -185,8 +203,7 @@ std::pair<size_t, size_t> cagra_build_mem_usage(raft::resources const& res,
185203
std::get<graph_build_params::ivf_pq_params>(cparams.graph_build_params);
186204
std::tie(total_host, total_dev) = ivf_pq_build_mem_usage(res,
187205
dataset,
188-
dtype_size,
189-
input_is_float,
206+
dtype,
190207
pq_params,
191208
cparams.graph_degree,
192209
cparams.intermediate_graph_degree,
@@ -202,13 +219,15 @@ std::pair<size_t, size_t> cagra_build_mem_usage(raft::resources const& res,
202219
} else {
203220
// iterative build
204221
// TODO(tfeher): proper estimate
205-
total_host = dataset.extent(0) * dataset.extent(1) * dtype_size +
222+
total_host = dataset.extent(0) * dataset.extent(1) * cuda_data_type_size(dtype) +
206223
dataset.extent(0) * (cparams.graph_degree + cparams.intermediate_graph_degree) *
207224
sizeof(uint32_t);
208225
total_dev = total_host;
209226
}
227+
228+
size_t extra_gpu_workspace_size = raft::resource::get_workspace_total_bytes(res);
210229
return std::make_pair(total_host + static_cast<size_t>(1e9),
211-
total_dev + static_cast<size_t>(1e9));
230+
total_dev + extra_gpu_workspace_size);
212231
}
213232

214233
} // namespace cuvs::neighbors::cagra::helpers

cpp/src/neighbors/detail/cagra/graph_core.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -846,7 +846,7 @@ void merge_graph_gpu(
846846

847847
uint32_t batch_size =
848848
static_cast<uint32_t>(std::min<uint64_t>(graph_size, helpers::kOptimizeBatchSize));
849-
const uint32_t num_batch = (graph_size + batch_size - 1) / batch_size;
849+
const uint32_t num_batch = raft::div_rounding_up_safe<uint64_t>(graph_size, batch_size);
850850

851851
namespace bli = cuvs::spatial::knn::detail::utils;
852852
auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res);
@@ -1607,7 +1607,7 @@ void prune_graph_gpu(
16071607

16081608
uint32_t batch_size =
16091609
static_cast<uint32_t>(std::min<uint64_t>(graph_size, helpers::kOptimizeBatchSize));
1610-
const uint32_t num_batch = (graph_size + batch_size - 1) / batch_size;
1610+
const uint32_t num_batch = raft::div_rounding_up_safe<uint64_t>(graph_size, batch_size);
16111611

16121612
RAFT_LOG_DEBUG("# Pruning kNN Graph on GPUs\r");
16131613

0 commit comments

Comments
 (0)