Skip to content

Commit 1a35b05

Browse files
committed
Replace Thrust sequential algorithms with libcu++ ones
1 parent e7c9915 commit 1a35b05

File tree

55 files changed

+901
-1089
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+901
-1089
lines changed

cpp/examples/developers/vertex_and_edge_partition/vertex_and_edge_partition.cu

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@
2424
#include <raft/core/handle.hpp>
2525
#include <raft/random/rng_state.hpp>
2626

27-
#include <thrust/for_each.h>
27+
#include <cuda/std/__algorithm_>
2828

2929
#include <iostream>
3030
#include <string>
@@ -237,21 +237,20 @@ void look_into_vertex_and_edge_partitions(
237237
//
238238

239239
if (renumber_map) {
240-
thrust::for_each(thrust::host,
241-
thrust::make_zip_iterator(thrust::make_tuple(
242-
h_vertices_in_this_proces.begin(),
243-
thrust::make_counting_iterator(renumbered_vertex_id_of_local_first))),
244-
thrust::make_zip_iterator(thrust::make_tuple(
245-
h_vertices_in_this_proces.end(),
246-
thrust::make_counting_iterator(renumbered_vertex_id_of_local_last))),
247-
[comm_rank](auto old_and_new_id_pair) {
248-
auto old_id = thrust::get<0>(old_and_new_id_pair);
249-
auto new_id = thrust::get<1>(old_and_new_id_pair);
250-
printf("owner rank = %d, original vertex id %d is renumbered to %d\n",
251-
comm_rank,
252-
static_cast<int>(old_id),
253-
static_cast<int>(new_id));
254-
});
240+
cuda::std::for_each(thrust::make_zip_iterator(thrust::make_tuple(
241+
h_vertices_in_this_proces.begin(),
242+
thrust::make_counting_iterator(renumbered_vertex_id_of_local_first))),
243+
thrust::make_zip_iterator(thrust::make_tuple(
244+
h_vertices_in_this_proces.end(),
245+
thrust::make_counting_iterator(renumbered_vertex_id_of_local_last))),
246+
[comm_rank](auto old_and_new_id_pair) {
247+
auto old_id = thrust::get<0>(old_and_new_id_pair);
248+
auto new_id = thrust::get<1>(old_and_new_id_pair);
249+
printf("owner rank = %d, original vertex id %d is renumbered to %d\n",
250+
comm_rank,
251+
static_cast<int>(old_id),
252+
static_cast<int>(new_id));
253+
});
255254
}
256255

257256
//
@@ -307,8 +306,7 @@ void look_into_vertex_and_edge_partitions(
307306
auto v = major_range_first + i;
308307
auto deg_of_v_in_this_edge_partition = offsets[i + 1] - offsets[i];
309308

310-
thrust::for_each(
311-
thrust::seq,
309+
cuda::std::for_each(
312310
thrust::make_counting_iterator(edge_t{offsets[i]}),
313311
thrust::make_counting_iterator(edge_t{offsets[i + 1]}),
314312
[comm_rank, ep_idx, v, indices, is_weighted, weights] __device__(auto pos) {
@@ -353,8 +351,7 @@ void look_into_vertex_and_edge_partitions(
353351
auto major_idx = (major_hypersparse_first - major_range_first) + i;
354352
auto deg_of_v_in_this_edge_partition = offsets[major_idx + 1] - offsets[major_idx];
355353

356-
thrust::for_each(
357-
thrust::seq,
354+
cuda::std::for_each(
358355
thrust::make_counting_iterator(edge_t{offsets[major_idx]}),
359356
thrust::make_counting_iterator(edge_t{offsets[major_idx + 1]}),
360357
[comm_rank, ep_idx, v, indices, is_weighted, weights] __device__(auto pos) {

cpp/include/cugraph/detail/decompress_edge_partition.cuh

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@
2424

2525
#include <rmm/device_uvector.hpp>
2626

27+
#include <cuda/std/__algorithm_>
2728
#include <thrust/copy.h>
2829
#include <thrust/execution_policy.h>
29-
#include <thrust/fill.h>
3030
#include <thrust/for_each.h>
3131
#include <thrust/iterator/counting_iterator.h>
3232
#include <thrust/sequence.h>
@@ -159,10 +159,9 @@ void decompress_edge_partition_to_fill_edgelist_majors(
159159
auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
160160
auto local_degree = edge_partition.local_degree(major_offset);
161161
auto local_offset = edge_partition.local_offset(major_offset);
162-
thrust::fill(thrust::seq,
163-
output_buffer.begin() + local_offset,
164-
output_buffer.begin() + local_offset + local_degree,
165-
major);
162+
cuda::std::fill(output_buffer.begin() + local_offset,
163+
output_buffer.begin() + local_offset + local_degree,
164+
major);
166165
});
167166
}
168167
if (edge_partition.dcs_nzd_vertex_count() && (*(edge_partition.dcs_nzd_vertex_count()) > 0)) {
@@ -177,10 +176,9 @@ void decompress_edge_partition_to_fill_edgelist_majors(
177176
major_start_offset + idx; // major_offset != major_idx in the hypersparse region
178177
auto local_degree = edge_partition.local_degree(major_idx);
179178
auto local_offset = edge_partition.local_offset(major_idx);
180-
thrust::fill(thrust::seq,
181-
output_buffer.begin() + local_offset,
182-
output_buffer.begin() + local_offset + local_degree,
183-
major);
179+
cuda::std::fill(output_buffer.begin() + local_offset,
180+
output_buffer.begin() + local_offset + local_degree,
181+
major);
184182
});
185183
}
186184
} else {
@@ -192,10 +190,9 @@ void decompress_edge_partition_to_fill_edgelist_majors(
192190
auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
193191
auto local_degree = edge_partition.local_degree(major_offset);
194192
auto local_offset = edge_partition.local_offset(major_offset);
195-
thrust::fill(thrust::seq,
196-
output_buffer.begin() + local_offset,
197-
output_buffer.begin() + local_offset + local_degree,
198-
major);
193+
cuda::std::fill(output_buffer.begin() + local_offset,
194+
output_buffer.begin() + local_offset + local_degree,
195+
major);
199196
});
200197
}
201198

cpp/include/cugraph/edge_partition_device_view.cuh

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@
2626
#include <rmm/device_uvector.hpp>
2727
#include <rmm/exec_policy.hpp>
2828

29+
#include <cuda/std/__algorithm_>
2930
#include <cuda/std/iterator>
3031
#include <cuda/std/optional>
31-
#include <thrust/binary_search.h>
3232
#include <thrust/execution_policy.h>
3333
#include <thrust/transform.h>
3434
#include <thrust/transform_reduce.h>
@@ -48,8 +48,7 @@ __device__ cuda::std::optional<vertex_t> major_hypersparse_idx_from_major_nochec
4848
{
4949
// we can avoid binary search (and potentially improve performance) if we add an auxiliary array
5050
// or cuco::static_map (at the expense of additional memory)
51-
auto it =
52-
thrust::lower_bound(thrust::seq, dcs_nzd_vertices.begin(), dcs_nzd_vertices.end(), major);
51+
auto it = cuda::std::lower_bound(dcs_nzd_vertices.begin(), dcs_nzd_vertices.end(), major);
5352
return it != dcs_nzd_vertices.end()
5453
? (*it == major ? cuda::std::optional<vertex_t>{static_cast<vertex_t>(
5554
cuda::std::distance(dcs_nzd_vertices.begin(), it))}
@@ -163,7 +162,7 @@ class edge_partition_device_view_base_t {
163162
{
164163
return static_cast<vertex_t>(cuda::std::distance(
165164
offsets_.begin() + 1,
166-
thrust::upper_bound(thrust::seq, offsets_.begin() + 1, offsets_.end(), local_edge_idx)));
165+
cuda::std::upper_bound(offsets_.begin() + 1, offsets_.end(), local_edge_idx)));
167166
}
168167

169168
// major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC

cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323

2424
#include <raft/core/device_span.hpp>
2525

26+
#include <cuda/std/__algorithm_>
2627
#include <cuda/std/iterator>
2728
#include <cuda/std/optional>
28-
#include <thrust/binary_search.h>
2929
#include <thrust/execution_policy.h>
3030
#include <thrust/fill.h>
3131
#include <thrust/iterator/iterator_traits.h>
@@ -197,10 +197,10 @@ class edge_partition_endpoint_property_device_view_t {
197197
auto val_offset = offset;
198198
if (keys_) {
199199
auto chunk_idx = static_cast<size_t>(offset) / (*key_chunk_size_);
200-
auto it = thrust::lower_bound(thrust::seq,
201-
(*keys_).begin() + (*key_chunk_start_offsets_)[chunk_idx],
202-
(*keys_).begin() + (*key_chunk_start_offsets_)[chunk_idx + 1],
203-
range_first_ + offset);
200+
auto it =
201+
cuda::std::lower_bound((*keys_).begin() + (*key_chunk_start_offsets_)[chunk_idx],
202+
(*keys_).begin() + (*key_chunk_start_offsets_)[chunk_idx + 1],
203+
range_first_ + offset);
204204
assert((it != (*keys_).begin() + (*key_chunk_start_offsets_)[chunk_idx + 1]) &&
205205
(*it == (range_first_ + offset)));
206206
val_offset = (*key_chunk_start_offsets_)[chunk_idx] +

cpp/include/cugraph/utilities/mask_utils.cuh

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121
#include <raft/core/handle.hpp>
2222

2323
#include <cuda/functional>
24+
#include <cuda/std/__algorithm_>
2425
#include <cuda/std/iterator>
26+
#include <cuda/std/numeric>
2527
#include <thrust/copy.h>
2628
#include <thrust/functional.h>
2729
#include <thrust/iterator/counting_iterator.h>
@@ -53,19 +55,18 @@ __device__ size_t count_set_bits(MaskIterator mask_first, size_t start_offset, s
5355
++mask_first;
5456
}
5557

56-
return thrust::transform_reduce(
57-
thrust::seq,
58+
return cuda::std::transform_reduce(
5859
thrust::make_counting_iterator(size_t{0}),
5960
thrust::make_counting_iterator(packed_bool_size(num_bits)),
61+
ret,
62+
thrust::plus<size_t>{},
6063
[mask_first, num_bits] __device__(size_t i) {
6164
auto word = *(mask_first + i);
6265
if ((i + 1) * packed_bools_per_word() > num_bits) {
6366
word &= packed_bool_partial_mask(num_bits % packed_bools_per_word());
6467
}
6568
return static_cast<size_t>(__popc(word));
66-
},
67-
ret,
68-
thrust::plus<size_t>{});
69+
});
6970
}
7071

7172
// @p n starts from 1

cpp/include/cugraph/utilities/shuffle_comm.cuh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
#include <cuda/atomic>
2828
#include <cuda/functional>
29+
#include <cuda/std/__algorithm_>
2930
#include <cuda/std/iterator>
3031
#include <thrust/binary_search.h>
3132
#include <thrust/copy.h>
@@ -63,9 +64,8 @@ struct compute_group_id_count_pair_t {
6364
{
6465
static_assert(
6566
std::is_same_v<typename thrust::iterator_traits<GroupIdIterator>::value_type, int>);
66-
auto lower_it =
67-
thrust::lower_bound(thrust::seq, group_id_first, group_id_last, static_cast<int>(i));
68-
auto upper_it = thrust::upper_bound(thrust::seq, lower_it, group_id_last, static_cast<int>(i));
67+
auto lower_it = cuda::std::lower_bound(group_id_first, group_id_last, static_cast<int>(i));
68+
auto upper_it = cuda::std::upper_bound(lower_it, group_id_last, static_cast<int>(i));
6969
return thrust::make_tuple(static_cast<int>(i),
7070
static_cast<size_t>(cuda::std::distance(lower_it, upper_it)));
7171
}

cpp/src/c_api/capi_helper.cu

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <cugraph/shuffle_functions.hpp>
2121
#include <cugraph/utilities/misc_utils.cuh>
2222

23+
#include <cuda/std/__algorithm_>
2324
#include <cuda/std/iterator>
2425
#include <thrust/iterator/zip_iterator.h>
2526
#include <thrust/sort.h>
@@ -141,8 +142,7 @@ reorder_extracted_egonets(raft::handle_t const& handle,
141142
source_indices = raft::device_span<size_t const>(source_indices.data(),
142143
source_indices.size())] __device__(size_t i) {
143144
auto idx = static_cast<size_t>(cuda::std::distance(
144-
offset_lasts.begin(),
145-
thrust::upper_bound(thrust::seq, offset_lasts.begin(), offset_lasts.end(), i)));
145+
offset_lasts.begin(), cuda::std::upper_bound(offset_lasts.begin(), offset_lasts.end(), i)));
146146
return source_indices[idx];
147147
});
148148
source_indices.resize(0, handle.get_stream());
@@ -166,8 +166,7 @@ reorder_extracted_egonets(raft::handle_t const& handle,
166166
[sort_indices = raft::device_span<size_t const>(sort_indices.data(),
167167
sort_indices.size())] __device__(size_t i) {
168168
return static_cast<size_t>(cuda::std::distance(
169-
sort_indices.begin(),
170-
thrust::upper_bound(thrust::seq, sort_indices.begin(), sort_indices.end(), i)));
169+
sort_indices.begin(), cuda::std::upper_bound(sort_indices.begin(), sort_indices.end(), i)));
171170
});
172171

173172
return std::make_tuple(

cpp/src/c_api/renumber_arbitrary_edgelist.cu

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
#include <cugraph/graph.hpp>
2424
#include <cugraph/utilities/error.hpp>
2525

26+
#include <cuda/std/__algorithm_>
2627
#include <cuda/std/iterator>
27-
#include <thrust/binary_search.h>
2828
#include <thrust/iterator/counting_iterator.h>
2929

3030
namespace {
@@ -87,8 +87,8 @@ cugraph_error_code_t renumber_arbitrary_edgelist(
8787
raft::device_span<vertex_t const>{renumber_chunk.data(), renumber_chunk.size()},
8888
vertices_span,
8989
ids_span] __device__(size_t idx) {
90-
auto pos = thrust::lower_bound(
91-
thrust::seq, vertices_span.begin(), vertices_span.end(), renumber_chunk_span[idx]);
90+
auto pos = cuda::std::lower_bound(
91+
vertices_span.begin(), vertices_span.end(), renumber_chunk_span[idx]);
9292
if ((pos != vertices_span.end()) && (*pos == renumber_chunk_span[idx])) {
9393
ids_span[cuda::std::distance(vertices_span.begin(), pos)] =
9494
static_cast<vertex_t>(chunk_base_offset + idx);
@@ -102,27 +102,25 @@ cugraph_error_code_t renumber_arbitrary_edgelist(
102102
cugraph::invalid_vertex_id<vertex_t>::value) == 0,
103103
"some vertices were not renumbered");
104104

105-
thrust::transform(
106-
handle.get_thrust_policy(),
107-
srcs->as_type<vertex_t>(),
108-
srcs->as_type<vertex_t>() + srcs->size_,
109-
srcs->as_type<vertex_t>(),
110-
[vertices_span, ids_span] __device__(vertex_t v) {
111-
return ids_span[cuda::std::distance(
112-
vertices_span.begin(),
113-
thrust::lower_bound(thrust::seq, vertices_span.begin(), vertices_span.end(), v))];
114-
});
115-
116-
thrust::transform(
117-
handle.get_thrust_policy(),
118-
dsts->as_type<vertex_t>(),
119-
dsts->as_type<vertex_t>() + srcs->size_,
120-
dsts->as_type<vertex_t>(),
121-
[vertices_span, ids_span] __device__(vertex_t v) {
122-
return ids_span[cuda::std::distance(
123-
vertices_span.begin(),
124-
thrust::lower_bound(thrust::seq, vertices_span.begin(), vertices_span.end(), v))];
125-
});
105+
thrust::transform(handle.get_thrust_policy(),
106+
srcs->as_type<vertex_t>(),
107+
srcs->as_type<vertex_t>() + srcs->size_,
108+
srcs->as_type<vertex_t>(),
109+
[vertices_span, ids_span] __device__(vertex_t v) {
110+
return ids_span[cuda::std::distance(
111+
vertices_span.begin(),
112+
cuda::std::lower_bound(vertices_span.begin(), vertices_span.end(), v))];
113+
});
114+
115+
thrust::transform(handle.get_thrust_policy(),
116+
dsts->as_type<vertex_t>(),
117+
dsts->as_type<vertex_t>() + srcs->size_,
118+
dsts->as_type<vertex_t>(),
119+
[vertices_span, ids_span] __device__(vertex_t v) {
120+
return ids_span[cuda::std::distance(
121+
vertices_span.begin(),
122+
cuda::std::lower_bound(vertices_span.begin(), vertices_span.end(), v))];
123+
});
126124

127125
return CUGRAPH_SUCCESS;
128126
}

cpp/src/community/detail/common_methods.cuh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@
3030
#include <cugraph/graph_functions.hpp>
3131

3232
#include <cuda/functional>
33+
#include <cuda/std/__algorithm_>
3334
#include <cuda/std/optional>
34-
#include <thrust/binary_search.h>
3535
#include <thrust/execution_policy.h>
3636
#include <thrust/functional.h>
3737
#include <thrust/iterator/zip_iterator.h>
@@ -319,8 +319,8 @@ rmm::device_uvector<vertex_t> update_clustering_by_delta_modularity(
319319
[d_cluster_weights = cluster_weights_v.data(),
320320
d_cluster_keys = cluster_keys_v.data(),
321321
num_clusters = cluster_keys_v.size()] __device__(vertex_t cluster) {
322-
auto pos = thrust::lower_bound(
323-
thrust::seq, d_cluster_keys, d_cluster_keys + num_clusters, cluster);
322+
auto pos = cuda::std::lower_bound(
323+
d_cluster_keys, d_cluster_keys + num_clusters, cluster);
324324
return d_cluster_weights[pos - d_cluster_keys];
325325
});
326326
}

0 commit comments

Comments
 (0)