From 513eb5c1f6a9428cb3943458a9b116557959146f Mon Sep 17 00:00:00 2001 From: MithunR Date: Fri, 20 Feb 2026 15:43:11 -0800 Subject: [PATCH 1/3] Replace cudf::detail::valid_if with cudf::bools_to_mask This commit is part of the continuihng effort to reduce the dependency of spark-rapids-jni on `cudf::detail` APIs. In this commit, some of the references to `cudf::detail::valid_if` with `cudf::bools_to_mask`. The functionality should not be altered. Existing tests ought to cover the changes. Signed-off-by: MithunR --- .../cpp/benchmarks/common/generate_input.cu | 62 +++++++++---------- src/main/cpp/src/histogram.cu | 2 +- 2 files changed, 29 insertions(+), 35 deletions(-) diff --git a/src/main/cpp/benchmarks/common/generate_input.cu b/src/main/cpp/benchmarks/common/generate_input.cu index 8a4c7bf780..8b60114b83 100644 --- a/src/main/cpp/benchmarks/common/generate_input.cu +++ b/src/main/cpp/benchmarks/common/generate_input.cu @@ -21,20 +21,20 @@ #include #include #include -#include #include #include #include #include +#include #include #include #include +#include #include #include #include -#include #include #include #include @@ -430,17 +430,15 @@ std::unique_ptr create_random_column(data_profile const& profile, } auto [result_bitmask, null_count] = - cudf::detail::valid_if(null_mask.begin(), - null_mask.end(), - cuda::std::identity{}, - cudf::get_default_stream(), - rmm::mr::get_current_device_resource_ref()); + cudf::bools_to_mask(cudf::device_span(null_mask), + cudf::get_default_stream(), + rmm::mr::get_current_device_resource_ref()); return std::make_unique( cudf::data_type{cudf::type_to_id()}, num_rows, data.release(), - profile.get_null_frequency().has_value() ? std::move(result_bitmask) : rmm::device_buffer{}, + profile.get_null_frequency().has_value() ? std::move(*result_bitmask) : rmm::device_buffer{}, null_count); } @@ -514,18 +512,16 @@ std::unique_ptr create_random_utf8_string_column(data_profile cons num_rows, string_generator{chars.data(), engine}); auto [result_bitmask, null_count] = - cudf::detail::valid_if(null_mask.begin(), - null_mask.end() - 1, - cuda::std::identity{}, - cudf::get_default_stream(), - rmm::mr::get_current_device_resource_ref()); + cudf::bools_to_mask(cudf::device_span(null_mask.data(), num_rows), + cudf::get_default_stream(), + rmm::mr::get_current_device_resource_ref()); return cudf::make_strings_column( num_rows, std::make_unique(std::move(offsets), rmm::device_buffer{}, 0), chars.release(), null_count, - profile.get_null_frequency().has_value() ? std::move(result_bitmask) : rmm::device_buffer{}); + profile.get_null_frequency().has_value() ? std::move(*result_bitmask) : rmm::device_buffer{}); } /** @@ -638,13 +634,12 @@ std::unique_ptr create_random_column(data_profi auto [null_mask, null_count] = [&]() { if (profile.get_null_frequency().has_value()) { auto valids = valid_dist(engine, num_rows); - return cudf::detail::valid_if(valids.begin(), - valids.end(), - cuda::std::identity{}, - cudf::get_default_stream(), - rmm::mr::get_current_device_resource_ref()); + return cudf::bools_to_mask(cudf::device_span(valids), + cudf::get_default_stream(), + rmm::mr::get_current_device_resource_ref()); } - return std::pair{}; + return std::pair, cudf::size_type>{ + std::make_unique(), cudf::size_type{0}}; }(); // Adopt remaining children as evenly as possible @@ -659,7 +654,7 @@ std::unique_ptr create_random_column(data_profi current_child += children_to_adopt.size(); *current_parent = cudf::make_structs_column( - num_rows, std::move(children_to_adopt), null_count, std::move(null_mask)); + num_rows, std::move(children_to_adopt), null_count, std::move(*null_mask)); } if (lvl == 1) { @@ -728,18 +723,15 @@ std::unique_ptr create_random_column(data_profile rmm::device_buffer{}, 0); - auto [null_mask, null_count] = - cudf::detail::valid_if(valids.begin(), - valids.end(), - cuda::std::identity{}, - cudf::get_default_stream(), - rmm::mr::get_current_device_resource_ref()); - list_column = cudf::make_lists_column( + auto [null_mask, null_count] = cudf::bools_to_mask(cudf::device_span(valids), + cudf::get_default_stream(), + rmm::mr::get_current_device_resource_ref()); + list_column = cudf::make_lists_column( num_rows, std::move(offsets_column), std::move(current_child_column), profile.get_null_frequency().has_value() ? null_count : 0, // cudf::UNKNOWN_NULL_COUNT, - profile.get_null_frequency().has_value() ? std::move(null_mask) : rmm::device_buffer{}); + profile.get_null_frequency().has_value() ? std::move(*null_mask) : rmm::device_buffer{}); } return list_column; // return the top-level column } @@ -849,11 +841,13 @@ std::pair create_random_null_mask( } else if (*null_probability == 1.0) { return {cudf::create_null_mask(size, cudf::mask_state::ALL_NULL), size}; } else { - return cudf::detail::valid_if(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(size), - bool_generator{seed, 1.0 - *null_probability}, - cudf::get_default_stream(), - rmm::mr::get_current_device_resource_ref()); + rmm::device_uvector valids(size, cudf::get_default_stream()); + thrust::tabulate( + thrust::device, valids.begin(), valids.end(), bool_generator{seed, 1.0 - *null_probability}); + auto [mask, null_count] = cudf::bools_to_mask(cudf::device_span(valids), + cudf::get_default_stream(), + rmm::mr::get_current_device_resource_ref()); + return {std::move(*mask), null_count}; } } diff --git a/src/main/cpp/src/histogram.cu b/src/main/cpp/src/histogram.cu index 9306709ab0..eb516d52d3 100644 --- a/src/main/cpp/src/histogram.cu +++ b/src/main/cpp/src/histogram.cu @@ -327,7 +327,7 @@ std::unique_ptr create_histogram_if_valid(cudf::column_view const& if (frequencies[idx] < 0) { *check_invalid = 1; } if (frequencies[idx] == 0) { *check_zero = 1; } - check_valid[idx] = static_cast(frequencies[idx] > 0); + check_valid[idx] = frequencies[idx] > 0; }); auto const h_checks = cudf::detail::make_std_vector(check_invalid_and_zero, stream); From c3f255069fe0b43a5c7dc9a1dd4dae416383f94d Mon Sep 17 00:00:00 2001 From: MithunR Date: Fri, 20 Feb 2026 17:00:01 -0800 Subject: [PATCH 2/3] Materialized bool vectors. This change is more controversial. The only way to get away from using `cudf::detail::valid_if` in the files modified here is to materialize a temporary bool vector (that is then packed). Signed-off-by: MithunR --- src/main/cpp/src/from_json_to_raw_map.cu | 16 +++++++++++----- src/main/cpp/src/get_json_object.cu | 16 +++++++++++++--- thirdparty/cudf | 2 +- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/main/cpp/src/from_json_to_raw_map.cu b/src/main/cpp/src/from_json_to_raw_map.cu index a926904088..7f91550896 100644 --- a/src/main/cpp/src/from_json_to_raw_map.cu +++ b/src/main/cpp/src/from_json_to_raw_map.cu @@ -19,11 +19,11 @@ #include #include -#include #include #include #include #include +#include #include #include @@ -771,10 +771,16 @@ std::pair create_null_mask( }); } - auto const valid_it = should_be_nullified->view().begin(); - auto [null_mask, null_count] = cudf::detail::valid_if( - valid_it, valid_it + should_be_nullified->size(), thrust::logical_not{}, stream, mr); - return {null_count > 0 ? std::move(null_mask) : rmm::device_buffer{0, stream, mr}, null_count}; + rmm::device_uvector valids(should_be_nullified->size(), stream); + auto const nullify_it = should_be_nullified->view().begin(); + thrust::transform(rmm::exec_policy_nosync(stream), + nullify_it, + nullify_it + should_be_nullified->size(), + valids.begin(), + thrust::logical_not{}); + auto [null_mask, null_count] = + cudf::bools_to_mask(cudf::device_span(valids), stream, mr); + return {null_count > 0 ? std::move(*null_mask) : rmm::device_buffer{0, stream, mr}, null_count}; } } // namespace diff --git a/src/main/cpp/src/get_json_object.cu b/src/main/cpp/src/get_json_object.cu index 4f9e0a91ec..734437768b 100644 --- a/src/main/cpp/src/get_json_object.cu +++ b/src/main/cpp/src/get_json_object.cu @@ -24,11 +24,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include #include @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -1092,8 +1093,17 @@ std::vector> get_json_object_batch( if (h_error_check[idx]) { oob_indices.emplace_back(idx); - out_null_masks_and_null_counts.emplace_back( - cudf::detail::valid_if(out_sview.begin(), out_sview.end(), validator, stream, mr)); + { + rmm::device_uvector valids(out_sview.size(), stream); + thrust::transform(rmm::exec_policy_nosync(stream), + out_sview.begin(), + out_sview.end(), + valids.begin(), + validator); + auto [mask, null_count] = + cudf::bools_to_mask(cudf::device_span(valids), stream, mr); + out_null_masks_and_null_counts.emplace_back(std::move(*mask), null_count); + } // The string sizes computed in the previous kernel call will be used to allocate a new char // buffer to store the output. diff --git a/thirdparty/cudf b/thirdparty/cudf index 1ed06c6105..cb161cb3e2 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 1ed06c6105fb31bba18fc7cf69e2529f9c6a7a22 +Subproject commit cb161cb3e2079152da6baba098a33bb9894efd5c From 717c80bea764335c4972fed0a7c3d9b0c54eda6a Mon Sep 17 00:00:00 2001 From: MithunR Date: Tue, 24 Feb 2026 11:07:18 -0800 Subject: [PATCH 3/3] Release ptr before move. Signed-off-by: MithunR --- .../cpp/benchmarks/common/generate_input.cu | 23 +++++++++++-------- src/main/cpp/src/from_json_to_raw_map.cu | 3 ++- src/main/cpp/src/get_json_object.cu | 9 ++++---- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/main/cpp/benchmarks/common/generate_input.cu b/src/main/cpp/benchmarks/common/generate_input.cu index 8b60114b83..1f980a3329 100644 --- a/src/main/cpp/benchmarks/common/generate_input.cu +++ b/src/main/cpp/benchmarks/common/generate_input.cu @@ -434,12 +434,13 @@ std::unique_ptr create_random_column(data_profile const& profile, cudf::get_default_stream(), rmm::mr::get_current_device_resource_ref()); - return std::make_unique( - cudf::data_type{cudf::type_to_id()}, - num_rows, - data.release(), - profile.get_null_frequency().has_value() ? std::move(*result_bitmask) : rmm::device_buffer{}, - null_count); + return std::make_unique(cudf::data_type{cudf::type_to_id()}, + num_rows, + data.release(), + profile.get_null_frequency().has_value() + ? std::move(*result_bitmask.release()) + : rmm::device_buffer{}, + null_count); } struct valid_or_zero { @@ -521,7 +522,8 @@ std::unique_ptr create_random_utf8_string_column(data_profile cons std::make_unique(std::move(offsets), rmm::device_buffer{}, 0), chars.release(), null_count, - profile.get_null_frequency().has_value() ? std::move(*result_bitmask) : rmm::device_buffer{}); + profile.get_null_frequency().has_value() ? std::move(*result_bitmask.release()) + : rmm::device_buffer{}); } /** @@ -654,7 +656,7 @@ std::unique_ptr create_random_column(data_profi current_child += children_to_adopt.size(); *current_parent = cudf::make_structs_column( - num_rows, std::move(children_to_adopt), null_count, std::move(*null_mask)); + num_rows, std::move(children_to_adopt), null_count, std::move(*null_mask.release())); } if (lvl == 1) { @@ -731,7 +733,8 @@ std::unique_ptr create_random_column(data_profile std::move(offsets_column), std::move(current_child_column), profile.get_null_frequency().has_value() ? null_count : 0, // cudf::UNKNOWN_NULL_COUNT, - profile.get_null_frequency().has_value() ? std::move(*null_mask) : rmm::device_buffer{}); + profile.get_null_frequency().has_value() ? std::move(*null_mask.release()) + : rmm::device_buffer{}); } return list_column; // return the top-level column } @@ -847,7 +850,7 @@ std::pair create_random_null_mask( auto [mask, null_count] = cudf::bools_to_mask(cudf::device_span(valids), cudf::get_default_stream(), rmm::mr::get_current_device_resource_ref()); - return {std::move(*mask), null_count}; + return {std::move(*mask.release()), null_count}; } } diff --git a/src/main/cpp/src/from_json_to_raw_map.cu b/src/main/cpp/src/from_json_to_raw_map.cu index 7f91550896..e2b2ecc4ab 100644 --- a/src/main/cpp/src/from_json_to_raw_map.cu +++ b/src/main/cpp/src/from_json_to_raw_map.cu @@ -780,7 +780,8 @@ std::pair create_null_mask( thrust::logical_not{}); auto [null_mask, null_count] = cudf::bools_to_mask(cudf::device_span(valids), stream, mr); - return {null_count > 0 ? std::move(*null_mask) : rmm::device_buffer{0, stream, mr}, null_count}; + return {null_count > 0 ? std::move(*null_mask.release()) : rmm::device_buffer{0, stream, mr}, + null_count}; } } // namespace diff --git a/src/main/cpp/src/get_json_object.cu b/src/main/cpp/src/get_json_object.cu index 734437768b..f74d015deb 100644 --- a/src/main/cpp/src/get_json_object.cu +++ b/src/main/cpp/src/get_json_object.cu @@ -1093,17 +1093,16 @@ std::vector> get_json_object_batch( if (h_error_check[idx]) { oob_indices.emplace_back(idx); - { + auto [mask, null_count] = [&] { rmm::device_uvector valids(out_sview.size(), stream); thrust::transform(rmm::exec_policy_nosync(stream), out_sview.begin(), out_sview.end(), valids.begin(), validator); - auto [mask, null_count] = - cudf::bools_to_mask(cudf::device_span(valids), stream, mr); - out_null_masks_and_null_counts.emplace_back(std::move(*mask), null_count); - } + return cudf::bools_to_mask(cudf::device_span(valids), stream, mr); + }(); + out_null_masks_and_null_counts.emplace_back(std::move(*mask.release()), null_count); // The string sizes computed in the previous kernel call will be used to allocate a new char // buffer to store the output.