Skip to content

Commit b2d5ab5

Browse files
authored
Merge branch 'main' into bot-auto-merge-release/26.04
2 parents 661456c + 66cda62 commit b2d5ab5

32 files changed

+233
-178
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
<groupId>com.nvidia</groupId>
2323
<artifactId>spark-rapids-jni</artifactId>
24-
<version>26.04.0-SNAPSHOT</version>
24+
<version>26.06.0-SNAPSHOT</version>
2525
<packaging>jar</packaging>
2626
<name>RAPIDS Accelerator JNI for Apache Spark</name>
2727
<description>

src/main/cpp/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ rapids_cuda_init_architectures(SPARK_RAPIDS_JNI)
4444

4545
project(
4646
SPARK_RAPIDS_JNI
47-
VERSION 26.04.00
47+
VERSION 26.06.00
4848
LANGUAGES CXX CUDA
4949
)
5050

src/main/cpp/benchmarks/common/generate_input.cu

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
#include <cudf/column/column.hpp>
2121
#include <cudf/column/column_factories.hpp>
22-
#include <cudf/detail/gather.hpp>
22+
#include <cudf/copying.hpp>
2323
#include <cudf/detail/utilities/integer_utils.hpp>
2424
#include <cudf/detail/valid_if.cuh>
2525
#include <cudf/filling.hpp>
@@ -279,7 +279,7 @@ struct random_value_fn<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
279279
[scale = *(this->scale),
280280
upper_bound = this->upper_bound,
281281
lower_bound = this->lower_bound] __device__(auto int_value) {
282-
return T{std::clamp(int_value, lower_bound, upper_bound), scale};
282+
return T{cuda::std::clamp(int_value, lower_bound, upper_bound), scale};
283283
});
284284
return result;
285285
}
@@ -355,7 +355,7 @@ rmm::device_uvector<cudf::size_type> sample_indices_with_run_length(cudf::size_t
355355
auto const approx_run_len = num_rows / avg_run_len + 1;
356356
auto run_lens = avglen_dist(engine, approx_run_len);
357357
thrust::inclusive_scan(
358-
thrust::device, run_lens.begin(), run_lens.end(), run_lens.begin(), std::plus<int>{});
358+
thrust::device, run_lens.begin(), run_lens.end(), run_lens.begin(), cuda::std::plus<int>{});
359359
auto const samples_indices = sample_dist(engine, approx_run_len + 1);
360360
// This is gather.
361361
auto avg_repeated_sample_indices_iterator = thrust::make_transform_iterator(
@@ -548,12 +548,14 @@ std::unique_ptr<cudf::column> create_random_column<cudf::string_view>(data_profi
548548
create_random_utf8_string_column(profile, engine, cardinality == 0 ? num_rows : cardinality);
549549
if (cardinality == 0) { return sample_strings; }
550550
auto sample_indices = sample_indices_with_run_length(avg_run_len, cardinality, num_rows, engine);
551-
auto str_table = cudf::detail::gather(cudf::table_view{{sample_strings->view()}},
552-
sample_indices,
553-
cudf::out_of_bounds_policy::DONT_CHECK,
554-
cudf::detail::negative_index_policy::NOT_ALLOWED,
555-
cudf::get_default_stream(),
556-
rmm::mr::get_current_device_resource_ref());
551+
auto sample_indices_span =
552+
cudf::device_span<cudf::size_type const>(sample_indices.data(), sample_indices.size());
553+
auto str_table = cudf::gather(cudf::table_view{{sample_strings->view()}},
554+
cudf::column_view{sample_indices_span},
555+
cudf::out_of_bounds_policy::DONT_CHECK,
556+
cudf::negative_index_policy::NOT_ALLOWED,
557+
cudf::get_default_stream(),
558+
rmm::mr::get_current_device_resource_ref());
557559
return std::move(str_table->release()[0]);
558560
}
559561

src/main/cpp/benchmarks/common/random_distribution_factory.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,9 @@ struct value_generator {
123123
engine.discard(n);
124124
if constexpr (cuda::std::is_integral_v<T> &&
125125
cuda::std::is_floating_point_v<decltype(dist(engine))>) {
126-
return std::clamp(static_cast<T>(std::round(dist(engine))), lower_bound, upper_bound);
126+
return cuda::std::clamp(static_cast<T>(std::round(dist(engine))), lower_bound, upper_bound);
127127
} else {
128-
return std::clamp(dist(engine), lower_bound, upper_bound);
128+
return cuda::std::clamp(dist(engine), lower_bound, upper_bound);
129129
}
130130
// Note: uniform does not need clamp, because already range is guaranteed to be within bounds.
131131
}

src/main/cpp/src/cast_decimal_to_string.cu

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <rmm/cuda_stream_view.hpp>
3333
#include <rmm/exec_policy.hpp>
3434

35+
#include <cuda/std/algorithm>
3536
#include <cuda/std/climits>
3637
#include <cuda/std/limits>
3738
#include <cuda/std/type_traits>
@@ -78,7 +79,7 @@ struct decimal_to_non_ansi_string_fn {
7879
} else if (scale < 0 && adjusted_exponent >= -6) {
7980
auto const exp_ten = numeric::detail::exp10<DecimalType>(-scale);
8081
auto const fraction = strings::detail::count_digits(abs_value % exp_ten);
81-
auto const num_zeros = std::max(0, (-scale - fraction));
82+
auto const num_zeros = cuda::std::max(0, (-scale - fraction));
8283
return static_cast<int32_t>(value < 0) + // sign if negative
8384
strings::detail::count_digits(abs_value / exp_ten) + // integer
8485
1 + // decimal point
@@ -123,7 +124,7 @@ struct decimal_to_non_ansi_string_fn {
123124
if (scale <= 0 && adjusted_exponent >= -6) {
124125
auto const exp_ten = numeric::detail::exp10<DecimalType>(-scale);
125126
auto const num_zeros =
126-
std::max(0, (-scale - strings::detail::count_digits(abs_value % exp_ten)));
127+
cuda::std::max(0, (-scale - strings::detail::count_digits(abs_value % exp_ten)));
127128
d_buffer +=
128129
strings::detail::integer_to_string(abs_value / exp_ten, d_buffer); // add the integer part
129130
if (scale != 0) {
@@ -139,8 +140,8 @@ struct decimal_to_non_ansi_string_fn {
139140
if (abs_value_digits > 1) {
140141
auto const digits_after_decimal = abs_value_digits - 1;
141142
auto const exp_ten = numeric::detail::exp10<DecimalType>(digits_after_decimal);
142-
auto const num_zeros =
143-
std::max(0, (digits_after_decimal - strings::detail::count_digits(abs_value % exp_ten)));
143+
auto const num_zeros = cuda::std::max(
144+
0, (digits_after_decimal - strings::detail::count_digits(abs_value % exp_ten)));
144145
d_buffer +=
145146
strings::detail::integer_to_string(abs_value / exp_ten, d_buffer); // add integer part
146147
*d_buffer++ = '.'; // add decimal point

src/main/cpp/src/cast_float_to_string.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@ struct float_to_string_fn {
4141
__device__ cudf::size_type compute_output_size(cudf::size_type idx) const
4242
{
4343
auto const value = d_floats.element<FloatType>(idx);
44-
bool constexpr is_float = std::is_same_v<FloatType, float>;
44+
bool constexpr is_float = cuda::std::is_same_v<FloatType, float>;
4545
return static_cast<cudf::size_type>(
4646
ftos_converter::compute_ftos_size(static_cast<double>(value), is_float));
4747
}
4848

4949
__device__ void float_to_string(cudf::size_type idx) const
5050
{
5151
auto const value = d_floats.element<FloatType>(idx);
52-
bool constexpr is_float = std::is_same_v<FloatType, float>;
52+
bool constexpr is_float = cuda::std::is_same_v<FloatType, float>;
5353
auto const output = d_chars + d_offsets[idx];
5454
ftos_converter::float_to_string(static_cast<double>(value), is_float, output);
5555
}
@@ -74,7 +74,7 @@ struct float_to_string_fn {
7474
* The template function declaration ensures only float types are allowed.
7575
*/
7676
struct dispatch_float_to_string_fn {
77-
template <typename FloatType, CUDF_ENABLE_IF(std::is_floating_point_v<FloatType>)>
77+
template <typename FloatType, CUDF_ENABLE_IF(cuda::std::is_floating_point_v<FloatType>)>
7878
std::unique_ptr<cudf::column> operator()(cudf::column_view const& floats,
7979
rmm::cuda_stream_view stream,
8080
rmm::device_async_resource_ref mr)
@@ -95,7 +95,7 @@ struct dispatch_float_to_string_fn {
9595
}
9696

9797
// non-float types throw an exception
98-
template <typename T, CUDF_ENABLE_IF(not std::is_floating_point_v<T>)>
98+
template <typename T, CUDF_ENABLE_IF(not cuda::std::is_floating_point_v<T>)>
9999
std::unique_ptr<cudf::column> operator()(cudf::column_view const&,
100100
rmm::cuda_stream_view,
101101
rmm::device_async_resource_ref)

src/main/cpp/src/cast_string.cu

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@
2727

2828
#include <cooperative_groups.h>
2929
#include <cub/warp/warp_reduce.cuh>
30+
#include <cuda/std/algorithm>
3031
#include <cuda/std/optional>
3132
#include <cuda/std/tuple>
33+
#include <cuda/std/type_traits>
3234
#include <cuda/std/utility>
3335
#include <thrust/find.h>
3436
#include <thrust/iterator/counting_iterator.h>
@@ -47,7 +49,7 @@ constexpr auto NUM_THREADS{256};
4749
* @param chr character to test
4850
* @return true if character is a whitespace character
4951
*/
50-
constexpr bool is_whitespace(char const chr)
52+
__host__ __device__ constexpr bool is_whitespace(char const chr)
5153
{
5254
// Whitespace characters include:
5355
// - Space (0x20, ' ')
@@ -83,7 +85,7 @@ constexpr T __device__ generic_abs(T value)
8385
template <typename T>
8486
bool __device__ will_overflow(T const val, bool adding)
8587
{
86-
if constexpr (std::is_signed_v<T>) {
88+
if constexpr (cuda::std::is_signed_v<T>) {
8789
if (!adding) {
8890
auto constexpr minval = cuda::std::numeric_limits<T>::min() / 10;
8991
return val < minval;
@@ -106,7 +108,7 @@ bool __device__ will_overflow(T const val, bool adding)
106108
template <typename T>
107109
bool __device__ will_overflow(T const lhs, T const rhs, bool adding)
108110
{
109-
if constexpr (std::is_signed_v<T>) {
111+
if constexpr (cuda::std::is_signed_v<T>) {
110112
if (!adding) {
111113
auto const minval = cuda::std::numeric_limits<T>::min() + rhs;
112114
return lhs < minval;
@@ -184,7 +186,7 @@ CUDF_KERNEL void string_to_integer_kernel(T* out,
184186
T thread_val = 0;
185187
int i = 0;
186188
T sign = 1;
187-
constexpr bool is_signed_type = std::is_signed_v<T>;
189+
constexpr bool is_signed_type = cuda::std::is_signed_v<T>;
188190

189191
if (valid) {
190192
if (strip) {
@@ -534,7 +536,7 @@ CUDF_KERNEL void string_to_decimal_kernel(T* out,
534536
}
535537

536538
auto const significant_preceding_zeros = decimal_location < 0 ? -decimal_location : 0;
537-
auto const zeros_to_decimal = std::max(
539+
auto const zeros_to_decimal = cuda::std::max(
538540
0, scale > 0 ? decimal_location - total_digits - scale : decimal_location - total_digits);
539541
auto const significant_digits_before_decimal =
540542
significant_digits_before_decimal_in_string + zeros_to_decimal + rounding_digits;

src/main/cpp/src/cast_string_to_float.cu

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
#include <rmm/resource_ref.hpp>
2929

3030
#include <cub/warp/warp_reduce.cuh>
31+
#include <cuda/std/cmath>
32+
#include <cuda/std/limits>
3133
#include <cuda/std/utility>
3234

3335
using namespace cudf;
@@ -44,7 +46,7 @@ __device__ __inline__ bool is_digit(char c) { return c >= '0' && c <= '9'; }
4446
* @param chr character to test
4547
* @return true if character is a whitespace character
4648
*/
47-
constexpr bool is_whitespace(char const chr)
49+
__host__ __device__ constexpr bool is_whitespace(char const chr)
4850
{
4951
// Whitespace characters include:
5052
// - Space (0x20, ' ')
@@ -113,8 +115,8 @@ class string_to_float {
113115
// check for inf / infinity
114116
if (check_for_inf()) {
115117
if (_warp_lane == 0) {
116-
_out[_row] =
117-
sign >= 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
118+
_out[_row] = sign >= 0 ? cuda::std::numeric_limits<T>::infinity()
119+
: -cuda::std::numeric_limits<T>::infinity();
118120
}
119121
compute_validity(_valid, _except);
120122
return;
@@ -164,9 +166,9 @@ class string_to_float {
164166
int exp_ten = exp_base + manual_exp;
165167

166168
// final value
167-
if (exp_ten > std::numeric_limits<double>::max_exponent10) {
168-
_out[_row] = sign >= 0 ? std::numeric_limits<double>::infinity()
169-
: -std::numeric_limits<double>::infinity();
169+
if (exp_ten > cuda::std::numeric_limits<double>::max_exponent10) {
170+
_out[_row] = sign >= 0 ? cuda::std::numeric_limits<double>::infinity()
171+
: -cuda::std::numeric_limits<double>::infinity();
170172
} else {
171173
// make sure we don't produce a subnormal number.
172174
// - a normal number is one where the leading digit of the floating point rep is not zero.
@@ -182,7 +184,7 @@ class string_to_float {
182184
// https://en.wikipedia.org/wiki/Denormal_number
183185
//
184186

185-
auto const subnormal_shift = std::numeric_limits<double>::min_exponent10 - exp_ten;
187+
auto const subnormal_shift = cuda::std::numeric_limits<double>::min_exponent10 - exp_ten;
186188
if (subnormal_shift > 0) {
187189
// Handle subnormal values. Ensure that both base and exponent are
188190
// normal values before computing their product.
@@ -192,7 +194,7 @@ class string_to_float {
192194
auto const exponent = exp10(static_cast<double>(exp_ten + subnormal_shift));
193195
_out[_row] = static_cast<T>(digitsf * exponent);
194196
} else {
195-
double const exponent = exp10(static_cast<double>(std::abs(exp_ten)));
197+
double const exponent = exp10(static_cast<double>(cuda::std::abs(exp_ten)));
196198
double const result = exp_ten < 0 ? digitsf / exponent : digitsf * exponent;
197199

198200
_out[_row] = static_cast<T>(result);
@@ -426,7 +428,7 @@ class string_to_float {
426428
// 1,844,674,407,370,955,160 + 1X -> 18,446,744,073,709,551,61X -> potentially rolls
427429
// past the limit
428430
//
429-
constexpr uint64_t max_holding = (std::numeric_limits<uint64_t>::max() - 9) / 10;
431+
constexpr uint64_t max_holding = (cuda::std::numeric_limits<uint64_t>::max() - 9) / 10;
430432
// if we're already past the max_holding, just truncate.
431433
// eg: 9,999,999,999,999,999,999
432434
if (digits > max_holding) {

src/main/cpp/src/datetime_truncate.cu

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ __host__ __device__ truncation_format parse_format(char const* fmt_data, cudf::s
9696
"MICROSECOND"};
9797
// Manually calculate sizes of the strings since `strlen` is not available in device code.
9898
cudf::size_type constexpr comp_sizes[] = {4, 4, 2, 7, 5, 2, 3, 4, 3, 2, 4, 6, 6, 11, 11};
99-
auto constexpr num_components = std::size(components);
99+
auto constexpr num_components = sizeof(components) / sizeof(components[0]);
100100

101101
for (std::size_t comp_idx = 0; comp_idx < num_components; ++comp_idx) {
102102
if (fmt_size != comp_sizes[comp_idx]) { continue; }
@@ -166,8 +166,8 @@ __device__ inline cuda::std::optional<Timestamp> trunc_date(
166166
template <typename FormatDeviceT>
167167
struct truncate_date_fn {
168168
using Timestamp = cudf::timestamp_D;
169-
static_assert(std::is_same_v<FormatDeviceT, cudf::column_device_view> ||
170-
std::is_same_v<FormatDeviceT, truncation_format>,
169+
static_assert(cuda::std::is_same_v<FormatDeviceT, cudf::column_device_view> ||
170+
cuda::std::is_same_v<FormatDeviceT, truncation_format>,
171171
"FormatDeviceT must be either 'cudf::column_device_view' or 'truncation_format'.");
172172

173173
cudf::column_device_view datetime;
@@ -204,8 +204,8 @@ struct truncate_date_fn {
204204
template <typename FormatDeviceT>
205205
struct truncate_timestamp_fn {
206206
using Timestamp = cudf::timestamp_us;
207-
static_assert(std::is_same_v<FormatDeviceT, cudf::column_device_view> ||
208-
std::is_same_v<FormatDeviceT, truncation_format>,
207+
static_assert(cuda::std::is_same_v<FormatDeviceT, cudf::column_device_view> ||
208+
cuda::std::is_same_v<FormatDeviceT, truncation_format>,
209209
"FormatDeviceT must be either 'cudf::column_device_view' or 'truncation_format'.");
210210

211211
cudf::column_device_view datetime;

src/main/cpp/src/decimal_utils.cu

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <rmm/device_scalar.hpp>
3131
#include <rmm/exec_policy.hpp>
3232

33+
#include <cuda/std/cmath>
3334
#include <cuda/std/functional>
3435
#include <thrust/tabulate.h>
3536

@@ -1263,7 +1264,7 @@ __device__ inline IntType scaled_round(FloatType input, int32_t pow10)
12631264
} else {
12641265
// Spark rounds up the power-of-10 to floor for DOUBLES >= 2^63 (and yes, this is the exact
12651266
// cutoff).
1266-
bool const round_up = unsigned_floating > std::numeric_limits<std::int64_t>::max();
1267+
bool const round_up = unsigned_floating > cuda::std::numeric_limits<std::int64_t>::max();
12671268
return (3 * pow2_bit - 10 * pow10 + 9 * round_up) / 10;
12681269
}
12691270
}(pow2);
@@ -1319,7 +1320,7 @@ struct floating_point_to_decimal_fn {
13191320
{
13201321
auto const x = input.element<FloatType>(idx);
13211322

1322-
if (input.is_null(idx) || !std::isfinite(x)) {
1323+
if (input.is_null(idx) || !cuda::std::isfinite(x)) {
13231324
validity[idx] = false;
13241325
return DecimalRepType{0};
13251326
}

0 commit comments

Comments
 (0)