NVIDIA
diff --git a/pom.xml b/pom.xml
Lines changed: 1 addition & 1 deletion
diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/src/main/cpp/benchmarks/common/generate_input.cu b/src/main/cpp/benchmarks/common/generate_input.cu
Lines changed: 11 additions & 9 deletions
diff --git a/src/main/cpp/benchmarks/common/random_distribution_factory.cuh b/src/main/cpp/benchmarks/common/random_distribution_factory.cuh
Lines changed: 2 additions & 2 deletions
diff --git a/src/main/cpp/src/cast_decimal_to_string.cu b/src/main/cpp/src/cast_decimal_to_string.cu
Lines changed: 5 additions & 4 deletions
diff --git a/src/main/cpp/src/cast_float_to_string.cu b/src/main/cpp/src/cast_float_to_string.cu
Lines changed: 4 additions & 4 deletions
diff --git a/src/main/cpp/src/cast_string.cu b/src/main/cpp/src/cast_string.cu
Lines changed: 7 additions & 5 deletions
diff --git a/src/main/cpp/src/cast_string_to_float.cu b/src/main/cpp/src/cast_string_to_float.cu
Lines changed: 11 additions & 9 deletions
diff --git a/src/main/cpp/src/datetime_truncate.cu b/src/main/cpp/src/datetime_truncate.cu
Lines changed: 5 additions & 5 deletions
diff --git a/src/main/cpp/src/decimal_utils.cu b/src/main/cpp/src/decimal_utils.cu
Lines changed: 3 additions & 2 deletions
@@ -21,7 +21,7 @@
 
   <groupId>com.nvidia</groupId>
   <artifactId>spark-rapids-jni</artifactId>
-  <version>26.04.0-SNAPSHOT</version>
+  <version>26.06.0-SNAPSHOT</version>
   <packaging>jar</packaging>
   <name>RAPIDS Accelerator JNI for Apache Spark</name>
   <description>
 
@@ -44,7 +44,7 @@ rapids_cuda_init_architectures(SPARK_RAPIDS_JNI)
 
 project(
   SPARK_RAPIDS_JNI
-  VERSION 26.04.00
+  VERSION 26.06.00
   LANGUAGES CXX CUDA
 )
 
 
@@ -19,7 +19,7 @@
 
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_factories.hpp>
-#include <cudf/detail/gather.hpp>
+#include <cudf/copying.hpp>
 #include <cudf/detail/utilities/integer_utils.hpp>
 #include <cudf/detail/valid_if.cuh>
 #include <cudf/filling.hpp>
@@ -279,7 +279,7 @@ struct random_value_fn<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
                       [scale       = *(this->scale),
                        upper_bound = this->upper_bound,
                        lower_bound = this->lower_bound] __device__(auto int_value) {
-                        return T{std::clamp(int_value, lower_bound, upper_bound), scale};
+                        return T{cuda::std::clamp(int_value, lower_bound, upper_bound), scale};
                       });
     return result;
   }
@@ -355,7 +355,7 @@ rmm::device_uvector<cudf::size_type> sample_indices_with_run_length(cudf::size_t
     auto const approx_run_len = num_rows / avg_run_len + 1;
     auto run_lens             = avglen_dist(engine, approx_run_len);
     thrust::inclusive_scan(
-      thrust::device, run_lens.begin(), run_lens.end(), run_lens.begin(), std::plus<int>{});
+      thrust::device, run_lens.begin(), run_lens.end(), run_lens.begin(), cuda::std::plus<int>{});
     auto const samples_indices = sample_dist(engine, approx_run_len + 1);
     // This is gather.
     auto avg_repeated_sample_indices_iterator = thrust::make_transform_iterator(
@@ -548,12 +548,14 @@ std::unique_ptr<cudf::column> create_random_column<cudf::string_view>(data_profi
     create_random_utf8_string_column(profile, engine, cardinality == 0 ? num_rows : cardinality);
   if (cardinality == 0) { return sample_strings; }
   auto sample_indices = sample_indices_with_run_length(avg_run_len, cardinality, num_rows, engine);
-  auto str_table      = cudf::detail::gather(cudf::table_view{{sample_strings->view()}},
-                                        sample_indices,
-                                        cudf::out_of_bounds_policy::DONT_CHECK,
-                                        cudf::detail::negative_index_policy::NOT_ALLOWED,
-                                        cudf::get_default_stream(),
-                                        rmm::mr::get_current_device_resource_ref());
+  auto sample_indices_span =
+    cudf::device_span<cudf::size_type const>(sample_indices.data(), sample_indices.size());
+  auto str_table = cudf::gather(cudf::table_view{{sample_strings->view()}},
+                                cudf::column_view{sample_indices_span},
+                                cudf::out_of_bounds_policy::DONT_CHECK,
+                                cudf::negative_index_policy::NOT_ALLOWED,
+                                cudf::get_default_stream(),
+                                rmm::mr::get_current_device_resource_ref());
   return std::move(str_table->release()[0]);
 }
 
 
@@ -123,9 +123,9 @@ struct value_generator {
     engine.discard(n);
     if constexpr (cuda::std::is_integral_v<T> &&
                   cuda::std::is_floating_point_v<decltype(dist(engine))>) {
-      return std::clamp(static_cast<T>(std::round(dist(engine))), lower_bound, upper_bound);
+      return cuda::std::clamp(static_cast<T>(std::round(dist(engine))), lower_bound, upper_bound);
     } else {
-      return std::clamp(dist(engine), lower_bound, upper_bound);
+      return cuda::std::clamp(dist(engine), lower_bound, upper_bound);
     }
     // Note: uniform does not need clamp, because already range is guaranteed to be within bounds.
   }
 
@@ -32,6 +32,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/std/algorithm>
 #include <cuda/std/climits>
 #include <cuda/std/limits>
 #include <cuda/std/type_traits>
@@ -78,7 +79,7 @@ struct decimal_to_non_ansi_string_fn {
     } else if (scale < 0 && adjusted_exponent >= -6) {
       auto const exp_ten   = numeric::detail::exp10<DecimalType>(-scale);
       auto const fraction  = strings::detail::count_digits(abs_value % exp_ten);
-      auto const num_zeros = std::max(0, (-scale - fraction));
+      auto const num_zeros = cuda::std::max(0, (-scale - fraction));
       return static_cast<int32_t>(value < 0) +                     // sign if negative
              strings::detail::count_digits(abs_value / exp_ten) +  // integer
              1 +                                                   // decimal point
@@ -123,7 +124,7 @@ struct decimal_to_non_ansi_string_fn {
     if (scale <= 0 && adjusted_exponent >= -6) {
       auto const exp_ten = numeric::detail::exp10<DecimalType>(-scale);
       auto const num_zeros =
-        std::max(0, (-scale - strings::detail::count_digits(abs_value % exp_ten)));
+        cuda::std::max(0, (-scale - strings::detail::count_digits(abs_value % exp_ten)));
       d_buffer +=
         strings::detail::integer_to_string(abs_value / exp_ten, d_buffer);  // add the integer part
       if (scale != 0) {
@@ -139,8 +140,8 @@ struct decimal_to_non_ansi_string_fn {
       if (abs_value_digits > 1) {
         auto const digits_after_decimal = abs_value_digits - 1;
         auto const exp_ten              = numeric::detail::exp10<DecimalType>(digits_after_decimal);
-        auto const num_zeros =
-          std::max(0, (digits_after_decimal - strings::detail::count_digits(abs_value % exp_ten)));
+        auto const num_zeros            = cuda::std::max(
+          0, (digits_after_decimal - strings::detail::count_digits(abs_value % exp_ten)));
         d_buffer +=
           strings::detail::integer_to_string(abs_value / exp_ten, d_buffer);  // add integer part
         *d_buffer++ = '.';                                                    // add decimal point
 
@@ -41,15 +41,15 @@ struct float_to_string_fn {
   __device__ cudf::size_type compute_output_size(cudf::size_type idx) const
   {
     auto const value        = d_floats.element<FloatType>(idx);
-    bool constexpr is_float = std::is_same_v<FloatType, float>;
+    bool constexpr is_float = cuda::std::is_same_v<FloatType, float>;
     return static_cast<cudf::size_type>(
       ftos_converter::compute_ftos_size(static_cast<double>(value), is_float));
   }
 
   __device__ void float_to_string(cudf::size_type idx) const
   {
     auto const value        = d_floats.element<FloatType>(idx);
-    bool constexpr is_float = std::is_same_v<FloatType, float>;
+    bool constexpr is_float = cuda::std::is_same_v<FloatType, float>;
     auto const output       = d_chars + d_offsets[idx];
     ftos_converter::float_to_string(static_cast<double>(value), is_float, output);
   }
@@ -74,7 +74,7 @@ struct float_to_string_fn {
  * The template function declaration ensures only float types are allowed.
  */
 struct dispatch_float_to_string_fn {
-  template <typename FloatType, CUDF_ENABLE_IF(std::is_floating_point_v<FloatType>)>
+  template <typename FloatType, CUDF_ENABLE_IF(cuda::std::is_floating_point_v<FloatType>)>
   std::unique_ptr<cudf::column> operator()(cudf::column_view const& floats,
                                            rmm::cuda_stream_view stream,
                                            rmm::device_async_resource_ref mr)
@@ -95,7 +95,7 @@ struct dispatch_float_to_string_fn {
   }
 
   // non-float types throw an exception
-  template <typename T, CUDF_ENABLE_IF(not std::is_floating_point_v<T>)>
+  template <typename T, CUDF_ENABLE_IF(not cuda::std::is_floating_point_v<T>)>
   std::unique_ptr<cudf::column> operator()(cudf::column_view const&,
                                            rmm::cuda_stream_view,
                                            rmm::device_async_resource_ref)
 
@@ -27,8 +27,10 @@
 
 #include <cooperative_groups.h>
 #include <cub/warp/warp_reduce.cuh>
+#include <cuda/std/algorithm>
 #include <cuda/std/optional>
 #include <cuda/std/tuple>
+#include <cuda/std/type_traits>
 #include <cuda/std/utility>
 #include <thrust/find.h>
 #include <thrust/iterator/counting_iterator.h>
@@ -47,7 +49,7 @@ constexpr auto NUM_THREADS{256};
  * @param chr character to test
  * @return true if character is a whitespace character
  */
-constexpr bool is_whitespace(char const chr)
+__host__ __device__ constexpr bool is_whitespace(char const chr)
 {
   // Whitespace characters include:
   // - Space (0x20, ' ')
@@ -83,7 +85,7 @@ constexpr T __device__ generic_abs(T value)
 template <typename T>
 bool __device__ will_overflow(T const val, bool adding)
 {
-  if constexpr (std::is_signed_v<T>) {
+  if constexpr (cuda::std::is_signed_v<T>) {
     if (!adding) {
       auto constexpr minval = cuda::std::numeric_limits<T>::min() / 10;
       return val < minval;
@@ -106,7 +108,7 @@ bool __device__ will_overflow(T const val, bool adding)
 template <typename T>
 bool __device__ will_overflow(T const lhs, T const rhs, bool adding)
 {
-  if constexpr (std::is_signed_v<T>) {
+  if constexpr (cuda::std::is_signed_v<T>) {
     if (!adding) {
       auto const minval = cuda::std::numeric_limits<T>::min() + rhs;
       return lhs < minval;
@@ -184,7 +186,7 @@ CUDF_KERNEL void string_to_integer_kernel(T* out,
   T thread_val           = 0;
   int i                  = 0;
   T sign                 = 1;
-  constexpr bool is_signed_type = std::is_signed_v<T>;
+  constexpr bool is_signed_type = cuda::std::is_signed_v<T>;
 
   if (valid) {
     if (strip) {
@@ -534,7 +536,7 @@ CUDF_KERNEL void string_to_decimal_kernel(T* out,
     }
 
     auto const significant_preceding_zeros = decimal_location < 0 ? -decimal_location : 0;
-    auto const zeros_to_decimal            = std::max(
+    auto const zeros_to_decimal            = cuda::std::max(
       0, scale > 0 ? decimal_location - total_digits - scale : decimal_location - total_digits);
     auto const significant_digits_before_decimal =
       significant_digits_before_decimal_in_string + zeros_to_decimal + rounding_digits;
 
@@ -28,6 +28,8 @@
 #include <rmm/resource_ref.hpp>
 
 #include <cub/warp/warp_reduce.cuh>
+#include <cuda/std/cmath>
+#include <cuda/std/limits>
 #include <cuda/std/utility>
 
 using namespace cudf;
@@ -44,7 +46,7 @@ __device__ __inline__ bool is_digit(char c) { return c >= '0' && c <= '9'; }
  * @param chr character to test
  * @return true if character is a whitespace character
  */
-constexpr bool is_whitespace(char const chr)
+__host__ __device__ constexpr bool is_whitespace(char const chr)
 {
   // Whitespace characters include:
   // - Space (0x20, ' ')
@@ -113,8 +115,8 @@ class string_to_float {
     // check for inf / infinity
     if (check_for_inf()) {
       if (_warp_lane == 0) {
-        _out[_row] =
-          sign >= 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
+        _out[_row] = sign >= 0 ? cuda::std::numeric_limits<T>::infinity()
+                               : -cuda::std::numeric_limits<T>::infinity();
       }
       compute_validity(_valid, _except);
       return;
@@ -164,9 +166,9 @@ class string_to_float {
       int exp_ten = exp_base + manual_exp;
 
       // final value
-      if (exp_ten > std::numeric_limits<double>::max_exponent10) {
-        _out[_row] = sign >= 0 ? std::numeric_limits<double>::infinity()
-                               : -std::numeric_limits<double>::infinity();
+      if (exp_ten > cuda::std::numeric_limits<double>::max_exponent10) {
+        _out[_row] = sign >= 0 ? cuda::std::numeric_limits<double>::infinity()
+                               : -cuda::std::numeric_limits<double>::infinity();
       } else {
         // make sure we don't produce a subnormal number.
         // - a normal number is one where the leading digit of the floating point rep is not zero.
@@ -182,7 +184,7 @@ class string_to_float {
         // https://en.wikipedia.org/wiki/Denormal_number
         //
 
-        auto const subnormal_shift = std::numeric_limits<double>::min_exponent10 - exp_ten;
+        auto const subnormal_shift = cuda::std::numeric_limits<double>::min_exponent10 - exp_ten;
         if (subnormal_shift > 0) {
           // Handle subnormal values. Ensure that both base and exponent are
           // normal values before computing their product.
@@ -192,7 +194,7 @@ class string_to_float {
           auto const exponent = exp10(static_cast<double>(exp_ten + subnormal_shift));
           _out[_row]          = static_cast<T>(digitsf * exponent);
         } else {
-          double const exponent = exp10(static_cast<double>(std::abs(exp_ten)));
+          double const exponent = exp10(static_cast<double>(cuda::std::abs(exp_ten)));
           double const result   = exp_ten < 0 ? digitsf / exponent : digitsf * exponent;
 
           _out[_row] = static_cast<T>(result);
@@ -426,7 +428,7 @@ class string_to_float {
       //        1,844,674,407,370,955,160 + 1X   -> 18,446,744,073,709,551,61X  -> potentially rolls
       //        past the limit
       //
-      constexpr uint64_t max_holding = (std::numeric_limits<uint64_t>::max() - 9) / 10;
+      constexpr uint64_t max_holding = (cuda::std::numeric_limits<uint64_t>::max() - 9) / 10;
       // if we're already past the max_holding, just truncate.
       // eg:    9,999,999,999,999,999,999
       if (digits > max_holding) {
 
@@ -96,7 +96,7 @@ __host__ __device__ truncation_format parse_format(char const* fmt_data, cudf::s
                               "MICROSECOND"};
   // Manually calculate sizes of the strings since `strlen` is not available in device code.
   cudf::size_type constexpr comp_sizes[] = {4, 4, 2, 7, 5, 2, 3, 4, 3, 2, 4, 6, 6, 11, 11};
-  auto constexpr num_components          = std::size(components);
+  auto constexpr num_components          = sizeof(components) / sizeof(components[0]);
 
   for (std::size_t comp_idx = 0; comp_idx < num_components; ++comp_idx) {
     if (fmt_size != comp_sizes[comp_idx]) { continue; }
@@ -166,8 +166,8 @@ __device__ inline cuda::std::optional<Timestamp> trunc_date(
 template <typename FormatDeviceT>
 struct truncate_date_fn {
   using Timestamp = cudf::timestamp_D;
-  static_assert(std::is_same_v<FormatDeviceT, cudf::column_device_view> ||
-                  std::is_same_v<FormatDeviceT, truncation_format>,
+  static_assert(cuda::std::is_same_v<FormatDeviceT, cudf::column_device_view> ||
+                  cuda::std::is_same_v<FormatDeviceT, truncation_format>,
                 "FormatDeviceT must be either 'cudf::column_device_view' or 'truncation_format'.");
 
   cudf::column_device_view datetime;
@@ -204,8 +204,8 @@ struct truncate_date_fn {
 template <typename FormatDeviceT>
 struct truncate_timestamp_fn {
   using Timestamp = cudf::timestamp_us;
-  static_assert(std::is_same_v<FormatDeviceT, cudf::column_device_view> ||
-                  std::is_same_v<FormatDeviceT, truncation_format>,
+  static_assert(cuda::std::is_same_v<FormatDeviceT, cudf::column_device_view> ||
+                  cuda::std::is_same_v<FormatDeviceT, truncation_format>,
                 "FormatDeviceT must be either 'cudf::column_device_view' or 'truncation_format'.");
 
   cudf::column_device_view datetime;
 
@@ -30,6 +30,7 @@
 #include <rmm/device_scalar.hpp>
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/std/cmath>
 #include <cuda/std/functional>
 #include <thrust/tabulate.h>
 
@@ -1263,7 +1264,7 @@ __device__ inline IntType scaled_round(FloatType input, int32_t pow10)
     } else {
       // Spark rounds up the power-of-10 to floor for DOUBLES >= 2^63 (and yes, this is the exact
       // cutoff).
-      bool const round_up = unsigned_floating > std::numeric_limits<std::int64_t>::max();
+      bool const round_up = unsigned_floating > cuda::std::numeric_limits<std::int64_t>::max();
       return (3 * pow2_bit - 10 * pow10 + 9 * round_up) / 10;
     }
   }(pow2);
@@ -1319,7 +1320,7 @@ struct floating_point_to_decimal_fn {
   {
     auto const x = input.element<FloatType>(idx);
 
-    if (input.is_null(idx) || !std::isfinite(x)) {
+    if (input.is_null(idx) || !cuda::std::isfinite(x)) {
       validity[idx] = false;
       return DecimalRepType{0};
     }
Original file line number	Diff line number	Diff line change
`@@ -44,7 +44,7 @@ rapids_cuda_init_architectures(SPARK_RAPIDS_JNI)`
`44`	`44`
`45`	`45`	`project(`
`46`	`46`	`SPARK_RAPIDS_JNI`
`47`		`- VERSION 26.04.00`
	`47`	`+ VERSION 26.06.00`
`48`	`48`	`LANGUAGES CXX CUDA`
`49`	`49`	`)`
`50`	`50`
Original file line number	Diff line number	Diff line change
`@@ -123,9 +123,9 @@ struct value_generator {`
`123`	`123`	`engine.discard(n);`
`124`	`124`	`if constexpr (cuda::std::is_integral_v<T> &&`
`125`	`125`	`cuda::std::is_floating_point_v<decltype(dist(engine))>) {`
`126`		`- return std::clamp(static_cast<T>(std::round(dist(engine))), lower_bound, upper_bound);`
	`126`	`+ return cuda::std::clamp(static_cast<T>(std::round(dist(engine))), lower_bound, upper_bound);`
`127`	`127`	`} else {`
`128`		`- return std::clamp(dist(engine), lower_bound, upper_bound);`
	`128`	`+ return cuda::std::clamp(dist(engine), lower_bound, upper_bound);`
`129`	`129`	`}`
`130`	`130`	`// Note: uniform does not need clamp, because already range is guaranteed to be within bounds.`
`131`	`131`	`}`