rapidsai · rapids-bot · May 13, 2025 · Apr 28, 2025 · Apr 28, 2025 · Apr 28, 2025
@@ -137,6 +137,7 @@ enum class unary_operator : int32_t {
   FLOOR,       ///< largest integer value not greater than arg
   ABS,         ///< Absolute value
   RINT,        ///< Rounds the floating-point argument arg to an integer value
+  BIT_COUNT,   ///< Count the number of bits set to 1 of an integer value
   BIT_INVERT,  ///< Bitwise Not (~)
   NOT,         ///< Logical Not (!)
   NEGATE,      ///< Unary negation (-), only for signed numeric and duration types.

@@ -27,6 +27,7 @@
 
 #include <rmm/cuda_stream_view.hpp>
 
+#include <cuda/std/bit>
 #include <cuda/std/cmath>
 #include <thrust/transform.h>
 
@@ -209,9 +210,20 @@ struct DeviceRInt {
   }
 };
 
-// bitwise op
+struct DeviceBitCount {
+  template <typename T>
+  int32_t __device__ operator()(T data)
+  {
+    if constexpr (cuda::std::is_same_v<T, bool>) {
+      return static_cast<int32_t>(data);
+    } else {
+      using UnsignedT = cuda::std::make_unsigned_t<T>;
+      return cuda::std::popcount(static_cast<UnsignedT>(data));
+    }
+  }
+};
 
-struct DeviceInvert {
+struct DeviceBitInvert {
   template <typename T>
   __device__ T operator()(T data)
   {
@@ -433,6 +445,49 @@ struct MathOpDispatcher {
   }
 };
 
+template <typename UFN>
+struct BitwiseCountDispatcher {
+ private:
+  template <typename T>
+  static constexpr bool is_supported()
+  {
+    return std::is_integral_v<T>;
+  }
+
+  // Always use int32_t as output type for bit count.
+  using OutputType = int32_t;
+
+ public:
+  template <typename T, std::enable_if_t<is_supported<T>()>* = nullptr>
+  std::unique_ptr<cudf::column> operator()(cudf::column_view const& input,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::device_async_resource_ref mr)
+  {
+    if (input.type().id() == type_id::DICTIONARY32) {
+      auto dictionary_view = cudf::column_device_view::create(input, stream);
+      auto dictionary_itr  = dictionary::detail::make_dictionary_iterator<T>(*dictionary_view);
+      return transform_fn<OutputType, UFN>(dictionary_itr,
+                                           dictionary_itr + input.size(),
+                                           cudf::detail::copy_bitmask(input, stream, mr),
+                                           input.null_count(),
+                                           stream,
+                                           mr);
+    }
+    return transform_fn<OutputType, UFN>(input.begin<T>(),
+                                         input.end<T>(),
+                                         cudf::detail::copy_bitmask(input, stream, mr),
+                                         input.null_count(),
+                                         stream,
+                                         mr);
+  }
+
+  template <typename T, typename... Args>
+  std::enable_if_t<!is_supported<T>(), std::unique_ptr<cudf::column>> operator()(Args&&...)
+  {
+    CUDF_FAIL("Unsupported datatype for operation");
+  }
+};
+
 template <typename UFN>
 struct LogicalOpDispatcher {
  private:
@@ -510,7 +565,9 @@ std::unique_ptr<cudf::column> unary_operation(cudf::column_view const& input,
     return type_dispatcher(input.type(), detail::FixedPointOpDispatcher{}, input, op, stream, mr);
 
   if (input.is_empty()) {
-    return op == cudf::unary_operator::NOT ? make_empty_column(type_id::BOOL8) : empty_like(input);
+    if (op == cudf::unary_operator::NOT) { return make_empty_column(type_id::BOOL8); }
+    if (op == cudf::unary_operator::BIT_COUNT) { return make_empty_column(type_id::INT32); }
+    return empty_like(input);
   }
 
   // dispatch on the keys if dictionary saves a 2nd dispatch later
@@ -579,9 +636,12 @@ std::unique_ptr<cudf::column> unary_operation(cudf::column_view const& input,
     case cudf::unary_operator::RINT:
       return cudf::type_dispatcher(
         dispatch_type, MathOpDispatcher<DeviceRInt, FloatOnlyOps>{}, input, stream, mr);
+    case cudf::unary_operator::BIT_COUNT:
+      return cudf::type_dispatcher(
+        dispatch_type, detail::BitwiseCountDispatcher<DeviceBitCount>{}, input, stream, mr);
     case cudf::unary_operator::BIT_INVERT:
       return cudf::type_dispatcher(
-        dispatch_type, MathOpDispatcher<DeviceInvert, BitWiseOps>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceBitInvert, BitWiseOps>{}, input, stream, mr);
     case cudf::unary_operator::NOT:
       return cudf::type_dispatcher(
         dispatch_type, detail::LogicalOpDispatcher<DeviceNot>{}, input, stream, mr);

@@ -17,12 +17,14 @@
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
 #include <cudf/detail/utilities/integer_utils.hpp>
 #include <cudf/dictionary/encode.hpp>
 #include <cudf/unary.hpp>
 
+#include <bitset>
 #include <numeric>
 #include <vector>
 
@@ -89,6 +91,76 @@ TEST_F(UnaryNegateComplexTypesErrorTests, NegateListsColumnFail)
   EXPECT_THROW(cudf::unary_operation(input, cudf::unary_operator::NEGATE), cudf::logic_error);
 }
 
+struct UnaryBitwiseOpsBoolTest : public cudf::test::BaseFixture {};
+
+template <typename T>
+struct UnaryBitwiseOpsTypedTest : public cudf::test::BaseFixture {};
+TYPED_TEST_SUITE(UnaryBitwiseOpsTypedTest, cudf::test::IntegralTypesNotBool);
+
+TEST_F(UnaryBitwiseOpsBoolTest, BitCountBool)
+{
+  using T          = bool;
+  auto const data  = std::vector<T>{true, false, true, true, false, true, false, false};
+  auto const input = cudf::test::fixed_width_column_wrapper<T>(data.begin(), data.end());
+
+  std::vector<int32_t> expected_data(data.size());
+  std::transform(data.begin(), data.end(), expected_data.begin(), [](T val) {
+    return static_cast<int32_t>(val);
+  });
+  auto const expected =
+    cudf::test::fixed_width_column_wrapper<int32_t>(expected_data.begin(), expected_data.end());
+  auto const output = cudf::unary_operation(input, cudf::unary_operator::BIT_COUNT);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, output->view());
+}
+
+TYPED_TEST(UnaryBitwiseOpsTypedTest, BitCount)
+{
+  using T         = TypeParam;
+  auto const data = [] {
+    std::vector<T> data(15);
+    std::iota(data.begin(), data.end(), 1);
+    return data;
+  }();
+  auto const input = cudf::test::fixed_width_column_wrapper<T>(data.begin(), data.end());
+
+  std::vector<int32_t> expected_data(data.size());
+  std::transform(data.begin(), data.end(), expected_data.begin(), [](T val) {
+    using UnsignedT      = std::conditional_t<std::is_same_v<T, bool>, T, std::make_unsigned_t<T>>;
+    auto constexpr nbits = CHAR_BIT * sizeof(T);
+    auto const b         = std::bitset<nbits>(static_cast<UnsignedT>(val));
+    return b.count();
+  });
+  auto const expected =
+    cudf::test::fixed_width_column_wrapper<int32_t>(expected_data.begin(), expected_data.end());
+  auto const output = cudf::unary_operation(input, cudf::unary_operator::BIT_COUNT);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, output->view());
+}
+
+TYPED_TEST(UnaryBitwiseOpsTypedTest, BitCountWithNulls)
+{
+  using T         = TypeParam;
+  auto const data = [] {
+    std::vector<T> data(15);
+    std::iota(data.begin(), data.end(), 1);
+    return data;
+  }();
+  auto const validity = cudf::test::iterators::nulls_at({2, 5, 9, 12});
+  auto const input =
+    cudf::test::fixed_width_column_wrapper<TypeParam>(data.begin(), data.end(), validity);
+
+  std::vector<int32_t> expected_data(data.size());
+  std::transform(data.begin(), data.end(), expected_data.begin(), [](T val) {
+    using UnsignedT      = std::conditional_t<std::is_same_v<T, bool>, T, std::make_unsigned_t<T>>;
+    auto constexpr nbits = CHAR_BIT * sizeof(T);
+    auto const b         = std::bitset<nbits>(static_cast<UnsignedT>(val));
+    return b.count();
+  });
+  auto const expected = cudf::test::fixed_width_column_wrapper<int32_t>(
+    expected_data.begin(), expected_data.end(), validity);
+  auto const output = cudf::unary_operation(input, cudf::unary_operator::BIT_COUNT);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, output->view());
+}
+
 template <typename T>
 struct UnaryLogicalOpsTest : public cudf::test::BaseFixture {};
 
@@ -466,11 +538,14 @@ TYPED_TEST(UnaryMathFloatOpsTest, RINTNonFloatingFail)
 
 TYPED_TEST(UnaryMathFloatOpsTest, IntegralTypeFail)
 {
-  cudf::test::fixed_width_column_wrapper<TypeParam> input{1.0};
-  EXPECT_THROW(cudf::unary_operation(input, cudf::unary_operator::BIT_INVERT), cudf::logic_error);
-  auto d = cudf::dictionary::encode(input);
-  EXPECT_THROW(cudf::unary_operation(d->view(), cudf::unary_operator::BIT_INVERT),
-               cudf::logic_error);
+  auto const test = [](auto const op_type) {
+    cudf::test::fixed_width_column_wrapper<TypeParam> input{1.0};
+    EXPECT_THROW(cudf::unary_operation(input, op_type), cudf::logic_error);
+    auto d = cudf::dictionary::encode(input);
+    EXPECT_THROW(cudf::unary_operation(d->view(), op_type), cudf::logic_error);
+  };
+  test(cudf::unary_operator::BIT_INVERT);
+  test(cudf::unary_operator::BIT_COUNT);
 }
 
 TYPED_TEST(UnaryMathFloatOpsTest, SimpleCBRT)

@@ -1,6 +1,6 @@
 /*
  *
- *  Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ *  Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
@@ -1335,7 +1335,16 @@ public final ColumnVector rint() {
   }
 
   /**
-   * invert the bits, output is the same type as input.
+   * Count the number of set bit for each integer value.
+   */
+  public final ColumnVector bitCount() {
+    return unaryOp(UnaryOp.BIT_COUNT);
+  }
+
+  /**
+   * Invert the bits, output is the same type as input.
+   * For BOOL8 type, this is equivalent to logical not (UnaryOp.NOT), but this does not
+   * matter since Spark does not support bitwise inverting on boolean type.
    */
   public final ColumnVector bitInvert() {
     return unaryOp(UnaryOp.BIT_INVERT);

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -39,8 +39,9 @@ public enum UnaryOp {
   FLOOR(17),
   ABS(18),
   RINT(19),
-  BIT_INVERT(20),
-  NOT(21);
+  BIT_COUNT(20),
+  BIT_INVERT(21),
+  NOT(22);
 
   private static final UnaryOp[] OPS = UnaryOp.values();
   final int nativeId;

@@ -31,6 +31,7 @@ cdef extern from "cudf/unary.hpp" namespace "cudf" nogil:
         FLOOR
         ABS
         RINT
+        BIT_COUNT
         BIT_INVERT
         NOT
         NEGATE

@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 from enum import IntEnum
 
@@ -26,6 +26,7 @@ class UnaryOperator(IntEnum):
     FLOOR = ...
     ABS = ...
     RINT = ...
+    BIT_COUNT = ...
     BIT_INVERT = ...
     NOT = ...
     NEGATE = ...
-Original file line number
+Diff line change
@@ Expand Up / @@ -31,6 +31,7 @@ cdef extern from "cudf/unary.hpp" namespace "cudf" nogil: @@
             FLOOR
             ABS
             RINT
+            BIT_COUNT
             BIT_INVERT
             NOT
             NEGATE
@@ Expand Down @@