11/*
2- * Copyright (c) 2023, NVIDIA CORPORATION.
2+ * Copyright (c) 2023-2024 , NVIDIA CORPORATION.
33 *
44 * Licensed under the Apache License, Version 2.0 (the "License");
55 * you may not use this file except in compliance with the License.
1515 */
1616
1717#include < benchmarks/common/generate_input.hpp>
18+ #include < benchmarks/stream_compaction/stream_compaction_common.hpp>
1819
1920#include < cudf/column/column_view.hpp>
2021#include < cudf/lists/list_view.hpp>
2324
2425#include < nvbench/nvbench.cuh>
2526
27+ #include < limits>
28+
2629NVBENCH_DECLARE_TYPE_STRINGS (cudf::timestamp_ms, " cudf::timestamp_ms" , " cudf::timestamp_ms" );
2730
2831template <typename Type>
2932void nvbench_stable_distinct (nvbench::state& state, nvbench::type_list<Type>)
3033{
31- cudf::size_type const num_rows = state.get_int64 (" NumRows" );
34+ cudf::size_type const num_rows = state.get_int64 (" NumRows" );
35+ auto const keep = get_keep (state.get_string (" keep" ));
36+ cudf::size_type const cardinality = state.get_int64 (" cardinality" );
37+
38+ if (cardinality > num_rows) {
39+ state.skip (" cardinality > num_rows" );
40+ return ;
41+ }
3242
33- data_profile profile = data_profile_builder ().cardinality (0 ).null_probability (0.01 ).distribution (
34- cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0 , 100 );
43+ data_profile profile = data_profile_builder ()
44+ .cardinality (cardinality)
45+ .null_probability (0.01 )
46+ .distribution (cudf::type_to_id<Type>(),
47+ distribution_id::UNIFORM,
48+ static_cast <Type>(0 ),
49+ std::numeric_limits<Type>::max ());
3550
3651 auto source_column = create_random_column (cudf::type_to_id<Type>(), row_count{num_rows}, profile);
3752
@@ -40,27 +55,27 @@ void nvbench_stable_distinct(nvbench::state& state, nvbench::type_list<Type>)
4055
4156 state.set_cuda_stream (nvbench::make_cuda_stream_view (cudf::get_default_stream ().value ()));
4257 state.exec (nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
43- auto result = cudf::stable_distinct (input_table,
44- {0 },
45- cudf::duplicate_keep_option::KEEP_ANY,
46- cudf::null_equality::EQUAL,
47- cudf::nan_equality::ALL_EQUAL);
58+ auto result = cudf::stable_distinct (
59+ input_table, {0 }, keep, cudf::null_equality::EQUAL, cudf::nan_equality::ALL_EQUAL);
4860 });
4961}
5062
51- using data_type = nvbench::type_list<bool , int8_t , int32_t , int64_t , float , cudf::timestamp_ms >;
63+ using data_type = nvbench::type_list<int32_t , int64_t >;
5264
5365NVBENCH_BENCH_TYPES (nvbench_stable_distinct, NVBENCH_TYPE_AXES(data_type))
5466 .set_name(" stable_distinct" )
5567 .set_type_axes_names({" Type" })
56- .add_int64_axis(" NumRows" , {10'000 , 100'000 , 1'000'000 , 10'000'000 });
68+ .add_string_axis(" keep" , {" any" , " first" , " last" , " none" })
69+ .add_int64_axis(" cardinality" , {100 , 100'000 , 10'000'000 , 1'000'000'000 })
70+ .add_int64_axis(" NumRows" , {100 , 100'000 , 10'000'000 , 1'000'000'000 });
5771
5872template <typename Type>
5973void nvbench_stable_distinct_list (nvbench::state& state, nvbench::type_list<Type>)
6074{
6175 auto const size = state.get_int64 (" ColumnSize" );
6276 auto const dtype = cudf::type_to_id<Type>();
6377 double const null_probability = state.get_float64 (" null_probability" );
78+ auto const keep = get_keep (state.get_string (" keep" ));
6479
6580 auto builder = data_profile_builder ().null_probability (null_probability);
6681 if (dtype == cudf::type_id::LIST) {
@@ -80,17 +95,15 @@ void nvbench_stable_distinct_list(nvbench::state& state, nvbench::type_list<Type
8095
8196 state.set_cuda_stream (nvbench::make_cuda_stream_view (cudf::get_default_stream ().value ()));
8297 state.exec (nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
83- auto result = cudf::stable_distinct (*table,
84- {0 },
85- cudf::duplicate_keep_option::KEEP_ANY,
86- cudf::null_equality::EQUAL,
87- cudf::nan_equality::ALL_EQUAL);
98+ auto result = cudf::stable_distinct (
99+ *table, {0 }, keep, cudf::null_equality::EQUAL, cudf::nan_equality::ALL_EQUAL);
88100 });
89101}
90102
91103NVBENCH_BENCH_TYPES (nvbench_stable_distinct_list,
92104 NVBENCH_TYPE_AXES (nvbench::type_list<int32_t , cudf::list_view>))
93105 .set_name(" stable_distinct_list" )
94106 .set_type_axes_names({" Type" })
107+ .add_string_axis(" keep" , {" any" , " first" , " last" , " none" })
95108 .add_float64_axis(" null_probability" , {0.0 , 0.1 })
96109 .add_int64_axis(" ColumnSize" , {100'000'000 });
0 commit comments