Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
976a746
dynamic map erase, still needs work
niskos99 Mar 9, 2022
eead8b8
minor clarity changes
niskos99 Mar 9, 2022
20ac7a3
erase bug fix
niskos99 Apr 5, 2022
ede50d6
dynamic map erase working, only 4 submaps for now
niskos99 Apr 5, 2022
1d8fbd0
type wrappers added
niskos99 Apr 5, 2022
63dd4eb
prevent implicit type conversion of sentinels during construction
niskos99 Apr 5, 2022
52d83f6
erase benchmark added
niskos99 Apr 5, 2022
0878216
num_successes managed pointer updated
niskos99 Apr 6, 2022
7eac9d1
more efficient block reduce
niskos99 Apr 6, 2022
4d10631
doc changes
niskos99 Apr 6, 2022
b59a16b
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Apr 6, 2022
b00fcba
shared mem atomics to keep track of per-submap erases
niskos99 Apr 7, 2022
c146f9d
doc improvements
niskos99 Apr 30, 2022
faf8224
warning fixes
niskos99 Aug 31, 2022
e4b548e
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Aug 31, 2022
93b7983
removed nvtx file
niskos99 Aug 31, 2022
cd21190
num_successes_ removed
niskos99 Oct 7, 2022
4c1952d
doxygen warning fixes
niskos99 Oct 7, 2022
80f4d14
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Oct 7, 2022
6616889
code cleanup
niskos99 Nov 14, 2022
2df247c
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Nov 14, 2022
af7706d
switched typedef to using
niskos99 Nov 14, 2022
54ae254
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Nov 14, 2022
593fe12
responding to PR comments
niskos99 Nov 18, 2022
7598e47
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Nov 18, 2022
ecd3945
Merge remote-tracking branch 'upstream/dev' into erase
PointKernel Dec 20, 2022
1e6ad99
Add more data types for erase tests
PointKernel Dec 20, 2022
9e324fc
Use public murmurhash
PointKernel Dec 20, 2022
bb0e4e9
Update static map benchmark: fix runtime stall bug, remove redundant …
PointKernel Dec 20, 2022
10fd08a
Update dynamic map benchmark: fix conversion warning, add search_none…
PointKernel Dec 20, 2022
788ad29
Cleanups: get rid of host-side counter vector, remove get_ prefixes a…
PointKernel Dec 20, 2022
c71dd60
Get rid of num_successes getter
PointKernel Dec 20, 2022
ab4ef0c
Fix comments
PointKernel Dec 20, 2022
d72e403
Update tests
PointKernel Dec 20, 2022
9478650
Update include/cuco/detail/dynamic_map_kernels.cuh
PointKernel Dec 20, 2022
82e0f2e
Cleanups: relaxed memory atomic, static_assert instead of runtime exp…
PointKernel Dec 21, 2022
412b7f9
Merge branch 'dynamic_map_erase' of github.com:Nicolas-Iskos/cuCollec…
PointKernel Dec 21, 2022
f5ec677
Reorder header groups + remove unused counter allocator
PointKernel Dec 21, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 119 additions & 41 deletions benchmarks/hash_table/dynamic_map_bench.cu
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ static void generate_keys(OutputIt output_begin, OutputIt output_end)

static void gen_final_size(benchmark::internal::Benchmark* b)
{
for (auto size = 10'000'000; size <= 150'000'000; size += 20'000'000) {
for (auto size = 10'000'000; size <= 310'000'000; size += 20'000'000) {
b->Args({size});
}
}
Expand Down Expand Up @@ -137,62 +137,140 @@ static void BM_dynamic_search_all(::benchmark::State& state)
int64_t(state.range(0)));
}

BENCHMARK_TEMPLATE(BM_dynamic_insert, int32_t, int32_t, dist_type::UNIQUE)
->Unit(benchmark::kMillisecond)
->Apply(gen_final_size)
->UseManualTime();
template <typename Key, typename Value, dist_type Dist>
static void BM_dynamic_search_none(::benchmark::State& state)
{
using map_type = cuco::dynamic_map<Key, Value>;

BENCHMARK_TEMPLATE(BM_dynamic_search_all, int32_t, int32_t, dist_type::UNIQUE)
->Unit(benchmark::kMillisecond)
->Apply(gen_final_size)
->UseManualTime();
std::size_t num_keys = state.range(0);
std::size_t initial_size = 1 << 27;

BENCHMARK_TEMPLATE(BM_dynamic_insert, int32_t, int32_t, dist_type::UNIFORM)
->Unit(benchmark::kMillisecond)
->Apply(gen_final_size)
->UseManualTime();
std::vector<Key> h_keys(num_keys);
std::vector<cuco::pair_type<Key, Value>> h_pairs(num_keys);

BENCHMARK_TEMPLATE(BM_dynamic_search_all, int32_t, int32_t, dist_type::UNIFORM)
->Unit(benchmark::kMillisecond)
->Apply(gen_final_size)
->UseManualTime();
generate_keys<Dist, Key>(h_keys.begin(), h_keys.end());

BENCHMARK_TEMPLATE(BM_dynamic_insert, int32_t, int32_t, dist_type::GAUSSIAN)
->Unit(benchmark::kMillisecond)
->Apply(gen_final_size)
->UseManualTime();
for (auto i = 0; i < num_keys; ++i) {
Key key = h_keys[i] + num_keys;
Value val = h_keys[i] + num_keys;
h_pairs[i].first = key;
h_pairs[i].second = val;
}

BENCHMARK_TEMPLATE(BM_dynamic_search_all, int32_t, int32_t, dist_type::GAUSSIAN)
->Unit(benchmark::kMillisecond)
->Apply(gen_final_size)
->UseManualTime();
thrust::device_vector<Key> d_keys(h_keys);
thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs(h_pairs);
thrust::device_vector<Value> d_results(num_keys);

BENCHMARK_TEMPLATE(BM_dynamic_insert, int64_t, int64_t, dist_type::UNIQUE)
->Unit(benchmark::kMillisecond)
->Apply(gen_final_size)
->UseManualTime();
map_type map{
initial_size, cuco::sentinel::empty_key<Key>{-1}, cuco::sentinel::empty_value<Value>{-1}};
map.insert(d_pairs.begin(), d_pairs.end());

BENCHMARK_TEMPLATE(BM_dynamic_search_all, int64_t, int64_t, dist_type::UNIQUE)
->Unit(benchmark::kMillisecond)
->Apply(gen_final_size)
->UseManualTime();
for (auto _ : state) {
cuda_event_timer raii{state};
map.find(d_keys.begin(), d_keys.end(), d_results.begin());
}

BENCHMARK_TEMPLATE(BM_dynamic_insert, int64_t, int64_t, dist_type::UNIFORM)
->Unit(benchmark::kMillisecond)
->Apply(gen_final_size)
->UseManualTime();
state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
int64_t(state.range(0)));
}

BENCHMARK_TEMPLATE(BM_dynamic_search_all, int64_t, int64_t, dist_type::UNIFORM)
template <typename Key, typename Value, dist_type Dist>
static void BM_dynamic_erase_all(::benchmark::State& state)
{
using map_type = cuco::dynamic_map<Key, Value>;

std::size_t num_keys = state.range(0);
std::size_t initial_size = 1 << 27;

std::vector<Key> h_keys(num_keys);
std::vector<cuco::pair_type<Key, Value>> h_pairs(num_keys);

generate_keys<Dist, Key>(h_keys.begin(), h_keys.end());

for (uint32_t i = 0; i < num_keys; ++i) {
Key key = h_keys[i];
Value val = h_keys[i];
h_pairs[i].first = key;
h_pairs[i].second = val;
}

thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs(h_pairs);
thrust::device_vector<Key> d_keys(h_keys);

std::size_t batch_size = 1E6;
for (auto _ : state) {
map_type map{initial_size,
cuco::sentinel::empty_key<Key>{-1},
cuco::sentinel::empty_value<Value>{-1},
cuco::sentinel::erased_key<Key>{-2}};
for (uint32_t i = 0; i < num_keys; i += batch_size) {
map.insert(d_pairs.begin() + i, d_pairs.begin() + i + batch_size);
}
{
cuda_event_timer raii{state};
for (uint32_t i = 0; i < num_keys; i += batch_size) {
map.erase(d_keys.begin() + i, d_keys.begin() + i + batch_size);
}
}
}

state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
int64_t(state.range(0)));
}

template <typename Key, typename Value, dist_type Dist>
static void BM_dynamic_erase_none(::benchmark::State& state)
{
using map_type = cuco::dynamic_map<Key, Value>;

std::size_t num_keys = state.range(0);
std::size_t initial_size = 1 << 27;

std::vector<Key> h_keys(num_keys);
std::vector<cuco::pair_type<Key, Value>> h_pairs(num_keys);

generate_keys<Dist, Key>(h_keys.begin(), h_keys.end());

for (auto i = 0; i < num_keys; ++i) {
Key key = h_keys[i] + num_keys;
Value val = h_keys[i] + num_keys;
h_pairs[i].first = key;
h_pairs[i].second = val;
}

thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs(h_pairs);
thrust::device_vector<Key> d_keys(h_keys);

std::size_t batch_size = 1E6;
for (auto _ : state) {
map_type map{initial_size,
cuco::sentinel::empty_key<Key>{-1},
cuco::sentinel::empty_value<Value>{-1},
cuco::sentinel::erased_key<Key>{-2}};
for (auto i = 0; i < num_keys; i += batch_size) {
map.insert(d_pairs.begin() + i, d_pairs.begin() + i + batch_size);
}
{
cuda_event_timer raii{state};
map.erase(d_keys.begin(), d_keys.end());
}
}

state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
int64_t(state.range(0)));
}

BENCHMARK_TEMPLATE(BM_dynamic_insert, int32_t, int32_t, dist_type::UNIQUE)
->Unit(benchmark::kMillisecond)
->Apply(gen_final_size)
->UseManualTime();

BENCHMARK_TEMPLATE(BM_dynamic_insert, int64_t, int64_t, dist_type::GAUSSIAN)
BENCHMARK_TEMPLATE(BM_dynamic_search_all, int32_t, int32_t, dist_type::UNIQUE)
->Unit(benchmark::kMillisecond)
->Apply(gen_final_size)
->UseManualTime();

BENCHMARK_TEMPLATE(BM_dynamic_search_all, int64_t, int64_t, dist_type::GAUSSIAN)
BENCHMARK_TEMPLATE(BM_dynamic_erase_all, int32_t, int32_t, dist_type::UNIQUE)
->Unit(benchmark::kMillisecond)
->Apply(gen_final_size)
->UseManualTime();
->UseManualTime();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might want to keep the existing benchmarks as they are right now.

If you want, similar to multimap benchmarks, we can split the current file into several smaller ones and relocate them into a new hash_table/dynamic_map folder.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added the rest of the benchmarks back in. Although even before, there wasn't any particular reason why that subset of benchmarks was being executed.

93 changes: 93 additions & 0 deletions benchmarks/hash_table/static_map_bench.cu
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,50 @@ static void BM_static_map_search_all(::benchmark::State& state)
int64_t(state.range(0)));
}

template <typename Key, typename Value, dist_type Dist>
static void BM_static_map_search_none(::benchmark::State& state)
{
using map_type = cuco::static_map<Key, Value>;

std::size_t num_keys = state.range(0);
float occupancy = state.range(1) / float{100};
std::size_t size = num_keys / occupancy;

map_type map{size, -1, -1};
auto view = map.get_device_mutable_view();

std::vector<Key> h_keys(num_keys);
std::vector<Value> h_values(num_keys);
std::vector<cuco::pair_type<Key, Value>> h_pairs(num_keys);
std::vector<Value> h_results(num_keys);

generate_keys<Dist, Key>(h_keys.begin(), h_keys.end());

for (auto i = 0; i < num_keys; ++i) {
Key key = h_keys[i];
Value val = h_keys[i];
h_pairs[i].first = key;
h_pairs[i].second = val;
}

// diff keys
for (int i = 0; i < num_keys; ++i)
h_keys[i] += num_keys;

thrust::device_vector<Key> d_keys(h_keys);
thrust::device_vector<Value> d_results(num_keys);
thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs(h_pairs);

map.insert(d_pairs.begin(), d_pairs.end());

for (auto _ : state) {
map.find(d_keys.begin(), d_keys.end(), d_results.begin());
}

state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
int64_t(state.range(0)));
}

template <typename Key, typename Value, dist_type Dist>
static void BM_static_map_erase_all(::benchmark::State& state)
{
Expand Down Expand Up @@ -200,6 +244,55 @@ static void BM_static_map_erase_all(::benchmark::State& state)
int64_t(state.range(0)));
}

template <typename Key, typename Value, dist_type Dist>
static void BM_static_map_erase_none(::benchmark::State& state)
{
using map_type = cuco::static_map<Key, Value>;

std::size_t num_keys = state.range(0);
float occupancy = state.range(1) / float{100};
std::size_t size = num_keys / occupancy;

map_type map{size, -1, -1};
auto view = map.get_device_mutable_view();

std::vector<Key> h_keys(num_keys);
std::vector<Value> h_values(num_keys);
std::vector<cuco::pair_type<Key, Value>> h_pairs(num_keys);
std::vector<Value> h_results(num_keys);

generate_keys<Dist, Key>(h_keys.begin(), h_keys.end());

for (auto i = 0; i < num_keys; ++i) {
Key key = h_keys[i];
Value val = h_keys[i];
h_pairs[i].first = key;
h_pairs[i].second = val;
}

// diff keys
for (int i = 0; i < num_keys; ++i)
h_keys[i] += num_keys;

thrust::device_vector<Key> d_keys(h_keys);
thrust::device_vector<bool> d_results(num_keys);
thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs(h_pairs);

for (auto _ : state) {
// state.ResumeTiming();
state.PauseTiming();
map.insert(d_pairs.begin(), d_pairs.end());
state.ResumeTiming();

map.erase(d_keys.begin(), d_keys.end());

// state.PauseTiming();
}

state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
int64_t(state.range(0)));
}

BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE)
->Unit(benchmark::kMillisecond)
->Apply(generate_size_and_occupancy)
Expand Down
Loading