Skip to content

Commit 125e331

Browse files
authored
Merge pull request #149 from Nicolas-Iskos/dynamic_map_erase
Erase Functionality for dynamic_map
2 parents 31e1d5d + f5ec677 commit 125e331

File tree

11 files changed

+894
-98
lines changed

11 files changed

+894
-98
lines changed

benchmarks/hash_table/dynamic_map_bench.cu

Lines changed: 170 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ static void generate_keys(OutputIt output_begin, OutputIt output_end)
5757

5858
static void gen_final_size(benchmark::internal::Benchmark* b)
5959
{
60-
for (auto size = 10'000'000; size <= 150'000'000; size += 20'000'000) {
60+
for (auto size = 10'000'000; size <= 310'000'000; size += 20'000'000) {
6161
b->Args({size});
6262
}
6363
}
@@ -135,6 +135,128 @@ static void BM_dynamic_search_all(::benchmark::State& state)
135135
int64_t(state.range(0)));
136136
}
137137

138+
template <typename Key, typename Value, dist_type Dist>
139+
static void BM_dynamic_search_none(::benchmark::State& state)
140+
{
141+
using map_type = cuco::dynamic_map<Key, Value>;
142+
143+
std::size_t num_keys = state.range(0);
144+
std::size_t initial_size = 1 << 27;
145+
146+
std::vector<Key> h_keys(num_keys);
147+
std::vector<cuco::pair_type<Key, Value>> h_pairs(num_keys);
148+
149+
generate_keys<Dist, Key>(h_keys.begin(), h_keys.end());
150+
151+
for (std::size_t i = 0; i < num_keys; ++i) {
152+
Key key = h_keys[i] + num_keys;
153+
Value val = h_keys[i] + num_keys;
154+
h_pairs[i].first = key;
155+
h_pairs[i].second = val;
156+
}
157+
158+
thrust::device_vector<Key> d_keys(h_keys);
159+
thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs(h_pairs);
160+
thrust::device_vector<Value> d_results(num_keys);
161+
162+
map_type map{initial_size, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}};
163+
map.insert(d_pairs.begin(), d_pairs.end());
164+
165+
for (auto _ : state) {
166+
cuda_event_timer raii{state};
167+
map.find(d_keys.begin(), d_keys.end(), d_results.begin());
168+
}
169+
170+
state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
171+
int64_t(state.range(0)));
172+
}
173+
174+
template <typename Key, typename Value, dist_type Dist>
175+
static void BM_dynamic_erase_all(::benchmark::State& state)
176+
{
177+
using map_type = cuco::dynamic_map<Key, Value>;
178+
179+
std::size_t num_keys = state.range(0);
180+
std::size_t initial_size = 1 << 27;
181+
182+
std::vector<Key> h_keys(num_keys);
183+
std::vector<cuco::pair_type<Key, Value>> h_pairs(num_keys);
184+
185+
generate_keys<Dist, Key>(h_keys.begin(), h_keys.end());
186+
187+
for (uint32_t i = 0; i < num_keys; ++i) {
188+
Key key = h_keys[i];
189+
Value val = h_keys[i];
190+
h_pairs[i].first = key;
191+
h_pairs[i].second = val;
192+
}
193+
194+
thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs(h_pairs);
195+
thrust::device_vector<Key> d_keys(h_keys);
196+
197+
std::size_t batch_size = 1E6;
198+
for (auto _ : state) {
199+
map_type map{initial_size,
200+
cuco::empty_key<Key>{-1},
201+
cuco::empty_value<Value>{-1},
202+
cuco::erased_key<Key>{-2}};
203+
for (uint32_t i = 0; i < num_keys; i += batch_size) {
204+
map.insert(d_pairs.begin() + i, d_pairs.begin() + i + batch_size);
205+
}
206+
{
207+
cuda_event_timer raii{state};
208+
for (uint32_t i = 0; i < num_keys; i += batch_size) {
209+
map.erase(d_keys.begin() + i, d_keys.begin() + i + batch_size);
210+
}
211+
}
212+
}
213+
214+
state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
215+
int64_t(state.range(0)));
216+
}
217+
218+
template <typename Key, typename Value, dist_type Dist>
219+
static void BM_dynamic_erase_none(::benchmark::State& state)
220+
{
221+
using map_type = cuco::dynamic_map<Key, Value>;
222+
223+
std::size_t num_keys = state.range(0);
224+
std::size_t initial_size = 1 << 27;
225+
226+
std::vector<Key> h_keys(num_keys);
227+
std::vector<cuco::pair_type<Key, Value>> h_pairs(num_keys);
228+
229+
generate_keys<Dist, Key>(h_keys.begin(), h_keys.end());
230+
231+
for (std::size_t i = 0; i < num_keys; ++i) {
232+
Key key = h_keys[i] + num_keys;
233+
Value val = h_keys[i] + num_keys;
234+
h_pairs[i].first = key;
235+
h_pairs[i].second = val;
236+
}
237+
238+
thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs(h_pairs);
239+
thrust::device_vector<Key> d_keys(h_keys);
240+
241+
std::size_t batch_size = 1E6;
242+
for (auto _ : state) {
243+
map_type map{initial_size,
244+
cuco::empty_key<Key>{-1},
245+
cuco::empty_value<Value>{-1},
246+
cuco::erased_key<Key>{-2}};
247+
for (std::size_t i = 0; i < num_keys; i += batch_size) {
248+
map.insert(d_pairs.begin() + i, d_pairs.begin() + i + batch_size);
249+
}
250+
{
251+
cuda_event_timer raii{state};
252+
map.erase(d_keys.begin(), d_keys.end());
253+
}
254+
}
255+
256+
state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
257+
int64_t(state.range(0)));
258+
}
259+
138260
BENCHMARK_TEMPLATE(BM_dynamic_insert, int32_t, int32_t, dist_type::UNIQUE)
139261
->Unit(benchmark::kMillisecond)
140262
->Apply(gen_final_size)
@@ -145,32 +267,37 @@ BENCHMARK_TEMPLATE(BM_dynamic_search_all, int32_t, int32_t, dist_type::UNIQUE)
145267
->Apply(gen_final_size)
146268
->UseManualTime();
147269

148-
BENCHMARK_TEMPLATE(BM_dynamic_insert, int32_t, int32_t, dist_type::UNIFORM)
270+
BENCHMARK_TEMPLATE(BM_dynamic_erase_all, int32_t, int32_t, dist_type::UNIQUE)
149271
->Unit(benchmark::kMillisecond)
150272
->Apply(gen_final_size)
151273
->UseManualTime();
152274

153-
BENCHMARK_TEMPLATE(BM_dynamic_search_all, int32_t, int32_t, dist_type::UNIFORM)
275+
BENCHMARK_TEMPLATE(BM_dynamic_insert, int64_t, int64_t, dist_type::UNIQUE)
154276
->Unit(benchmark::kMillisecond)
155277
->Apply(gen_final_size)
156278
->UseManualTime();
157279

158-
BENCHMARK_TEMPLATE(BM_dynamic_insert, int32_t, int32_t, dist_type::GAUSSIAN)
280+
BENCHMARK_TEMPLATE(BM_dynamic_search_all, int64_t, int64_t, dist_type::UNIQUE)
159281
->Unit(benchmark::kMillisecond)
160282
->Apply(gen_final_size)
161283
->UseManualTime();
162284

163-
BENCHMARK_TEMPLATE(BM_dynamic_search_all, int32_t, int32_t, dist_type::GAUSSIAN)
285+
BENCHMARK_TEMPLATE(BM_dynamic_erase_all, int64_t, int64_t, dist_type::UNIQUE)
164286
->Unit(benchmark::kMillisecond)
165287
->Apply(gen_final_size)
166288
->UseManualTime();
167289

168-
BENCHMARK_TEMPLATE(BM_dynamic_insert, int64_t, int64_t, dist_type::UNIQUE)
290+
BENCHMARK_TEMPLATE(BM_dynamic_insert, int32_t, int32_t, dist_type::UNIFORM)
169291
->Unit(benchmark::kMillisecond)
170292
->Apply(gen_final_size)
171293
->UseManualTime();
172294

173-
BENCHMARK_TEMPLATE(BM_dynamic_search_all, int64_t, int64_t, dist_type::UNIQUE)
295+
BENCHMARK_TEMPLATE(BM_dynamic_search_all, int32_t, int32_t, dist_type::UNIFORM)
296+
->Unit(benchmark::kMillisecond)
297+
->Apply(gen_final_size)
298+
->UseManualTime();
299+
300+
BENCHMARK_TEMPLATE(BM_dynamic_erase_all, int32_t, int32_t, dist_type::UNIFORM)
174301
->Unit(benchmark::kMillisecond)
175302
->Apply(gen_final_size)
176303
->UseManualTime();
@@ -185,6 +312,26 @@ BENCHMARK_TEMPLATE(BM_dynamic_search_all, int64_t, int64_t, dist_type::UNIFORM)
185312
->Apply(gen_final_size)
186313
->UseManualTime();
187314

315+
BENCHMARK_TEMPLATE(BM_dynamic_erase_all, int64_t, int64_t, dist_type::UNIFORM)
316+
->Unit(benchmark::kMillisecond)
317+
->Apply(gen_final_size)
318+
->UseManualTime();
319+
320+
BENCHMARK_TEMPLATE(BM_dynamic_insert, int32_t, int32_t, dist_type::GAUSSIAN)
321+
->Unit(benchmark::kMillisecond)
322+
->Apply(gen_final_size)
323+
->UseManualTime();
324+
325+
BENCHMARK_TEMPLATE(BM_dynamic_search_all, int32_t, int32_t, dist_type::GAUSSIAN)
326+
->Unit(benchmark::kMillisecond)
327+
->Apply(gen_final_size)
328+
->UseManualTime();
329+
330+
BENCHMARK_TEMPLATE(BM_dynamic_erase_all, int32_t, int32_t, dist_type::GAUSSIAN)
331+
->Unit(benchmark::kMillisecond)
332+
->Apply(gen_final_size)
333+
->UseManualTime();
334+
188335
BENCHMARK_TEMPLATE(BM_dynamic_insert, int64_t, int64_t, dist_type::GAUSSIAN)
189336
->Unit(benchmark::kMillisecond)
190337
->Apply(gen_final_size)
@@ -194,3 +341,19 @@ BENCHMARK_TEMPLATE(BM_dynamic_search_all, int64_t, int64_t, dist_type::GAUSSIAN)
194341
->Unit(benchmark::kMillisecond)
195342
->Apply(gen_final_size)
196343
->UseManualTime();
344+
345+
BENCHMARK_TEMPLATE(BM_dynamic_erase_all, int64_t, int64_t, dist_type::GAUSSIAN)
346+
->Unit(benchmark::kMillisecond)
347+
->Apply(gen_final_size)
348+
->UseManualTime();
349+
350+
// TODO: add comprehensive benchmark coverage for erase_none and search_none?
351+
BENCHMARK_TEMPLATE(BM_dynamic_search_none, int32_t, int32_t, dist_type::UNIFORM)
352+
->Unit(benchmark::kMillisecond)
353+
->Apply(gen_final_size)
354+
->UseManualTime();
355+
356+
BENCHMARK_TEMPLATE(BM_dynamic_erase_none, int32_t, int32_t, dist_type::UNIFORM)
357+
->Unit(benchmark::kMillisecond)
358+
->Apply(gen_final_size)
359+
->UseManualTime();

benchmarks/hash_table/static_map_bench.cu

Lines changed: 103 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,53 @@ static void BM_static_map_search_all(::benchmark::State& state)
155155
int64_t(state.range(0)));
156156
}
157157

158+
template <typename Key, typename Value, dist_type Dist>
159+
static void BM_static_map_search_none(::benchmark::State& state)
160+
{
161+
using map_type = cuco::static_map<Key, Value>;
162+
163+
std::size_t num_keys = state.range(0);
164+
float occupancy = state.range(1) / float{100};
165+
std::size_t size = num_keys / occupancy;
166+
167+
map_type map{size, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}};
168+
169+
std::vector<Key> h_keys(num_keys);
170+
std::vector<Value> h_values(num_keys);
171+
std::vector<cuco::pair_type<Key, Value>> h_pairs(num_keys);
172+
std::vector<Value> h_results(num_keys);
173+
174+
generate_keys<Dist, Key>(h_keys.begin(), h_keys.end());
175+
176+
for (std::size_t i = 0; i < num_keys; ++i) {
177+
Key key = h_keys[i];
178+
Value val = h_keys[i];
179+
h_pairs[i].first = key;
180+
h_pairs[i].second = val;
181+
}
182+
183+
// diff keys
184+
for (std::size_t i = 0; i < num_keys; ++i) {
185+
h_keys[i] += num_keys;
186+
}
187+
188+
thrust::device_vector<Key> d_keys(h_keys);
189+
thrust::device_vector<Value> d_results(num_keys);
190+
thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs(h_pairs);
191+
192+
map.insert(d_pairs.begin(), d_pairs.end());
193+
194+
for (auto _ : state) {
195+
map.find(d_keys.begin(), d_keys.end(), d_results.begin());
196+
// TODO: get rid of sync and rewrite the benchmark with `nvbench`
197+
// once https://github.com/NVIDIA/nvbench/pull/80 is merged
198+
cudaDeviceSynchronize();
199+
}
200+
201+
state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
202+
int64_t(state.range(0)));
203+
}
204+
158205
template <typename Key, typename Value, dist_type Dist>
159206
static void BM_static_map_erase_all(::benchmark::State& state)
160207
{
@@ -198,6 +245,52 @@ static void BM_static_map_erase_all(::benchmark::State& state)
198245
int64_t(state.range(0)));
199246
}
200247

248+
template <typename Key, typename Value, dist_type Dist>
249+
static void BM_static_map_erase_none(::benchmark::State& state)
250+
{
251+
using map_type = cuco::static_map<Key, Value>;
252+
253+
std::size_t num_keys = state.range(0);
254+
float occupancy = state.range(1) / float{100};
255+
std::size_t size = num_keys / occupancy;
256+
257+
map_type map{size, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}, cuco::erased_key{-2}};
258+
259+
std::vector<Key> h_keys(num_keys);
260+
std::vector<Value> h_values(num_keys);
261+
std::vector<cuco::pair_type<Key, Value>> h_pairs(num_keys);
262+
std::vector<Value> h_results(num_keys);
263+
264+
generate_keys<Dist, Key>(h_keys.begin(), h_keys.end());
265+
266+
for (std::size_t i = 0; i < num_keys; ++i) {
267+
Key key = h_keys[i];
268+
Value val = h_keys[i];
269+
h_pairs[i].first = key;
270+
h_pairs[i].second = val;
271+
}
272+
273+
// diff keys
274+
for (std::size_t i = 0; i < num_keys; ++i) {
275+
h_keys[i] += num_keys;
276+
}
277+
278+
thrust::device_vector<Key> d_keys(h_keys);
279+
thrust::device_vector<bool> d_results(num_keys);
280+
thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs(h_pairs);
281+
282+
for (auto _ : state) {
283+
state.PauseTiming();
284+
map.insert(d_pairs.begin(), d_pairs.end());
285+
state.ResumeTiming();
286+
287+
map.erase(d_keys.begin(), d_keys.end());
288+
}
289+
290+
state.SetBytesProcessed((sizeof(Key) + sizeof(Value)) * int64_t(state.iterations()) *
291+
int64_t(state.range(0)));
292+
}
293+
201294
BENCHMARK_TEMPLATE(BM_static_map_insert, int32_t, int32_t, dist_type::UNIQUE)
202295
->Unit(benchmark::kMillisecond)
203296
->Apply(generate_size_and_occupancy)
@@ -252,6 +345,15 @@ BENCHMARK_TEMPLATE(BM_static_map_search_all, int64_t, int64_t, dist_type::GAUSSI
252345
->Unit(benchmark::kMillisecond)
253346
->Apply(generate_size_and_occupancy);
254347

255-
BENCHMARK_TEMPLATE(BM_static_map_erase_all, int32_t, int32_t, dist_type::UNIQUE)
348+
// TODO: add comprehensive benchmark coverage for erase_all, erase_none and search_none
349+
BENCHMARK_TEMPLATE(BM_static_map_erase_all, int32_t, int32_t, dist_type::UNIFORM)
350+
->Unit(benchmark::kMillisecond)
351+
->Apply(generate_size_and_occupancy);
352+
353+
BENCHMARK_TEMPLATE(BM_static_map_search_none, int32_t, int32_t, dist_type::UNIFORM)
354+
->Unit(benchmark::kMillisecond)
355+
->Apply(generate_size_and_occupancy);
356+
357+
BENCHMARK_TEMPLATE(BM_static_map_erase_none, int32_t, int32_t, dist_type::UNIFORM)
256358
->Unit(benchmark::kMillisecond)
257359
->Apply(generate_size_and_occupancy);

0 commit comments

Comments
 (0)