@@ -57,7 +57,7 @@ static void generate_keys(OutputIt output_begin, OutputIt output_end)
5757
5858static void gen_final_size (benchmark::internal::Benchmark* b)
5959{
60- for (auto size = 10'000'000 ; size <= 150 '000'000 ; size += 20'000'000 ) {
60+ for (auto size = 10'000'000 ; size <= 310 '000'000 ; size += 20'000'000 ) {
6161 b->Args ({size});
6262 }
6363}
@@ -135,6 +135,128 @@ static void BM_dynamic_search_all(::benchmark::State& state)
135135 int64_t (state.range (0 )));
136136}
137137
138+ template <typename Key, typename Value, dist_type Dist>
139+ static void BM_dynamic_search_none (::benchmark::State& state)
140+ {
141+ using map_type = cuco::dynamic_map<Key, Value>;
142+
143+ std::size_t num_keys = state.range (0 );
144+ std::size_t initial_size = 1 << 27 ;
145+
146+ std::vector<Key> h_keys (num_keys);
147+ std::vector<cuco::pair_type<Key, Value>> h_pairs (num_keys);
148+
149+ generate_keys<Dist, Key>(h_keys.begin (), h_keys.end ());
150+
151+ for (std::size_t i = 0 ; i < num_keys; ++i) {
152+ Key key = h_keys[i] + num_keys;
153+ Value val = h_keys[i] + num_keys;
154+ h_pairs[i].first = key;
155+ h_pairs[i].second = val;
156+ }
157+
158+ thrust::device_vector<Key> d_keys (h_keys);
159+ thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs (h_pairs);
160+ thrust::device_vector<Value> d_results (num_keys);
161+
162+ map_type map{initial_size, cuco::empty_key<Key>{-1 }, cuco::empty_value<Value>{-1 }};
163+ map.insert (d_pairs.begin (), d_pairs.end ());
164+
165+ for (auto _ : state) {
166+ cuda_event_timer raii{state};
167+ map.find (d_keys.begin (), d_keys.end (), d_results.begin ());
168+ }
169+
170+ state.SetBytesProcessed ((sizeof (Key) + sizeof (Value)) * int64_t (state.iterations ()) *
171+ int64_t (state.range (0 )));
172+ }
173+
174+ template <typename Key, typename Value, dist_type Dist>
175+ static void BM_dynamic_erase_all (::benchmark::State& state)
176+ {
177+ using map_type = cuco::dynamic_map<Key, Value>;
178+
179+ std::size_t num_keys = state.range (0 );
180+ std::size_t initial_size = 1 << 27 ;
181+
182+ std::vector<Key> h_keys (num_keys);
183+ std::vector<cuco::pair_type<Key, Value>> h_pairs (num_keys);
184+
185+ generate_keys<Dist, Key>(h_keys.begin (), h_keys.end ());
186+
187+ for (uint32_t i = 0 ; i < num_keys; ++i) {
188+ Key key = h_keys[i];
189+ Value val = h_keys[i];
190+ h_pairs[i].first = key;
191+ h_pairs[i].second = val;
192+ }
193+
194+ thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs (h_pairs);
195+ thrust::device_vector<Key> d_keys (h_keys);
196+
197+ std::size_t batch_size = 1E6 ;
198+ for (auto _ : state) {
199+ map_type map{initial_size,
200+ cuco::empty_key<Key>{-1 },
201+ cuco::empty_value<Value>{-1 },
202+ cuco::erased_key<Key>{-2 }};
203+ for (uint32_t i = 0 ; i < num_keys; i += batch_size) {
204+ map.insert (d_pairs.begin () + i, d_pairs.begin () + i + batch_size);
205+ }
206+ {
207+ cuda_event_timer raii{state};
208+ for (uint32_t i = 0 ; i < num_keys; i += batch_size) {
209+ map.erase (d_keys.begin () + i, d_keys.begin () + i + batch_size);
210+ }
211+ }
212+ }
213+
214+ state.SetBytesProcessed ((sizeof (Key) + sizeof (Value)) * int64_t (state.iterations ()) *
215+ int64_t (state.range (0 )));
216+ }
217+
218+ template <typename Key, typename Value, dist_type Dist>
219+ static void BM_dynamic_erase_none (::benchmark::State& state)
220+ {
221+ using map_type = cuco::dynamic_map<Key, Value>;
222+
223+ std::size_t num_keys = state.range (0 );
224+ std::size_t initial_size = 1 << 27 ;
225+
226+ std::vector<Key> h_keys (num_keys);
227+ std::vector<cuco::pair_type<Key, Value>> h_pairs (num_keys);
228+
229+ generate_keys<Dist, Key>(h_keys.begin (), h_keys.end ());
230+
231+ for (std::size_t i = 0 ; i < num_keys; ++i) {
232+ Key key = h_keys[i] + num_keys;
233+ Value val = h_keys[i] + num_keys;
234+ h_pairs[i].first = key;
235+ h_pairs[i].second = val;
236+ }
237+
238+ thrust::device_vector<cuco::pair_type<Key, Value>> d_pairs (h_pairs);
239+ thrust::device_vector<Key> d_keys (h_keys);
240+
241+ std::size_t batch_size = 1E6 ;
242+ for (auto _ : state) {
243+ map_type map{initial_size,
244+ cuco::empty_key<Key>{-1 },
245+ cuco::empty_value<Value>{-1 },
246+ cuco::erased_key<Key>{-2 }};
247+ for (std::size_t i = 0 ; i < num_keys; i += batch_size) {
248+ map.insert (d_pairs.begin () + i, d_pairs.begin () + i + batch_size);
249+ }
250+ {
251+ cuda_event_timer raii{state};
252+ map.erase (d_keys.begin (), d_keys.end ());
253+ }
254+ }
255+
256+ state.SetBytesProcessed ((sizeof (Key) + sizeof (Value)) * int64_t (state.iterations ()) *
257+ int64_t (state.range (0 )));
258+ }
259+
138260BENCHMARK_TEMPLATE (BM_dynamic_insert, int32_t , int32_t , dist_type::UNIQUE)
139261 ->Unit(benchmark::kMillisecond )
140262 ->Apply(gen_final_size)
@@ -145,32 +267,37 @@ BENCHMARK_TEMPLATE(BM_dynamic_search_all, int32_t, int32_t, dist_type::UNIQUE)
145267 ->Apply(gen_final_size)
146268 ->UseManualTime();
147269
148- BENCHMARK_TEMPLATE (BM_dynamic_insert , int32_t , int32_t , dist_type::UNIFORM )
270+ BENCHMARK_TEMPLATE (BM_dynamic_erase_all , int32_t , int32_t , dist_type::UNIQUE )
149271 ->Unit(benchmark::kMillisecond )
150272 ->Apply(gen_final_size)
151273 ->UseManualTime();
152274
153- BENCHMARK_TEMPLATE (BM_dynamic_search_all, int32_t , int32_t , dist_type::UNIFORM )
275+ BENCHMARK_TEMPLATE (BM_dynamic_insert, int64_t , int64_t , dist_type::UNIQUE )
154276 ->Unit(benchmark::kMillisecond )
155277 ->Apply(gen_final_size)
156278 ->UseManualTime();
157279
158- BENCHMARK_TEMPLATE (BM_dynamic_insert, int32_t , int32_t , dist_type::GAUSSIAN )
280+ BENCHMARK_TEMPLATE (BM_dynamic_search_all, int64_t , int64_t , dist_type::UNIQUE )
159281 ->Unit(benchmark::kMillisecond )
160282 ->Apply(gen_final_size)
161283 ->UseManualTime();
162284
163- BENCHMARK_TEMPLATE (BM_dynamic_search_all, int32_t , int32_t , dist_type::GAUSSIAN )
285+ BENCHMARK_TEMPLATE (BM_dynamic_erase_all, int64_t , int64_t , dist_type::UNIQUE )
164286 ->Unit(benchmark::kMillisecond )
165287 ->Apply(gen_final_size)
166288 ->UseManualTime();
167289
168- BENCHMARK_TEMPLATE (BM_dynamic_insert, int64_t , int64_t , dist_type::UNIQUE )
290+ BENCHMARK_TEMPLATE (BM_dynamic_insert, int32_t , int32_t , dist_type::UNIFORM )
169291 ->Unit(benchmark::kMillisecond )
170292 ->Apply(gen_final_size)
171293 ->UseManualTime();
172294
173- BENCHMARK_TEMPLATE (BM_dynamic_search_all, int64_t , int64_t , dist_type::UNIQUE)
295+ BENCHMARK_TEMPLATE (BM_dynamic_search_all, int32_t , int32_t , dist_type::UNIFORM)
296+ ->Unit(benchmark::kMillisecond )
297+ ->Apply(gen_final_size)
298+ ->UseManualTime();
299+
300+ BENCHMARK_TEMPLATE (BM_dynamic_erase_all, int32_t , int32_t , dist_type::UNIFORM)
174301 ->Unit(benchmark::kMillisecond )
175302 ->Apply(gen_final_size)
176303 ->UseManualTime();
@@ -185,6 +312,26 @@ BENCHMARK_TEMPLATE(BM_dynamic_search_all, int64_t, int64_t, dist_type::UNIFORM)
185312 ->Apply(gen_final_size)
186313 ->UseManualTime();
187314
315+ BENCHMARK_TEMPLATE (BM_dynamic_erase_all, int64_t , int64_t , dist_type::UNIFORM)
316+ ->Unit(benchmark::kMillisecond )
317+ ->Apply(gen_final_size)
318+ ->UseManualTime();
319+
320+ BENCHMARK_TEMPLATE (BM_dynamic_insert, int32_t , int32_t , dist_type::GAUSSIAN)
321+ ->Unit(benchmark::kMillisecond )
322+ ->Apply(gen_final_size)
323+ ->UseManualTime();
324+
325+ BENCHMARK_TEMPLATE (BM_dynamic_search_all, int32_t , int32_t , dist_type::GAUSSIAN)
326+ ->Unit(benchmark::kMillisecond )
327+ ->Apply(gen_final_size)
328+ ->UseManualTime();
329+
330+ BENCHMARK_TEMPLATE (BM_dynamic_erase_all, int32_t , int32_t , dist_type::GAUSSIAN)
331+ ->Unit(benchmark::kMillisecond )
332+ ->Apply(gen_final_size)
333+ ->UseManualTime();
334+
188335BENCHMARK_TEMPLATE (BM_dynamic_insert, int64_t , int64_t , dist_type::GAUSSIAN)
189336 ->Unit(benchmark::kMillisecond )
190337 ->Apply(gen_final_size)
@@ -194,3 +341,19 @@ BENCHMARK_TEMPLATE(BM_dynamic_search_all, int64_t, int64_t, dist_type::GAUSSIAN)
194341 ->Unit(benchmark::kMillisecond )
195342 ->Apply(gen_final_size)
196343 ->UseManualTime();
344+
345+ BENCHMARK_TEMPLATE (BM_dynamic_erase_all, int64_t , int64_t , dist_type::GAUSSIAN)
346+ ->Unit(benchmark::kMillisecond )
347+ ->Apply(gen_final_size)
348+ ->UseManualTime();
349+
350+ // TODO: comprehensive tests for erase_none and search_none?
351+ BENCHMARK_TEMPLATE (BM_dynamic_search_none, int32_t , int32_t , dist_type::UNIFORM)
352+ ->Unit(benchmark::kMillisecond )
353+ ->Apply(gen_final_size)
354+ ->UseManualTime();
355+
356+ BENCHMARK_TEMPLATE (BM_dynamic_erase_none, int32_t , int32_t , dist_type::UNIFORM)
357+ ->Unit(benchmark::kMillisecond )
358+ ->Apply(gen_final_size)
359+ ->UseManualTime();
0 commit comments