@@ -237,38 +237,33 @@ PointSearch<DeviceType>::PointSearch(
237
237
auto topo_size_host = Kokkos::create_mirror_view ( topo_size );
238
238
Kokkos::deep_copy ( topo_size_host, topo_size );
239
239
240
- // Now that we know the size, allocate all the Views.
241
- std::array<Kokkos::View<int *, DeviceType>, DTK_N_TOPO>
242
- filtered_per_topo_cell_indices;
243
- std::array<Kokkos::View<int *, DeviceType>, DTK_N_TOPO>
244
- filtered_per_topo_query_ids;
245
- std::array<Kokkos::View<double **, DeviceType>, DTK_N_TOPO>
246
- filtered_per_topo_reference_points;
247
- std::array<Kokkos::View<bool *, DeviceType>, DTK_N_TOPO>
248
- filtered_per_topo_point_in_cell;
249
- std::array<Kokkos::View<int *, DeviceType>, DTK_N_TOPO>
250
- filtered_per_topo_ranks;
240
+ std::array<Kokkos::View<int *, DeviceType>, DTK_N_TOPO> filtered_ranks;
251
241
// Check if the points are in the cells
252
242
for ( unsigned int topo_id = 0 ; topo_id < DTK_N_TOPO; ++topo_id )
253
243
if ( block_cells[topo_id].extent ( 0 ) != 0 )
254
244
{
255
- std::tie ( filtered_per_topo_cell_indices[topo_id],
256
- filtered_per_topo_query_ids[topo_id],
257
- filtered_per_topo_reference_points[topo_id],
258
- filtered_per_topo_point_in_cell[topo_id],
259
- filtered_per_topo_ranks[topo_id] ) =
245
+ Kokkos::View<int *, DeviceType> filtered_per_topo_cell_indices;
246
+ Kokkos::View<int *, DeviceType> filtered_per_topo_query_ids;
247
+ Kokkos::View<double **, DeviceType>
248
+ filtered_per_topo_reference_points;
249
+ Kokkos::View<bool *, DeviceType> filtered_per_topo_point_in_cell;
250
+ Kokkos::View<int *, DeviceType> filtered_per_topo_ranks;
251
+ std::tie (
252
+ filtered_per_topo_cell_indices, filtered_per_topo_query_ids,
253
+ filtered_per_topo_reference_points,
254
+ filtered_per_topo_point_in_cell, filtered_per_topo_ranks ) =
260
255
performPointInCell ( block_cells[topo_id], bounding_box_to_cell,
261
256
imported_cell_indices, imported_points,
262
257
imported_query_ids, imported_ranks, topo,
263
258
topo_id, topo_size_host ( topo_id ) );
264
- }
265
259
266
- // Filter the points. Only keep the points that are in cell
267
- std::array<Kokkos::View<int *, DeviceType>, DTK_N_TOPO> filtered_ranks;
268
- filterInCell ( filtered_per_topo_point_in_cell,
269
- filtered_per_topo_reference_points,
270
- filtered_per_topo_cell_indices, filtered_per_topo_query_ids,
271
- filtered_per_topo_ranks, filtered_ranks );
260
+ // Filter the points. Only keep the points that are in cell
261
+ filtered_ranks[topo_id] = filterInCell (
262
+ filtered_per_topo_point_in_cell,
263
+ filtered_per_topo_reference_points,
264
+ filtered_per_topo_cell_indices, filtered_per_topo_query_ids,
265
+ filtered_per_topo_ranks, topo_id );
266
+ }
272
267
273
268
// Build the _source_to_target_distributor
274
269
build_distributor ( filtered_ranks );
@@ -486,83 +481,70 @@ PointSearch<DeviceType>::filterTopology(
486
481
}
487
482
488
483
template <typename DeviceType>
489
- void PointSearch<DeviceType>::filterInCell(
490
- std::array<Kokkos::View<bool *, DeviceType>, DTK_N_TOPO> const
491
- &filtered_per_topo_point_in_cell,
492
- std::array<Kokkos::View<double **, DeviceType>, DTK_N_TOPO> const
493
- &filtered_per_topo_reference_points,
494
- std::array<Kokkos::View<int *, DeviceType>, DTK_N_TOPO> const
495
- &filtered_per_topo_cell_indices,
496
- std::array<Kokkos::View<int *, DeviceType>, DTK_N_TOPO> const
497
- &filtered_per_topo_query_ids,
498
- std::array<Kokkos::View<int *, DeviceType>, DTK_N_TOPO> const &ranks,
499
- std::array<Kokkos::View<int *, DeviceType>, DTK_N_TOPO> &filtered_ranks )
484
+ Kokkos::View<int *, DeviceType> PointSearch<DeviceType>::filterInCell(
485
+ Kokkos::View<bool *, DeviceType> filtered_per_topo_point_in_cell,
486
+ Kokkos::View<double **, DeviceType> filtered_per_topo_reference_points,
487
+ Kokkos::View<int *, DeviceType> filtered_per_topo_cell_indices,
488
+ Kokkos::View<int *, DeviceType> filtered_per_topo_query_ids,
489
+ Kokkos::View<int *, DeviceType> filtered_per_topo_ranks,
490
+ unsigned int topo_id )
500
491
{
501
492
using ExecutionSpace = typename DeviceType::execution_space;
502
493
unsigned int dim = _dim;
503
494
504
- for ( unsigned int topo_id = 0 ; topo_id < DTK_N_TOPO; ++topo_id )
495
+ Kokkos::View<int *, DeviceType> filtered_ranks;
496
+ unsigned int n_ref_points = filtered_per_topo_point_in_cell.extent ( 0 );
497
+ if ( n_ref_points != 0 )
505
498
{
506
- unsigned int n_ref_points =
507
- filtered_per_topo_point_in_cell[topo_id].extent ( 0 );
508
- if ( n_ref_points != 0 )
509
- {
510
- int n_filtered_ref_points = 0 ;
511
- Kokkos::View<bool *, DeviceType> pt_in_cell =
512
- filtered_per_topo_point_in_cell[topo_id];
513
- Kokkos::parallel_reduce (
514
- DTK_MARK_REGION ( " compute_n_ref_pts" ),
515
- Kokkos::RangePolicy<ExecutionSpace>( 0 , n_ref_points ),
516
- KOKKOS_LAMBDA ( int i, int &partial_sum ) {
517
- if ( pt_in_cell[i] == true )
518
- partial_sum += 1 ;
519
- },
520
- n_filtered_ref_points );
521
-
522
- // We are only interested in points that belong to the cells. So we
523
- // need to filter out all the points that were false positive of
524
- // the distributed search.
525
- Kokkos::realloc ( _reference_points[topo_id], n_filtered_ref_points,
526
- _dim );
527
- Kokkos::realloc ( _query_ids[topo_id], n_filtered_ref_points );
528
- Kokkos::realloc ( _cell_indices[topo_id], n_filtered_ref_points );
529
- Kokkos::realloc ( filtered_ranks[topo_id], n_filtered_ref_points );
530
-
531
- // We cannot use private member in a lambda function with CUDA
532
- Kokkos::View<Coordinate **, DeviceType> ref_points =
533
- _reference_points[topo_id];
534
- Kokkos::View<int *, DeviceType> query_ids = _query_ids[topo_id];
535
- Kokkos::View<int *, DeviceType> cell_indices =
536
- _cell_indices[topo_id];
537
-
538
- Kokkos::View<unsigned int *, DeviceType> offset ( " offset" ,
539
- n_ref_points );
540
- Discretization::Helpers::computeOffset ( pt_in_cell, true , offset );
541
- auto filtered_reference_points =
542
- filtered_per_topo_reference_points[topo_id];
543
- auto filtered_query_ids = filtered_per_topo_query_ids[topo_id];
544
- auto filtered_cell_indices =
545
- filtered_per_topo_cell_indices[topo_id];
546
- auto f_ranks = filtered_ranks[topo_id];
547
- auto r = ranks[topo_id];
548
- Kokkos::parallel_for (
549
- DTK_MARK_REGION ( " filter" ),
550
- Kokkos::RangePolicy<ExecutionSpace>( 0 , n_ref_points ),
551
- KOKKOS_LAMBDA ( int const i ) {
552
- if ( pt_in_cell[i] )
553
- {
554
- unsigned int k = offset ( i );
555
- for ( unsigned int d = 0 ; d < dim; ++d )
556
- ref_points ( k, d ) =
557
- filtered_reference_points ( i, d );
558
- query_ids ( k ) = filtered_query_ids ( i );
559
- cell_indices ( k ) = filtered_cell_indices ( i );
560
- f_ranks ( k ) = r ( i );
561
- }
562
- } );
563
- Kokkos::fence ();
564
- }
499
+ int n_filtered_ref_points = 0 ;
500
+ Kokkos::View<bool *, DeviceType> pt_in_cell =
501
+ filtered_per_topo_point_in_cell;
502
+ Kokkos::parallel_reduce (
503
+ DTK_MARK_REGION ( " compute_n_ref_pts" ),
504
+ Kokkos::RangePolicy<ExecutionSpace>( 0 , n_ref_points ),
505
+ KOKKOS_LAMBDA ( int i, int &partial_sum ) {
506
+ if ( pt_in_cell[i] == true )
507
+ partial_sum += 1 ;
508
+ },
509
+ n_filtered_ref_points );
510
+
511
+ // We are only interested in points that belong to the cells. So we
512
+ // need to filter out all the points that were false positive of
513
+ // the distributed search.
514
+ Kokkos::realloc ( _reference_points[topo_id], n_filtered_ref_points,
515
+ _dim );
516
+ Kokkos::realloc ( _query_ids[topo_id], n_filtered_ref_points );
517
+ Kokkos::realloc ( _cell_indices[topo_id], n_filtered_ref_points );
518
+ Kokkos::realloc ( filtered_ranks, n_filtered_ref_points );
519
+
520
+ // We cannot use private member in a lambda function with CUDA
521
+ Kokkos::View<Coordinate **, DeviceType> ref_points =
522
+ _reference_points[topo_id];
523
+ Kokkos::View<int *, DeviceType> query_ids = _query_ids[topo_id];
524
+ Kokkos::View<int *, DeviceType> cell_indices = _cell_indices[topo_id];
525
+
526
+ Kokkos::View<unsigned int *, DeviceType> offset ( " offset" ,
527
+ n_ref_points );
528
+ Discretization::Helpers::computeOffset ( pt_in_cell, true , offset );
529
+ Kokkos::parallel_for (
530
+ DTK_MARK_REGION ( " filter" ),
531
+ Kokkos::RangePolicy<ExecutionSpace>( 0 , n_ref_points ),
532
+ KOKKOS_LAMBDA ( int const i ) {
533
+ if ( pt_in_cell[i] )
534
+ {
535
+ unsigned int k = offset ( i );
536
+ for ( unsigned int d = 0 ; d < dim; ++d )
537
+ ref_points ( k, d ) =
538
+ filtered_per_topo_reference_points ( i, d );
539
+ query_ids ( k ) = filtered_per_topo_query_ids ( i );
540
+ cell_indices ( k ) = filtered_per_topo_cell_indices ( i );
541
+ filtered_ranks ( k ) = filtered_per_topo_ranks ( i );
542
+ }
543
+ } );
544
+ Kokkos::fence ();
565
545
}
546
+
547
+ return filtered_ranks;
566
548
}
567
549
568
550
template <typename DeviceType>
@@ -579,11 +561,11 @@ PointSearch<DeviceType>::performPointInCell(
579
561
Kokkos::View<unsigned int *, DeviceType> topo, unsigned int topo_id,
580
562
unsigned int size )
581
563
{
564
+ // Filter the data for a given topology
582
565
Kokkos::View<double **, DeviceType> filtered_per_topo_points;
583
566
Kokkos::View<int *, DeviceType> filtered_per_topo_cell_indices;
584
567
Kokkos::View<int *, DeviceType> filtered_per_topo_query_ids;
585
568
Kokkos::View<int *, DeviceType> filtered_per_topo_ranks;
586
-
587
569
std::tie ( filtered_per_topo_cell_indices, filtered_per_topo_points,
588
570
filtered_per_topo_query_ids, filtered_per_topo_ranks ) =
589
571
filterTopology ( topo, topo_id, size, bounding_box_to_cell,
0 commit comments