@@ -124,7 +124,7 @@ namespace Portable
124124 * Constructor.
125125 */
126126 DEAL_II_HOST_DEVICE
127- FEEvaluation (const data_type *data, SharedData<dim, Number> *shdata );
127+ explicit FEEvaluation (const data_type *data);
128128
129129 /* *
130130 * Return the index of the current cell.
@@ -264,9 +264,10 @@ namespace Portable
264264 apply_for_each_quad_point (const Functor &func);
265265
266266 private:
267- const data_type *data;
268- SharedData<dim, Number> *shared_data;
269- int cell_id;
267+ const typename MatrixFree<dim, Number>::Data *dataa;
268+ const typename MatrixFree<dim, Number>::PrecomputedData *precomputed_data;
269+ SharedData<dim, Number> *shared_data;
270+ int cell_id;
270271 };
271272
272273
@@ -278,10 +279,11 @@ namespace Portable
278279 typename Number>
279280 DEAL_II_HOST_DEVICE
280281 FEEvaluation<dim, fe_degree, n_q_points_1d, n_components_, Number>::
281- FEEvaluation (const data_type *data, SharedData<dim, Number> *shdata)
282- : data(data)
283- , shared_data(shdata)
284- , cell_id(shared_data->team_member.league_rank())
282+ FEEvaluation (const data_type *data)
283+ : dataa(data)
284+ , precomputed_data(dataa->precomputed_data)
285+ , shared_data(dataa->shared_data)
286+ , cell_id(data->team_member.league_rank())
285287 {}
286288
287289
@@ -313,7 +315,7 @@ namespace Portable
313315 FEEvaluation<dim, fe_degree, n_q_points_1d, n_components_, Number>::
314316 get_matrix_free_data ()
315317 {
316- return data ;
318+ return dataa ;
317319 }
318320
319321
@@ -328,22 +330,22 @@ namespace Portable
328330 read_dof_values (const Number *src)
329331 {
330332 // Populate the scratch memory
331- Kokkos::parallel_for (Kokkos::TeamThreadRange (shared_data ->team_member ,
333+ Kokkos::parallel_for (Kokkos::TeamThreadRange (dataa ->team_member ,
332334 tensor_dofs_per_component),
333335 [&](const int &i) {
334336 for (unsigned int c = 0 ; c < n_components_; ++c)
335337 shared_data->values (i, c) =
336- src[data ->local_to_global (
338+ src[precomputed_data ->local_to_global (
337339 cell_id, i + tensor_dofs_per_component * c)];
338340 });
339- shared_data ->team_member .team_barrier ();
341+ dataa ->team_member .team_barrier ();
340342
341343 for (unsigned int c = 0 ; c < n_components_; ++c)
342344 {
343345 internal::resolve_hanging_nodes<dim, fe_degree, false , Number>(
344- shared_data ->team_member ,
345- data ->constraint_weights ,
346- data ->constraint_mask (cell_id * n_components + c),
346+ dataa ->team_member ,
347+ precomputed_data ->constraint_weights ,
348+ precomputed_data ->constraint_mask (cell_id * n_components + c),
347349 Kokkos::subview (shared_data->values , Kokkos::ALL, c));
348350 }
349351 }
@@ -362,32 +364,32 @@ namespace Portable
362364 for (unsigned int c = 0 ; c < n_components_; ++c)
363365 {
364366 internal::resolve_hanging_nodes<dim, fe_degree, true , Number>(
365- shared_data ->team_member ,
366- data ->constraint_weights ,
367- data ->constraint_mask (cell_id * n_components + c),
367+ dataa ->team_member ,
368+ precomputed_data ->constraint_weights ,
369+ precomputed_data ->constraint_mask (cell_id * n_components + c),
368370 Kokkos::subview (shared_data->values , Kokkos::ALL, c));
369371 }
370372
371- if (data ->use_coloring )
373+ if (precomputed_data ->use_coloring )
372374 {
373375 Kokkos::parallel_for (
374- Kokkos::TeamThreadRange (shared_data ->team_member ,
376+ Kokkos::TeamThreadRange (dataa ->team_member ,
375377 tensor_dofs_per_component),
376378 [&](const int &i) {
377379 for (unsigned int c = 0 ; c < n_components_; ++c)
378- dst[data ->local_to_global (cell_id,
379- i + tensor_dofs_per_component * c)] +=
380+ dst[precomputed_data ->local_to_global (
381+ cell_id, i + tensor_dofs_per_component * c)] +=
380382 shared_data->values (i, c);
381383 });
382384 }
383385 else
384386 {
385387 Kokkos::parallel_for (
386- Kokkos::TeamThreadRange (shared_data ->team_member ,
388+ Kokkos::TeamThreadRange (dataa ->team_member ,
387389 tensor_dofs_per_component),
388390 [&](const int &i) {
389391 for (unsigned int c = 0 ; c < n_components_; ++c)
390- Kokkos::atomic_add (&dst[data ->local_to_global (
392+ Kokkos::atomic_add (&dst[precomputed_data ->local_to_global (
391393 cell_id, i + (tensor_dofs_per_component)*c)],
392394 shared_data->values (i, c));
393395 });
@@ -408,28 +410,29 @@ namespace Portable
408410 using ElementType = ::dealii::internal::MatrixFreeFunctions::ElementType;
409411
410412 if (fe_degree >= 0 && fe_degree + 1 == n_q_points_1d &&
411- data->element_type == ElementType::tensor_symmetric_collocation)
413+ precomputed_data->element_type ==
414+ ElementType::tensor_symmetric_collocation)
412415 {
413416 internal::FEEvaluationImplCollocation<dim, fe_degree, Number>::evaluate (
414- n_components, evaluation_flag, data, shared_data );
417+ n_components, evaluation_flag, dataa );
415418 }
416419 // '<=' on type means tensor_symmetric or tensor_symmetric_hermite, see
417420 // shape_info.h for more details
418421 else if (fe_degree >= 0 &&
419422 internal::use_collocation_evaluation (fe_degree, n_q_points_1d) &&
420- data ->element_type <= ElementType::tensor_symmetric)
423+ precomputed_data ->element_type <= ElementType::tensor_symmetric)
421424 {
422425 internal::FEEvaluationImplTransformToCollocation<
423426 dim,
424427 fe_degree,
425428 n_q_points_1d,
426- Number>::evaluate (n_components, evaluation_flag, data, shared_data );
429+ Number>::evaluate (n_components, evaluation_flag, dataa );
427430 }
428- else if (fe_degree >= 0 &&
429- data-> element_type <= ElementType::tensor_symmetric_no_collocation)
431+ else if (fe_degree >= 0 && precomputed_data-> element_type <=
432+ ElementType::tensor_symmetric_no_collocation)
430433 {
431434 internal::FEEvaluationImpl<dim, fe_degree, n_q_points_1d, Number>::
432- evaluate (n_components, evaluation_flag, data, shared_data );
435+ evaluate (n_components, evaluation_flag, dataa );
433436 }
434437 else
435438 {
@@ -469,28 +472,29 @@ namespace Portable
469472 using ElementType = ::dealii::internal::MatrixFreeFunctions::ElementType;
470473
471474 if (fe_degree >= 0 && fe_degree + 1 == n_q_points_1d &&
472- data->element_type == ElementType::tensor_symmetric_collocation)
475+ precomputed_data->element_type ==
476+ ElementType::tensor_symmetric_collocation)
473477 {
474478 internal::FEEvaluationImplCollocation<dim, fe_degree, Number>::
475- integrate (n_components, integration_flag, data, shared_data );
479+ integrate (n_components, integration_flag, dataa );
476480 }
477481 // '<=' on type means tensor_symmetric or tensor_symmetric_hermite, see
478482 // shape_info.h for more details
479483 else if (fe_degree >= 0 &&
480484 internal::use_collocation_evaluation (fe_degree, n_q_points_1d) &&
481- data ->element_type <= ElementType::tensor_symmetric)
485+ precomputed_data ->element_type <= ElementType::tensor_symmetric)
482486 {
483487 internal::FEEvaluationImplTransformToCollocation<
484488 dim,
485489 fe_degree,
486490 n_q_points_1d,
487- Number>::integrate (n_components, integration_flag, data, shared_data );
491+ Number>::integrate (n_components, integration_flag, dataa );
488492 }
489- else if (fe_degree >= 0 &&
490- data-> element_type <= ElementType::tensor_symmetric_no_collocation)
493+ else if (fe_degree >= 0 && precomputed_data-> element_type <=
494+ ElementType::tensor_symmetric_no_collocation)
491495 {
492496 internal::FEEvaluationImpl<dim, fe_degree, n_q_points_1d, Number>::
493- integrate (n_components, integration_flag, data, shared_data );
497+ integrate (n_components, integration_flag, dataa );
494498 }
495499 else
496500 {
@@ -588,13 +592,14 @@ namespace Portable
588592 Assert (q_point >= 0 && q_point < n_q_points, ExcInternalError ());
589593 if constexpr (n_components_ == 1 )
590594 {
591- shared_data->values (q_point, 0 ) = val_in * data->JxW (cell_id, q_point);
595+ shared_data->values (q_point, 0 ) =
596+ val_in * precomputed_data->JxW (cell_id, q_point);
592597 }
593598 else
594599 {
595600 for (unsigned int c = 0 ; c < n_components; ++c)
596601 shared_data->values (q_point, c) =
597- val_in[c] * data ->JxW (cell_id, q_point);
602+ val_in[c] * precomputed_data ->JxW (cell_id, q_point);
598603 }
599604 }
600605
@@ -645,8 +650,9 @@ namespace Portable
645650 {
646651 Number tmp = 0 .;
647652 for (unsigned int d_2 = 0 ; d_2 < dim; ++d_2)
648- tmp += data->inv_jacobian (cell_id, q_point, d_2, d_1) *
649- shared_data->gradients (q_point, d_2, 0 );
653+ tmp +=
654+ precomputed_data->inv_jacobian (cell_id, q_point, d_2, d_1) *
655+ shared_data->gradients (q_point, d_2, 0 );
650656 grad[d_1] = tmp;
651657 }
652658 }
@@ -657,8 +663,9 @@ namespace Portable
657663 {
658664 Number tmp = 0 .;
659665 for (unsigned int d_2 = 0 ; d_2 < dim; ++d_2)
660- tmp += data->inv_jacobian (cell_id, q_point, d_2, d_1) *
661- shared_data->gradients (q_point, d_2, c);
666+ tmp +=
667+ precomputed_data->inv_jacobian (cell_id, q_point, d_2, d_1) *
668+ shared_data->gradients (q_point, d_2, c);
662669 grad[c][d_1] = tmp;
663670 }
664671 }
@@ -685,9 +692,10 @@ namespace Portable
685692 Number tmp = 0 .;
686693 for (unsigned int d_2 = 0 ; d_2 < dim; ++d_2)
687694 tmp +=
688- data->inv_jacobian (cell_id, q_point, d_1, d_2) * grad_in[d_2];
695+ precomputed_data->inv_jacobian (cell_id, q_point, d_1, d_2) *
696+ grad_in[d_2];
689697 shared_data->gradients (q_point, d_1, 0 ) =
690- tmp * data ->JxW (cell_id, q_point);
698+ tmp * precomputed_data ->JxW (cell_id, q_point);
691699 }
692700 }
693701 else
@@ -697,10 +705,11 @@ namespace Portable
697705 {
698706 Number tmp = 0 .;
699707 for (unsigned int d_2 = 0 ; d_2 < dim; ++d_2)
700- tmp += data->inv_jacobian (cell_id, q_point, d_1, d_2) *
701- grad_in[c][d_2];
708+ tmp +=
709+ precomputed_data->inv_jacobian (cell_id, q_point, d_1, d_2) *
710+ grad_in[c][d_2];
702711 shared_data->gradients (q_point, d_1, c) =
703- tmp * data ->JxW (cell_id, q_point);
712+ tmp * precomputed_data ->JxW (cell_id, q_point);
704713 }
705714 }
706715 }
@@ -717,10 +726,10 @@ namespace Portable
717726 FEEvaluation<dim, fe_degree, n_q_points_1d, n_components_, Number>::
718727 apply_for_each_quad_point (const Functor &func)
719728 {
720- Kokkos::parallel_for (Kokkos::TeamThreadRange (shared_data ->team_member ,
729+ Kokkos::parallel_for (Kokkos::TeamThreadRange (dataa ->team_member ,
721730 n_q_points),
722731 [&](const int &i) { func (this , i); });
723- shared_data ->team_member .team_barrier ();
732+ dataa ->team_member .team_barrier ();
724733 }
725734
726735
0 commit comments