Skip to content

Commit b363917

Browse files
committed
one commit
1 parent b556c95 commit b363917

File tree

9 files changed

+1084
-672
lines changed

9 files changed

+1084
-672
lines changed

include/deal.II/matrix_free/portable_evaluation_kernels.h

Lines changed: 620 additions & 0 deletions
Large diffs are not rendered by default.

include/deal.II/matrix_free/portable_fe_evaluation.h

Lines changed: 65 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <deal.II/base/utilities.h>
2323

2424
#include <deal.II/matrix_free/evaluation_flags.h>
25+
#include <deal.II/matrix_free/portable_evaluation_kernels.h>
2526
#include <deal.II/matrix_free/portable_hanging_nodes_internal.h>
2627
#include <deal.II/matrix_free/portable_matrix_free.h>
2728
#include <deal.II/matrix_free/portable_matrix_free.templates.h>
@@ -272,7 +273,7 @@ namespace Portable
272273
{
273274
// Populate the scratch memory
274275
Kokkos::parallel_for(
275-
Kokkos::TeamThreadRange(shared_data->team_member, n_q_points),
276+
Kokkos::TeamThreadRange(shared_data->team_member, tensor_dofs_per_cell),
276277
[&](const int &i) {
277278
for (unsigned int c = 0; c < n_components_; ++c)
278279
shared_data->values(i, c) = src[data->local_to_global(
@@ -313,7 +314,8 @@ namespace Portable
313314
if (data->use_coloring)
314315
{
315316
Kokkos::parallel_for(
316-
Kokkos::TeamThreadRange(shared_data->team_member, n_q_points),
317+
Kokkos::TeamThreadRange(shared_data->team_member,
318+
tensor_dofs_per_cell / n_components),
317319
[&](const int &i) {
318320
for (unsigned int c = 0; c < n_components_; ++c)
319321
dst[data->local_to_global(
@@ -324,7 +326,8 @@ namespace Portable
324326
else
325327
{
326328
Kokkos::parallel_for(
327-
Kokkos::TeamThreadRange(shared_data->team_member, n_q_points),
329+
Kokkos::TeamThreadRange(shared_data->team_member,
330+
tensor_dofs_per_cell / n_components),
328331
[&](const int &i) {
329332
for (unsigned int c = 0; c < n_components_; ++c)
330333
Kokkos::atomic_add(
@@ -340,50 +343,39 @@ namespace Portable
340343
template <int dim,
341344
int fe_degree,
342345
int n_q_points_1d,
343-
int n_components_,
346+
int n_components,
344347
typename Number>
345348
DEAL_II_HOST_DEVICE void
346-
FEEvaluation<dim, fe_degree, n_q_points_1d, n_components_, Number>::evaluate(
347-
const EvaluationFlags::EvaluationFlags evaluate_flag)
349+
FEEvaluation<dim, fe_degree, n_q_points_1d, n_components, Number>::evaluate(
350+
const EvaluationFlags::EvaluationFlags evaluation_flag)
348351
{
349-
// First evaluate the gradients because it requires values that will be
350-
// changed if evaluate_val is true
351-
internal::EvaluatorTensorProduct<
352-
internal::EvaluatorVariant::evaluate_general,
353-
dim,
354-
fe_degree,
355-
n_q_points_1d,
356-
Number>
357-
evaluator_tensor_product(shared_data->team_member,
358-
data->shape_values,
359-
data->shape_gradients,
360-
data->co_shape_gradients);
352+
using ElementType = ::dealii::internal::MatrixFreeFunctions::ElementType;
361353

362-
for (unsigned int c = 0; c < n_components_; ++c)
354+
if (fe_degree >= 0 && fe_degree + 1 == n_q_points_1d &&
355+
data->element_type == ElementType::tensor_symmetric_collocation)
363356
{
364-
if ((evaluate_flag & EvaluationFlags::values) &&
365-
(evaluate_flag & EvaluationFlags::gradients))
366-
{
367-
evaluator_tensor_product.evaluate_values_and_gradients(
368-
Kokkos::subview(shared_data->values, Kokkos::ALL, c),
369-
Kokkos::subview(
370-
shared_data->gradients, Kokkos::ALL, Kokkos::ALL, c));
371-
shared_data->team_member.team_barrier();
372-
}
373-
else if (evaluate_flag & EvaluationFlags::gradients)
374-
{
375-
evaluator_tensor_product.evaluate_gradients(
376-
Kokkos::subview(shared_data->values, Kokkos::ALL, c),
377-
Kokkos::subview(
378-
shared_data->gradients, Kokkos::ALL, Kokkos::ALL, c));
379-
shared_data->team_member.team_barrier();
380-
}
381-
else if (evaluate_flag & EvaluationFlags::values)
382-
{
383-
evaluator_tensor_product.evaluate_values(
384-
Kokkos::subview(shared_data->values, Kokkos::ALL, c));
385-
shared_data->team_member.team_barrier();
386-
}
357+
internal::FEEvaluationImplCollocation<dim, fe_degree, Number>::evaluate(
358+
n_components, evaluation_flag, data, shared_data);
359+
}
360+
else if (fe_degree >= 0 &&
361+
internal::use_collocation_evaluation(fe_degree, n_q_points_1d) &&
362+
data->element_type <= ElementType::tensor_symmetric)
363+
{
364+
internal::FEEvaluationImplTransformToCollocation<
365+
dim,
366+
fe_degree,
367+
n_q_points_1d,
368+
Number>::evaluate(n_components, evaluation_flag, data, shared_data);
369+
}
370+
else if (fe_degree >= 0 &&
371+
data->element_type <= ElementType::tensor_symmetric_no_collocation)
372+
{
373+
internal::FEEvaluationImpl<dim, fe_degree, n_q_points_1d, Number>::
374+
evaluate(n_components, evaluation_flag, data, shared_data);
375+
}
376+
else
377+
{
378+
AssertThrow(false, ExcNotImplemented());
387379
}
388380
}
389381

@@ -415,42 +407,33 @@ namespace Portable
415407
FEEvaluation<dim, fe_degree, n_q_points_1d, n_components_, Number>::integrate(
416408
const EvaluationFlags::EvaluationFlags integration_flag)
417409
{
418-
internal::EvaluatorTensorProduct<
419-
internal::EvaluatorVariant::evaluate_general,
420-
dim,
421-
fe_degree,
422-
n_q_points_1d,
423-
Number>
424-
evaluator_tensor_product(shared_data->team_member,
425-
data->shape_values,
426-
data->shape_gradients,
427-
data->co_shape_gradients);
428-
410+
using ElementType = ::dealii::internal::MatrixFreeFunctions::ElementType;
429411

430-
for (unsigned int c = 0; c < n_components_; ++c)
412+
if (fe_degree >= 0 && fe_degree + 1 == n_q_points_1d &&
413+
data->element_type == ElementType::tensor_symmetric_collocation)
431414
{
432-
if ((integration_flag & EvaluationFlags::values) &&
433-
(integration_flag & EvaluationFlags::gradients))
434-
{
435-
evaluator_tensor_product.integrate_values_and_gradients(
436-
Kokkos::subview(shared_data->values, Kokkos::ALL, c),
437-
Kokkos::subview(
438-
shared_data->gradients, Kokkos::ALL, Kokkos::ALL, c));
439-
}
440-
else if (integration_flag & EvaluationFlags::values)
441-
{
442-
evaluator_tensor_product.integrate_values(
443-
Kokkos::subview(shared_data->values, Kokkos::ALL, c));
444-
shared_data->team_member.team_barrier();
445-
}
446-
else if (integration_flag & EvaluationFlags::gradients)
447-
{
448-
evaluator_tensor_product.template integrate_gradients<false>(
449-
Kokkos::subview(shared_data->values, Kokkos::ALL, c),
450-
Kokkos::subview(
451-
shared_data->gradients, Kokkos::ALL, Kokkos::ALL, c));
452-
shared_data->team_member.team_barrier();
453-
}
415+
internal::FEEvaluationImplCollocation<dim, fe_degree, Number>::
416+
integrate(n_components, integration_flag, data, shared_data);
417+
}
418+
else if (fe_degree >= 0 &&
419+
internal::use_collocation_evaluation(fe_degree, n_q_points_1d) &&
420+
data->element_type <= ElementType::tensor_symmetric)
421+
{
422+
internal::FEEvaluationImplTransformToCollocation<
423+
dim,
424+
fe_degree,
425+
n_q_points_1d,
426+
Number>::integrate(n_components, integration_flag, data, shared_data);
427+
}
428+
else if (fe_degree >= 0 &&
429+
data->element_type <= ElementType::tensor_symmetric_no_collocation)
430+
{
431+
internal::FEEvaluationImpl<dim, fe_degree, n_q_points_1d, Number>::
432+
integrate(n_components, integration_flag, data, shared_data);
433+
}
434+
else
435+
{
436+
AssertThrow(false, ExcNotImplemented());
454437
}
455438
}
456439

@@ -512,17 +495,17 @@ namespace Portable
512495
n_components_,
513496
Number>::value_type
514497
FEEvaluation<dim, fe_degree, n_q_points_1d, n_components_, Number>::
515-
get_dof_value(int q_point) const
498+
get_dof_value(int dof) const
516499
{
517500
if constexpr (n_components_ == 1)
518501
{
519-
return shared_data->values(q_point, 0);
502+
return shared_data->values(dof, 0);
520503
}
521504
else
522505
{
523506
value_type result;
524507
for (unsigned int c = 0; c < n_components; ++c)
525-
result[c] = shared_data->values(q_point, c);
508+
result[c] = shared_data->values(dof, c);
526509
return result;
527510
}
528511
}
@@ -559,16 +542,16 @@ namespace Portable
559542
typename Number>
560543
DEAL_II_HOST_DEVICE void
561544
FEEvaluation<dim, fe_degree, n_q_points_1d, n_components_, Number>::
562-
submit_dof_value(const value_type &val_in, int q_point)
545+
submit_dof_value(const value_type &val_in, int dof)
563546
{
564547
if constexpr (n_components_ == 1)
565548
{
566-
shared_data->values(q_point, 0) = val_in;
549+
shared_data->values(dof, 0) = val_in;
567550
}
568551
else
569552
{
570553
for (unsigned int c = 0; c < n_components; ++c)
571-
shared_data->values(q_point, c) = val_in[c];
554+
shared_data->values(dof, c) = val_in[c];
572555
}
573556
}
574557

include/deal.II/matrix_free/portable_matrix_free.h

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
#include <deal.II/lac/affine_constraints.h>
3636
#include <deal.II/lac/la_parallel_vector.h>
3737

38+
#include <deal.II/matrix_free/shape_info.h>
39+
3840
#include <Kokkos_Core.hpp>
3941

4042

@@ -234,6 +236,18 @@ namespace Portable
234236
*/
235237
bool use_coloring;
236238

239+
/**
240+
* Encodes the type of element detected at construction. FEEvaluation
241+
* will select the most efficient algorithm based on the given element
242+
* type.
243+
*/
244+
::dealii::internal::MatrixFreeFunctions::ElementType element_type;
245+
246+
/**
247+
* Size of the scratch pad for temporary storage in shared memory.
248+
*/
249+
unsigned int scratch_pad_size;
250+
237251
/**
238252
* Return the local quadrature point index. The index is
239253
* only unique for a given MPI process.
@@ -474,6 +488,18 @@ namespace Portable
474488
*/
475489
types::global_dof_index n_dofs;
476490

491+
/**
492+
* Encodes the type of element detected at construction. FEEvaluation
493+
* will select the most efficient algorithm based on the given element
494+
* type.
495+
*/
496+
::dealii::internal::MatrixFreeFunctions::ElementType element_type;
497+
498+
/**
499+
* Size of the scratch pad for temporary storage in shared memory.
500+
*/
501+
unsigned int scratch_pad_size;
502+
477503
/**
478504
* Degree of the finite element used.
479505
*/
@@ -627,14 +653,20 @@ namespace Portable
627653
Number ***,
628654
MemorySpace::Default::kokkos_space::execution_space::scratch_memory_space,
629655
Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
656+
using SharedViewScratchPad = Kokkos::View<
657+
Number *,
658+
MemorySpace::Default::kokkos_space::execution_space::scratch_memory_space,
659+
Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
630660

631661
DEAL_II_HOST_DEVICE
632-
SharedData(const TeamHandle &team_member,
633-
const SharedViewValues &values,
634-
const SharedViewGradients &gradients)
662+
SharedData(const TeamHandle &team_member,
663+
const SharedViewValues &values,
664+
const SharedViewGradients &gradients,
665+
const SharedViewScratchPad &scratch_pad)
635666
: team_member(team_member)
636667
, values(values)
637668
, gradients(gradients)
669+
, scratch_pad(scratch_pad)
638670
{}
639671

640672
/**
@@ -651,6 +683,11 @@ namespace Portable
651683
* Memory for computed gradients in reference coordinate system.
652684
*/
653685
SharedViewGradients gradients;
686+
687+
/**
688+
* Memory for temporary arrays required by evaluation and integration.
689+
*/
690+
SharedViewScratchPad scratch_pad;
654691
};
655692

656693

0 commit comments

Comments
 (0)