Skip to content

Commit b7e9564

Browse files
committed
portable matrix-free: fix GPU crash in compute_diagonal()
fixes dealii#18210 The Functor passed to Kokkos::parallel_for() is placed into constant memory, which is read-only. This means the HelmholtzOperatorQuad can not be modified by setting a member variable to the current cell index for example. Instead, add access functions for the necessary functions to FEEvaluation and query the information as needed.
1 parent a6be64a commit b7e9564

File tree

5 files changed

+71
-37
lines changed

5 files changed

+71
-37
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
New: Portable::FEEvaluation::get_current_cell_index() and
2+
Portable::FEEvaluation::get_matrix_free_data() allow querying
3+
the current Portable::MatrixFree::Data object and the current cell index respectively.
4+
<br>
5+
(Daniel Arndt, Timo Heister, 2025/03/19)

examples/step-64/step-64.cc

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -122,39 +122,23 @@ namespace Step64
122122
class HelmholtzOperatorQuad
123123
{
124124
public:
125-
DEAL_II_HOST_DEVICE HelmholtzOperatorQuad(
126-
const typename Portable::MatrixFree<dim, double>::Data *gpu_data,
127-
double *coef,
128-
int cell)
129-
: gpu_data(gpu_data)
130-
, coef(coef)
131-
, cell(cell)
125+
DEAL_II_HOST_DEVICE HelmholtzOperatorQuad(double *coef)
126+
: coef(coef)
132127
{}
133128

134129
DEAL_II_HOST_DEVICE void operator()(
135130
Portable::FEEvaluation<dim, fe_degree, fe_degree + 1, 1, double> *fe_eval,
136131
const int q_point) const;
137132

138-
DEAL_II_HOST_DEVICE void set_matrix_free_data(
139-
const typename Portable::MatrixFree<dim, double>::Data &data)
140-
{
141-
gpu_data = &data;
142-
}
143133

144-
DEAL_II_HOST_DEVICE void set_cell(int new_cell)
145-
{
146-
cell = new_cell;
147-
}
148134

149135
static const unsigned int n_q_points =
150136
dealii::Utilities::pow(fe_degree + 1, dim);
151137

152138
static const unsigned int n_local_dofs = n_q_points;
153139

154140
private:
155-
const typename Portable::MatrixFree<dim, double>::Data *gpu_data;
156-
double *coef;
157-
int cell;
141+
double *coef;
158142
};
159143

160144

@@ -170,10 +154,17 @@ namespace Step64
170154
Portable::FEEvaluation<dim, fe_degree, fe_degree + 1, 1, double> *fe_eval,
171155
const int q_point) const
172156
{
173-
const unsigned int pos =
174-
gpu_data->local_q_point_id(cell, n_q_points, q_point);
157+
const int cell_index = fe_eval->get_current_cell_index();
158+
const typename Portable::MatrixFree<dim, double>::Data *gpu_data =
159+
fe_eval->get_matrix_free_data();
175160

176-
fe_eval->submit_value(coef[pos] * fe_eval->get_value(q_point), q_point);
161+
const unsigned int position =
162+
gpu_data->local_q_point_id(cell_index, n_q_points, q_point);
163+
auto coeff = coef[position];
164+
165+
auto value = fe_eval->get_value(q_point);
166+
167+
fe_eval->submit_value(coeff * value, q_point);
177168
fe_eval->submit_gradient(fe_eval->get_gradient(q_point), q_point);
178169
}
179170

@@ -218,7 +209,7 @@ namespace Step64
218209
// vector.
219210
template <int dim, int fe_degree>
220211
DEAL_II_HOST_DEVICE void LocalHelmholtzOperator<dim, fe_degree>::operator()(
221-
const unsigned int cell,
212+
const unsigned int /*cell*/,
222213
const typename Portable::MatrixFree<dim, double>::Data *gpu_data,
223214
Portable::SharedData<dim, double> *shared_data,
224215
const double *src,
@@ -229,7 +220,7 @@ namespace Step64
229220
fe_eval.read_dof_values(src);
230221
fe_eval.evaluate(EvaluationFlags::values | EvaluationFlags::gradients);
231222
fe_eval.apply_for_each_quad_point(
232-
HelmholtzOperatorQuad<dim, fe_degree>(gpu_data, coef, cell));
223+
HelmholtzOperatorQuad<dim, fe_degree>(coef));
233224
fe_eval.integrate(EvaluationFlags::values | EvaluationFlags::gradients);
234225
fe_eval.distribute_local_to_global(dst);
235226
}
@@ -363,7 +354,7 @@ namespace Step64
363354
initialize_dof_vector(inverse_diagonal);
364355

365356
HelmholtzOperatorQuad<dim, fe_degree> helmholtz_operator_quad(
366-
nullptr, coef.get_values(), -1);
357+
coef.get_values());
367358

368359
MatrixFreeTools::compute_diagonal<dim, fe_degree, fe_degree + 1, 1, double>(
369360
mf_data,

include/deal.II/matrix_free/portable_fe_evaluation.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,22 @@ namespace Portable
126126
DEAL_II_HOST_DEVICE
127127
FEEvaluation(const data_type *data, SharedData<dim, Number> *shdata);
128128

129+
/**
130+
* Return the index of the current cell.
131+
*/
132+
DEAL_II_HOST_DEVICE
133+
int
134+
get_current_cell_index();
135+
136+
/**
137+
* Return a pointer to the MatrixFree<dim, Number>::Data object on device
138+
* that contains necessary constraint, dof index, and shape function
139+
* information for evaluation used in the matrix-free kernels.
140+
*/
141+
DEAL_II_HOST_DEVICE
142+
const data_type *
143+
get_matrix_free_data();
144+
129145
/**
130146
* For the vector @p src, read out the values on the degrees of freedom of
131147
* the current cell, and store them internally. Similar functionality as
@@ -270,6 +286,38 @@ namespace Portable
270286

271287

272288

289+
template <int dim,
290+
int fe_degree,
291+
int n_q_points_1d,
292+
int n_components_,
293+
typename Number>
294+
DEAL_II_HOST_DEVICE int
295+
FEEvaluation<dim, fe_degree, n_q_points_1d, n_components_, Number>::
296+
get_current_cell_index()
297+
{
298+
return cell_id;
299+
}
300+
301+
302+
303+
template <int dim,
304+
int fe_degree,
305+
int n_q_points_1d,
306+
int n_components_,
307+
typename Number>
308+
DEAL_II_HOST_DEVICE const typename FEEvaluation<dim,
309+
fe_degree,
310+
n_q_points_1d,
311+
n_components_,
312+
Number>::data_type *
313+
FEEvaluation<dim, fe_degree, n_q_points_1d, n_components_, Number>::
314+
get_matrix_free_data()
315+
{
316+
return data;
317+
}
318+
319+
320+
273321
template <int dim,
274322
int fe_degree,
275323
int n_q_points_1d,

include/deal.II/matrix_free/tools.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1407,8 +1407,7 @@ namespace MatrixFreeTools
14071407
Portable::
14081408
FEEvaluation<dim, fe_degree, n_q_points_1d, n_components, Number>
14091409
fe_eval(gpu_data, shared_data);
1410-
m_quad_operation.set_matrix_free_data(*gpu_data);
1411-
m_quad_operation.set_cell(cell);
1410+
14121411
constexpr int dofs_per_cell = decltype(fe_eval)::tensor_dofs_per_cell;
14131412
typename decltype(fe_eval)::value_type
14141413
diagonal[dofs_per_cell / n_components] = {};
@@ -1503,7 +1502,7 @@ namespace MatrixFreeTools
15031502
dealii::Utilities::pow(n_q_points_1d, dim);
15041503

15051504
private:
1506-
mutable QuadOperation m_quad_operation;
1505+
const QuadOperation m_quad_operation;
15071506
const EvaluationFlags::EvaluationFlags m_evaluation_flags;
15081507
const EvaluationFlags::EvaluationFlags m_integration_flags;
15091508
};

tests/matrix_free_kokkos/compute_diagonal_util.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,6 @@ class LaplaceOperatorQuad
5959
fe_eval->submit_gradient(fe_eval->get_gradient(q_point), q_point);
6060
}
6161

62-
DEAL_II_HOST_DEVICE
63-
void
64-
set_cell(int)
65-
{}
66-
67-
DEAL_II_HOST_DEVICE void
68-
set_matrix_free_data(const typename Portable::MatrixFree<dim, Number>::Data &)
69-
{}
70-
7162
static const unsigned int n_q_points =
7263
dealii::Utilities::pow(fe_degree + 1, dim);
7364

0 commit comments

Comments
 (0)