@@ -117,11 +117,14 @@ FloatType complex_norm2(std::complex<FloatType> const &z) {
117
117
template <Utils::MemoryOrder order_in, Utils::MemoryOrder order_out, typename T>
118
118
auto transpose (std::span<T> const &flat_array, Utils::Vector3i const &shape) {
119
119
auto constexpr mesh_start = Utils::Vector3i::broadcast (0 );
120
+ std::size_t index_out{};
120
121
Utils::Vector3i indices{};
121
122
std::vector<T> flat_array_t (flat_array.size ());
122
- for_each_3d (mesh_start, shape, indices, [&]() {
123
- auto const index_in = Utils::get_linear_index (indices, shape, order_in);
124
- auto const index_out = Utils::get_linear_index (indices, shape, order_out);
123
+ for_each_3d_lin<order_out>(mesh_start, shape, indices, index_out, [&]() {
124
+ auto const index_in = Utils::get_linear_index<order_in>(indices, shape);
125
+ #ifdef ADDITIONAL_CHECKS
126
+ assert (index_out == Utils::get_linear_index<order_out>(indices, shape));
127
+ #endif
125
128
flat_array_t [index_out] = flat_array[index_in];
126
129
});
127
130
return flat_array_t ;
@@ -451,10 +454,11 @@ void CoulombP3MImpl<FloatType, Architecture>::kernel_ks_charge_density() {
451
454
p3m.local_mesh .dim );
452
455
453
456
// get real space charge density without ghost layers
454
- auto charge_density_no_halos = extract_block (
455
- p3m.rs_charge_density , p3m.local_mesh .dim , p3m.local_mesh .n_halo_ld ,
456
- p3m.local_mesh .dim - p3m.local_mesh .n_halo_ur ,
457
- Utils::MemoryOrder::ROW_MAJOR, Utils::MemoryOrder::COLUMN_MAJOR);
457
+ auto charge_density_no_halos =
458
+ extract_block<Utils::MemoryOrder::ROW_MAJOR,
459
+ Utils::MemoryOrder::COLUMN_MAJOR>(
460
+ p3m.rs_charge_density , p3m.local_mesh .dim , p3m.local_mesh .n_halo_ld ,
461
+ p3m.local_mesh .dim - p3m.local_mesh .n_halo_ur );
458
462
459
463
// Set up the FFT using the Heffte library.
460
464
// This is in global mesh coordinates without any ghost layers
@@ -466,14 +470,14 @@ void CoulombP3MImpl<FloatType, Architecture>::kernel_ks_charge_density() {
466
470
467
471
template <typename FloatType, Arch Architecture>
468
472
void CoulombP3MImpl<FloatType, Architecture>::kernel_rs_electric_field() {
469
- auto constexpr mesh_start = Utils::Vector3i::broadcast ( 0 );
470
- auto const & mesh_stop = p3m.fft ->ks_local_size ();
473
+ auto const mesh_start = p3m. fft -> ks_local_ld_index ( );
474
+ auto const mesh_stop = mesh_start + p3m.fft ->ks_local_size ();
471
475
auto const &box_geo = *get_system ().box_geo ;
472
- auto indices = Utils::Vector3i{};
476
+ Utils::Vector3i indices {};
473
477
474
478
// hold electric field in k-space
475
479
std::array<std::span<std::complex<FloatType>>, 3 > ks_E_fields;
476
- auto const fft_mesh_length = get_size_from_shape (mesh_stop);
480
+ auto const fft_mesh_length = get_size_from_shape (mesh_stop - mesh_start );
477
481
for (auto d : {0u , 1u , 2u }) {
478
482
auto const offset = d * fft_mesh_length;
479
483
auto const begin = p3m.ks_E_fields_storage .begin () + offset;
@@ -485,30 +489,35 @@ void CoulombP3MImpl<FloatType, Architecture>::kernel_rs_electric_field() {
485
489
Utils::Vector3<FloatType>((2 . * std::numbers::pi ) * box_geo.length_inv ());
486
490
487
491
// compute electric field, Eq. (3.49) @cite deserno00b
488
- for_each_3d (mesh_start, mesh_stop, indices, [&]() {
489
- auto const global_index = indices + p3m.fft ->ks_local_ld_index ();
490
- auto const local_index = Utils::get_linear_index (
491
- indices, mesh_stop, Utils::MemoryOrder::COLUMN_MAJOR);
492
- auto const phi_hat = multiply_complex_by_real (
493
- p3m.ks_charge_density [local_index], p3m.g_force [local_index]);
494
-
495
- for (auto d : {0u , 1u , 2u }) {
496
- // wave vector of the current mesh point
497
- auto const k = FloatType (p3m.d_op [d][global_index[d]]) * wavevector[d];
498
- // electric field in k-space
499
- ks_E_fields[d][local_index] = multiply_complex_by_imaginary (phi_hat, k);
500
- }
501
- });
492
+ std::size_t local_index{};
493
+ for_each_3d_lin<Utils::MemoryOrder::COLUMN_MAJOR>(
494
+ mesh_start, mesh_stop, indices, local_index, [&]() {
495
+ #ifdef ADDITIONAL_CHECKS
496
+ assert (local_index ==
497
+ Utils::get_linear_index<Utils::MemoryOrder::COLUMN_MAJOR>(
498
+ indices - mesh_start, mesh_stop - mesh_start));
499
+ #endif
500
+ auto const phi_hat = multiply_complex_by_real (
501
+ p3m.ks_charge_density [local_index], p3m.g_force [local_index]);
502
+
503
+ for (auto d : {0u , 1u , 2u }) {
504
+ // wave vector of the current mesh point
505
+ auto const k = FloatType (p3m.d_op [d][indices[d]]) * wavevector[d];
506
+ // electric field in k-space
507
+ ks_E_fields[d][local_index] =
508
+ multiply_complex_by_imaginary (phi_hat, k);
509
+ }
510
+ });
502
511
503
512
// back-transform the k-space electric field to real space
504
- auto const rs_mesh_size_no_halo =
505
- get_size_from_shape (p3m.local_mesh .dim_no_halo );
506
- p3m.fft ->backward_batch (3 , p3m.ks_E_fields_storage .data (),
507
- p3m.rs_E_fields_no_halo .data ());
508
-
509
- // add zeros around the E-field in real space to make room for ghost layers
510
513
auto const size = p3m.local_mesh .ur_no_halo - p3m.local_mesh .ld_no_halo ;
514
+ auto const rs_mesh_size_no_halo = Utils::product (size);
511
515
for (auto d : {0u , 1u , 2u }) {
516
+ auto k_space = ks_E_fields[d].data ();
517
+ auto real_space = p3m.rs_E_fields_no_halo .data () + d * rs_mesh_size_no_halo;
518
+ p3m.fft ->backward (k_space, real_space);
519
+
520
+ // add zeros around the E-field in real space to make room for ghost layers
512
521
auto const offset = d * rs_mesh_size_no_halo;
513
522
auto const begin = p3m.rs_E_fields_no_halo .begin () + offset;
514
523
auto f = std::span<std::complex<FloatType>>(begin, rs_mesh_size_no_halo);
0 commit comments