|
15 | 15 | #include <cstring> // strcmp() |
16 | 16 |
|
17 | 17 | // heffte headers |
18 | | -#include "globals.hpp" |
19 | 18 | #include "heffte.h" |
20 | 19 |
|
21 | 20 | // Parthenon headers |
22 | 21 | #include "basic_types.hpp" |
| 22 | +#include "globals.hpp" |
23 | 23 | #include "kokkos_abstraction.hpp" |
24 | 24 | #include "mesh/mesh.hpp" |
25 | 25 | #include <iomanip> |
@@ -581,14 +581,12 @@ void UserMeshWorkBeforeOutput(Mesh *pmesh, ParameterInput *pin, |
581 | 581 |
|
582 | 582 | // TODO(pgrete) not nice, make nicer |
583 | 583 | #ifndef KOKKOS_ENABLE_CUDA |
584 | | - // if constexpr (std::is_same_v<Kokkos::DefaultExecutionSpace::memory_space, |
585 | | - // Kokkos::HostSpace>) { |
586 | 584 | using backend_tag = heffte::backend::default_backend<heffte::tag::cpu>::type; |
587 | 585 | #else |
588 | | - // } else { |
589 | 586 | using backend_tag = heffte::backend::default_backend<heffte::tag::gpu>::type; |
590 | | - // using backend_tag = heffte::backend::cufft; |
591 | | -// } |
| 587 | + PARTHENON_REQUIRE_THROWS(heffte::gpu::device_count() == 1, |
| 588 | + "To make this work, we need to ensure that Kokkos and heffte " |
| 589 | + "use the same GPUs. So hard fail for now."); |
592 | 590 | #endif |
593 | 591 |
|
594 | 592 | // wrapper around MPI_Comm_rank() and MPI_Comm_size(), using this is optional |
@@ -667,20 +665,6 @@ void UserMeshWorkBeforeOutput(Mesh *pmesh, ParameterInput *pin, |
667 | 665 | << " and the real order is for idx 012: " << inbox.order[0] << inbox.order[1] |
668 | 666 | << inbox.order[2] << "\n"; |
669 | 667 |
|
670 | | - // TODO(pgrete) not nice, make nicer |
671 | | -#ifdef KOKKOS_ENABLE_CUDA |
672 | | - // if constexpr (!std::is_same_v<Kokkos::DefaultExecutionSpace::memory_space, |
673 | | - // Kokkos::HostSpace>) { |
674 | | - PARTHENON_REQUIRE_THROWS(heffte::gpu::device_count() == 1, |
675 | | - "To make this work, we need to ensure that Kokkos and heffte " |
676 | | - "use the same GPUs. So hard fail for now."); |
677 | | - // if (heffte::gpu::device_count() > 1) { |
678 | | - // on a multi-gpu system, distribute the devices across the mpi ranks |
679 | | - // heffte::gpu::device_set(heffte::mpi::comm_rank(comm) % |
680 | | - // heffte::gpu::device_count()); |
681 | | - // } |
682 | | -// } |
683 | | -#endif |
684 | 668 | // define the heffte class and the input and output geometry |
685 | 669 | heffte::fft3d_r2c<backend_tag> fft(inbox, outbox, r2c_direction, comm); |
686 | 670 |
|
@@ -748,11 +732,12 @@ void UserMeshWorkBeforeOutput(Mesh *pmesh, ParameterInput *pin, |
748 | 732 | const auto outidx = |
749 | 733 | ((k - kb.s) * (jb.e - jb.s + 1) + (j - jb.s)) * (ib.e - ib.s + 1) + i - ib.s; |
750 | 734 |
|
751 | | - auto val2 = SQR(std::abs(output[outidx])) + |
752 | | - SQR(std::abs(output[outidx + fft_size_outbox])) + |
753 | | - SQR(std::abs(output[outidx + 2 * fft_size_outbox])); |
| 735 | + auto val = SQR(output[outidx].real()) + SQR(output[outidx].imag()) + |
| 736 | + SQR(output[outidx + fft_size_outbox].real()) + |
| 737 | + SQR(output[outidx + fft_size_outbox].imag()) + |
| 738 | + SQR(output[outidx + 2 * fft_size_outbox].real()) + |
| 739 | + SQR(output[outidx + 2 * fft_size_outbox].imag()); |
754 | 740 |
|
755 | | - auto val = SQR(std::abs(output[outidx].real())); |
756 | 741 | // account for Hermitian symmetry of r2c transform |
757 | 742 | const auto fac = ((k_x > 0) && (2 * k_x != gnx1)) ? 2.0 : 1.0; |
758 | 743 |
|
@@ -790,8 +775,11 @@ void UserMeshWorkBeforeOutput(Mesh *pmesh, ParameterInput *pin, |
790 | 775 | } |
791 | 776 |
|
792 | 777 | outfile << tm.ncycle << "," << tm.time << "," << num_bins; |
793 | | - for (int i = 0; i < spectra_h.size(); i++) { |
794 | | - outfile << "," << spectra_h(i); |
| 778 | + |
| 779 | + for (int j = 0; j < 3; j++) { |
| 780 | + for (int i = 0; i < num_bins; i++) { |
| 781 | + outfile << "," << spectra_h(i, j); |
| 782 | + } |
795 | 783 | } |
796 | 784 | outfile << std::endl; |
797 | 785 |
|
|
0 commit comments