Skip to content

Commit c3fdc71

Browse files
committed
Make work on GPUs
1 parent 59bdff6 commit c3fdc71

1 file changed

Lines changed: 14 additions & 26 deletions

File tree

src/pgen/turbulence.cpp

Lines changed: 14 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,11 @@
1515
#include <cstring> // strcmp()
1616

1717
// heffte headers
18-
#include "globals.hpp"
1918
#include "heffte.h"
2019

2120
// Parthenon headers
2221
#include "basic_types.hpp"
22+
#include "globals.hpp"
2323
#include "kokkos_abstraction.hpp"
2424
#include "mesh/mesh.hpp"
2525
#include <iomanip>
@@ -581,14 +581,12 @@ void UserMeshWorkBeforeOutput(Mesh *pmesh, ParameterInput *pin,
581581

582582
// TODO(pgrete) not nice, make nicer
583583
#ifndef KOKKOS_ENABLE_CUDA
584-
// if constexpr (std::is_same_v<Kokkos::DefaultExecutionSpace::memory_space,
585-
// Kokkos::HostSpace>) {
586584
using backend_tag = heffte::backend::default_backend<heffte::tag::cpu>::type;
587585
#else
588-
// } else {
589586
using backend_tag = heffte::backend::default_backend<heffte::tag::gpu>::type;
590-
// using backend_tag = heffte::backend::cufft;
591-
// }
587+
PARTHENON_REQUIRE_THROWS(heffte::gpu::device_count() == 1,
588+
"To make this work, we need to ensure that Kokkos and heffte "
589+
"use the same GPUs. So hard fail for now.");
592590
#endif
593591

594592
// wrapper around MPI_Comm_rank() and MPI_Comm_size(), using this is optional
@@ -667,20 +665,6 @@ void UserMeshWorkBeforeOutput(Mesh *pmesh, ParameterInput *pin,
667665
<< " and the real order is for idx 012: " << inbox.order[0] << inbox.order[1]
668666
<< inbox.order[2] << "\n";
669667

670-
// TODO(pgrete) not nice, make nicer
671-
#ifdef KOKKOS_ENABLE_CUDA
672-
// if constexpr (!std::is_same_v<Kokkos::DefaultExecutionSpace::memory_space,
673-
// Kokkos::HostSpace>) {
674-
PARTHENON_REQUIRE_THROWS(heffte::gpu::device_count() == 1,
675-
"To make this work, we need to ensure that Kokkos and heffte "
676-
"use the same GPUs. So hard fail for now.");
677-
// if (heffte::gpu::device_count() > 1) {
678-
// on a multi-gpu system, distribute the devices across the mpi ranks
679-
// heffte::gpu::device_set(heffte::mpi::comm_rank(comm) %
680-
// heffte::gpu::device_count());
681-
// }
682-
// }
683-
#endif
684668
// define the heffte class and the input and output geometry
685669
heffte::fft3d_r2c<backend_tag> fft(inbox, outbox, r2c_direction, comm);
686670

@@ -748,11 +732,12 @@ void UserMeshWorkBeforeOutput(Mesh *pmesh, ParameterInput *pin,
748732
const auto outidx =
749733
((k - kb.s) * (jb.e - jb.s + 1) + (j - jb.s)) * (ib.e - ib.s + 1) + i - ib.s;
750734

751-
auto val2 = SQR(std::abs(output[outidx])) +
752-
SQR(std::abs(output[outidx + fft_size_outbox])) +
753-
SQR(std::abs(output[outidx + 2 * fft_size_outbox]));
735+
auto val = SQR(output[outidx].real()) + SQR(output[outidx].imag()) +
736+
SQR(output[outidx + fft_size_outbox].real()) +
737+
SQR(output[outidx + fft_size_outbox].imag()) +
738+
SQR(output[outidx + 2 * fft_size_outbox].real()) +
739+
SQR(output[outidx + 2 * fft_size_outbox].imag());
754740

755-
auto val = SQR(std::abs(output[outidx].real()));
756741
// account for Hermitian symmetry of r2c transform
757742
const auto fac = ((k_x > 0) && (2 * k_x != gnx1)) ? 2.0 : 1.0;
758743

@@ -790,8 +775,11 @@ void UserMeshWorkBeforeOutput(Mesh *pmesh, ParameterInput *pin,
790775
}
791776

792777
outfile << tm.ncycle << "," << tm.time << "," << num_bins;
793-
for (int i = 0; i < spectra_h.size(); i++) {
794-
outfile << "," << spectra_h(i);
778+
779+
for (int j = 0; j < 3; j++) {
780+
for (int i = 0; i < num_bins; i++) {
781+
outfile << "," << spectra_h(i, j);
782+
}
795783
}
796784
outfile << std::endl;
797785

0 commit comments

Comments
 (0)