Skip to content

Commit 65d1c05

Browse files
authored
Remove ManagedVector from Particles/ (#1273)
1 parent d3deb23 commit 65d1c05

File tree

3 files changed

+54
-41
lines changed

3 files changed

+54
-41
lines changed

Source/Particles/PhysicalParticleContainer.cpp

Lines changed: 35 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -641,8 +641,8 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
641641
overlap_realbox.lo(2))};
642642

643643
// count the number of particles that each cell in overlap_box could add
644-
Gpu::DeviceVector<int> counts(overlap_box.numPts()+1, 0);
645-
Gpu::DeviceVector<int> offset(overlap_box.numPts()+1, 0);
644+
Gpu::DeviceVector<int> counts(overlap_box.numPts(), 0);
645+
Gpu::DeviceVector<int> offset(overlap_box.numPts());
646646
auto pcounts = counts.data();
647647
int lrrfac = rrfac;
648648
int lrefine_injection = refine_injection;
@@ -674,16 +674,10 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
674674
amrex::ignore_unused(k);
675675
#endif
676676
});
677-
Gpu::exclusive_scan(counts.begin(), counts.end(), offset.begin());
678677

679678
// Max number of new particles. All of them are created,
680679
// and invalid ones are then discarded
681-
int max_new_particles;
682-
#ifdef AMREX_USE_GPU
683-
Gpu::dtoh_memcpy(&max_new_particles, offset.dataPtr()+overlap_box.numPts(), sizeof(int));
684-
#else
685-
std::memcpy(&max_new_particles, offset.dataPtr()+overlap_box.numPts(), sizeof(int));
686-
#endif
680+
int max_new_particles = Scan::ExclusiveSum(counts.size(), counts.data(), offset.data());
687681

688682
// Update NextID to include particles created in this function
689683
Long pid;
@@ -913,13 +907,13 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
913907
}
914908
});
915909

910+
amrex::Gpu::synchronize();
911+
916912
if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers)
917913
{
918-
amrex::Gpu::synchronize();
919914
wt = amrex::second() - wt;
920915
amrex::HostDevice::Atomic::Add( &(*cost)[mfi.index()], wt);
921916
}
922-
amrex::Gpu::synchronize();
923917
}
924918

925919
// The function that calls this is responsible for redistributing particles.
@@ -1149,9 +1143,10 @@ PhysicalParticleContainer::Evolve (int lev,
11491143
}
11501144
}
11511145

1146+
amrex::Gpu::synchronize();
1147+
11521148
if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers)
11531149
{
1154-
amrex::Gpu::synchronize();
11551150
wt = amrex::second() - wt;
11561151
amrex::HostDevice::Atomic::Add( &(*cost)[pti.index()], wt);
11571152
}
@@ -1255,7 +1250,7 @@ PhysicalParticleContainer::SplitParticles (int lev)
12551250
long np_split;
12561251
if(split_type==0)
12571252
{
1258-
np_split = pow(2, AMREX_SPACEDIM);
1253+
np_split = (AMREX_SPACEDIM == 3) ? 8 : 4;
12591254
} else {
12601255
np_split = 2*AMREX_SPACEDIM;
12611256
}
@@ -1599,8 +1594,8 @@ PhysicalParticleContainer::GetParticleSlice (
15991594
// from going out of scope after each iteration, while the kernels
16001595
// may still need access to them.
16011596
// Note that the destructor for WarpXParIter is synchronized.
1602-
amrex::Gpu::ManagedDeviceVector<int> FlagForPartCopy;
1603-
amrex::Gpu::ManagedDeviceVector<int> IndexForPartCopy;
1597+
amrex::Gpu::DeviceVector<int> FlagForPartCopy;
1598+
amrex::Gpu::DeviceVector<int> IndexForPartCopy;
16041599
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
16051600
{
16061601
const Box& box = pti.validbox();
@@ -1658,9 +1653,7 @@ PhysicalParticleContainer::GetParticleSlice (
16581653
// exclusive scan to obtain location indices using flag values
16591654
// These location indices are used to copy data from
16601655
// src to dst when the copy-flag is set to 1.
1661-
amrex::Gpu::exclusive_scan(Flag,Flag+np,IndexLocation);
1662-
1663-
const int total_partdiag_size = IndexLocation[np-1] + Flag[np-1];
1656+
const int total_partdiag_size = amrex::Scan::ExclusiveSum(np,Flag,IndexLocation);
16641657

16651658
// allocate array size for diagnostic particle array
16661659
diagnostic_particles[lev][index].resize(total_partdiag_size);
@@ -1740,6 +1733,7 @@ PhysicalParticleContainer::GetParticleSlice (
17401733
diag_uzp[loc] = uzp;
17411734
}
17421735
});
1736+
Gpu::synchronize(); // because of FlagForPartCopy & IndexForPartCopy
17431737
}
17441738
}
17451739
}
@@ -1936,10 +1930,10 @@ PhysicalParticleContainer::InitIonizationModule ()
19361930
// Get atomic number and ionization energies from file
19371931
int ion_element_id = ion_map_ids[physical_element];
19381932
ion_atomic_number = ion_atomic_numbers[ion_element_id];
1939-
ionization_energies.resize(ion_atomic_number);
1933+
Vector<Real> h_ionization_energies(ion_atomic_number);
19401934
int offset = ion_energy_offsets[ion_element_id];
19411935
for(int i=0; i<ion_atomic_number; i++){
1942-
ionization_energies[i] = table_ionization_energies[i+offset];
1936+
h_ionization_energies[i] = table_ionization_energies[i+offset];
19431937
}
19441938
// Compute ADK prefactors (See Chen, JCP 236 (2013), equation (2))
19451939
// For now, we assume l=0 and m=0.
@@ -1949,22 +1943,35 @@ PhysicalParticleContainer::InitIonizationModule ()
19491943
Real Ea = PhysConst::m_e * PhysConst::c*PhysConst::c /PhysConst::q_e *
19501944
std::pow(PhysConst::alpha,4)/PhysConst::r_e;
19511945
Real UH = table_ionization_energies[0];
1952-
Real l_eff = std::sqrt(UH/ionization_energies[0]) - 1.;
1946+
Real l_eff = std::sqrt(UH/h_ionization_energies[0]) - 1.;
19531947

19541948
const Real dt = WarpX::GetInstance().getdt(0);
19551949

1950+
ionization_energies.resize(ion_atomic_number);
19561951
adk_power.resize(ion_atomic_number);
19571952
adk_prefactor.resize(ion_atomic_number);
19581953
adk_exp_prefactor.resize(ion_atomic_number);
1959-
for (int i=0; i<ion_atomic_number; ++i){
1960-
Real n_eff = (i+1) * std::sqrt(UH/ionization_energies[i]);
1954+
1955+
Gpu::copyAsync(Gpu::hostToDevice,
1956+
h_ionization_energies.begin(), h_ionization_energies.end(),
1957+
ionization_energies.begin());
1958+
1959+
Real const* AMREX_RESTRICT p_ionization_energies = ionization_energies.data();
1960+
Real * AMREX_RESTRICT p_adk_power = adk_power.data();
1961+
Real * AMREX_RESTRICT p_adk_prefactor = adk_prefactor.data();
1962+
Real * AMREX_RESTRICT p_adk_exp_prefactor = adk_exp_prefactor.data();
1963+
amrex::ParallelFor(ion_atomic_number, [=] AMREX_GPU_DEVICE (int i) noexcept
1964+
{
1965+
Real n_eff = (i+1) * std::sqrt(UH/p_ionization_energies[i]);
19611966
Real C2 = std::pow(2,2*n_eff)/(n_eff*tgamma(n_eff+l_eff+1)*tgamma(n_eff-l_eff));
1962-
adk_power[i] = -(2*n_eff - 1);
1963-
Real Uion = ionization_energies[i];
1964-
adk_prefactor[i] = dt * wa * C2 * ( Uion/(2*UH) )
1967+
p_adk_power[i] = -(2*n_eff - 1);
1968+
Real Uion = p_ionization_energies[i];
1969+
p_adk_prefactor[i] = dt * wa * C2 * ( Uion/(2*UH) )
19651970
* std::pow(2*std::pow((Uion/UH),3./2)*Ea,2*n_eff - 1);
1966-
adk_exp_prefactor[i] = -2./3 * std::pow( Uion/UH,3./2) * Ea;
1967-
}
1971+
p_adk_exp_prefactor[i] = -2./3 * std::pow( Uion/UH,3./2) * Ea;
1972+
});
1973+
1974+
Gpu::synchronize();
19681975
}
19691976

19701977
IonizationFilterFunc

Source/Particles/RigidInjectedParticleContainer.cpp

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -246,7 +246,7 @@ RigidInjectedParticleContainer::PushPX (WarpXParIter& pti,
246246
auto& uzp = attribs[PIdx::uz];
247247

248248
// Save the position and momenta, making copies
249-
Gpu::ManagedDeviceVector<ParticleReal> xp_save, yp_save, zp_save;
249+
Gpu::DeviceVector<ParticleReal> xp_save, yp_save, zp_save;
250250
RealVector uxp_save, uyp_save, uzp_save;
251251

252252
const auto GetPosition = GetParticlePosition(pti);
@@ -453,9 +453,12 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
453453
}
454454

455455
// Save the position and momenta, making copies
456-
auto uxp_save = uxp;
457-
auto uyp_save = uyp;
458-
auto uzp_save = uzp;
456+
amrex::Gpu::DeviceVector<ParticleReal> uxp_save(np);
457+
amrex::Gpu::DeviceVector<ParticleReal> uyp_save(np);
458+
amrex::Gpu::DeviceVector<ParticleReal> uzp_save(np);
459+
ParticleReal* const AMREX_RESTRICT ux_save = uxp_save.dataPtr();
460+
ParticleReal* const AMREX_RESTRICT uy_save = uyp_save.dataPtr();
461+
ParticleReal* const AMREX_RESTRICT uz_save = uzp_save.dataPtr();
459462

460463
// Loop over the particles and update their momentum
461464
const amrex::Real q = this->charge;
@@ -466,6 +469,10 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
466469

467470
amrex::ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip)
468471
{
472+
ux_save[ip] = uxpp[ip];
473+
uy_save[ip] = uypp[ip];
474+
uz_save[ip] = uzpp[ip];
475+
469476
amrex::ParticleReal xp, yp, zp;
470477
getPosition(ip, xp, yp, zp);
471478

@@ -513,9 +520,6 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
513520
// Undo the push for particles not injected yet.
514521
// It is assumed that PushP will only be called on the first and last steps
515522
// and that no particles will cross zinject_plane.
516-
const ParticleReal* const AMREX_RESTRICT ux_save = uxp_save.dataPtr();
517-
const ParticleReal* const AMREX_RESTRICT uy_save = uyp_save.dataPtr();
518-
const ParticleReal* const AMREX_RESTRICT uz_save = uzp_save.dataPtr();
519523
const ParticleReal zz = zinject_plane_levels[lev];
520524
amrex::ParallelFor( pti.numParticles(), [=] AMREX_GPU_DEVICE (long i)
521525
{
@@ -527,6 +531,8 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
527531
uzpp[i] = uz_save[i];
528532
}
529533
});
534+
535+
amrex::Gpu::synchronize();
530536
}
531537
}
532538
}

Source/Particles/WarpXParticleContainer.H

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -368,10 +368,10 @@ protected:
368368
std::string ionization_product_name;
369369
int ion_atomic_number;
370370
int ionization_initial_level = 0;
371-
amrex::Gpu::ManagedVector<amrex::Real> ionization_energies;
372-
amrex::Gpu::ManagedVector<amrex::Real> adk_power;
373-
amrex::Gpu::ManagedVector<amrex::Real> adk_prefactor;
374-
amrex::Gpu::ManagedVector<amrex::Real> adk_exp_prefactor;
371+
amrex::Gpu::DeviceVector<amrex::Real> ionization_energies;
372+
amrex::Gpu::DeviceVector<amrex::Real> adk_power;
373+
amrex::Gpu::DeviceVector<amrex::Real> adk_prefactor;
374+
amrex::Gpu::DeviceVector<amrex::Real> adk_exp_prefactor;
375375
std::string physical_element;
376376

377377
int do_resampling = 0;
@@ -402,9 +402,9 @@ protected:
402402
amrex::Vector<amrex::FArrayBox> local_jz;
403403

404404
public:
405-
using DataContainer = amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>;
406405
using PairIndex = std::pair<int, int>;
407-
using TmpParticleTile = std::array<DataContainer, TmpIdx::nattribs>;
406+
using TmpParticleTile = std::array<amrex::Gpu::DeviceVector<amrex::ParticleReal>,
407+
TmpIdx::nattribs>;
408408
using TmpParticles = amrex::Vector<std::map<PairIndex, TmpParticleTile> >;
409409

410410
protected:

0 commit comments

Comments
 (0)