4 changes: 2 additions & 2 deletions .github/workflows/cuda.yml
@@ -105,8 +105,8 @@ jobs:
export LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}
which nvcc || echo "nvcc not in PATH!"

-git clone https://github.com/AMReX-Codes/amrex.git ../amrex
-cd amrex && git checkout --detach 18d0a2861d31c52c65752a1d5856f54e08699ce3 && cd -
+git clone https://github.com/kngott/amrex.git ../amrex
+cd amrex && git checkout --detach kngott/graphviz && cd -
make COMP=gcc QED=FALSE USE_MPI=TRUE USE_GPU=TRUE USE_OMP=FALSE USE_PSATD=TRUE USE_CCACHE=TRUE -j 2

build_nvhpc21-11-nvcc:
2 changes: 1 addition & 1 deletion Regression/WarpX-GPU-tests.ini
@@ -60,7 +60,7 @@ emailBody = Check https://ccse.lbl.gov/pub/GpuRegressionTesting/WarpX/ for more

[AMReX]
dir = /home/regtester/git/amrex/
-branch = 18d0a2861d31c52c65752a1d5856f54e08699ce3
+branch = kngott/graphviz

[source]
dir = /home/regtester/git/WarpX
2 changes: 1 addition & 1 deletion Regression/WarpX-tests.ini
@@ -59,7 +59,7 @@ emailBody = Check https://ccse.lbl.gov/pub/RegressionTesting/WarpX/ for more det

[AMReX]
dir = /home/regtester/AMReX_RegTesting/amrex/
-branch = 18d0a2861d31c52c65752a1d5856f54e08699ce3
+branch = kngott/graphviz

[source]
dir = /home/regtester/AMReX_RegTesting/warpx
30 changes: 30 additions & 0 deletions Source/BoundaryConditions/PML.cpp
@@ -44,6 +44,7 @@
#include <AMReX_RealVect.H>
#include <AMReX_SPACE.H>
#include <AMReX_VisMF.H>
+#include <AMReX_Graph.H> // kngott/graphviz

#include <algorithm>
#include <cmath>
@@ -1184,13 +1185,30 @@ PML::Exchange (MultiFab& pml, MultiFab& reg, const Geometry& geom,
MultiFab::Add(totpmlmf,pml,2,0,1,0); // Sum the third split component
}

+// record metrics of this comm op in a graph before and after load balance steps
+//auto & warpx = WarpX::GetInstance();
+//auto const & lb_intervals = warpx.load_balance_intervals;
+//auto const cur_step = warpx.istep[0];
+//auto & graph = warpx.graph;
+
// Copy from the sum of PML split field to valid cells of regular grid
if (do_pml_in_domain){
// Valid cells of the PML and of the regular grid overlap
// Copy from valid cells of the PML to valid cells of the regular grid
ablastr::utils::communication::ParallelCopy(reg, totpmlmf, 0, 0, 1, IntVect(0), IntVect(0),
WarpX::do_single_precision_comms,
period);

+// record metrics of this comm op in a graph before and after load balance steps
+/*
+if (lb_intervals.contains(cur_step+2) || // before LB
+    lb_intervals.contains(cur_step+1)    // after LB
+   )
+{
+    graph.addParallelCopy("PML-in-domain-comm", "tmpregmf", "totpmlmf", 0.0,
+                          reg, totpmlmf, 0, 0, 1, IntVect(0), ngr, period);
+    //graph.print_table("comm_data");
+}*/
} else {
// Valid cells of the PML only overlap with guard cells of regular grid
// (and outermost valid cell of the regular grid, for nodal direction)
@@ -1201,6 +1219,18 @@ PML::Exchange (MultiFab& pml, MultiFab& reg, const Geometry& geom,
ablastr::utils::communication::ParallelCopy(tmpregmf, totpmlmf, 0, 0, 1, IntVect(0), ngr,
WarpX::do_single_precision_comms,
period);

+// record metrics of this comm op in a graph before and after load balance steps
+/*
+if (lb_intervals.contains(cur_step+2) || // before LB
+    lb_intervals.contains(cur_step+1)    // after LB
+   )
+{
+    graph.addParallelCopy("PML-comm", "tmpregmf", "totpmlmf", 0.0,
+                          tmpregmf, totpmlmf, 0, 0, 1, IntVect(0), ngr, period);
+    //graph.print_table("comm_data");
+}*/

#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
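Note: both recording hooks in PML::Exchange are left commented out in this draft. For reference, a minimal sketch of what the enabled pattern would look like, assuming the amrex::Graph API from the kngott/graphviz AMReX fork and the WarpX members (graph, load_balance_intervals) that the disabled code references:

    // Sketch only: mirrors the disabled hooks above. amrex::Graph and the
    // WarpX::graph member exist only on the kngott/graphviz fork / this draft.
    auto & warpx = WarpX::GetInstance();
    auto const & lb_intervals = warpx.load_balance_intervals;
    auto const cur_step = warpx.istep[0];
    auto & graph = warpx.graph;

    // Record the comm pattern on the step just before and the step just after
    // a scheduled load balance, so the two graphs can be compared.
    if (lb_intervals.contains(cur_step+2) || lb_intervals.contains(cur_step+1))
    {
        graph.addParallelCopy("PML-comm", "tmpregmf", "totpmlmf", 0.0,
                              tmpregmf, totpmlmf, 0, 0, 1, IntVect(0), ngr, period);
    }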
121 changes: 94 additions & 27 deletions Source/Parallelization/WarpXRegrid.cpp
@@ -37,27 +37,28 @@
#include <AMReX_Vector.H>
#include <AMReX_iMultiFab.H>

+#include <AMReX_Graph.H> // kngott/graphviz

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <memory>
#include <string>
#include <utility>
#include <vector>

using namespace amrex;

void
-WarpX::LoadBalance ()
-{
+WarpX::LoadBalance () {
WARPX_PROFILE_REGION("LoadBalance");
WARPX_PROFILE("WarpX::LoadBalance()");

AMREX_ALWAYS_ASSERT(costs[0] != nullptr);

#ifdef AMREX_USE_MPI
-if (load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Heuristic)
-{
+if (load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Heuristic) {
// compute the costs on a per-rank basis
ComputeCostsHeuristic(costs);
}
@@ -67,56 +68,51 @@ WarpX::LoadBalance ()
int loadBalancedAnyLevel = false;

const int nLevels = finestLevel();
-for (int lev = 0; lev <= nLevels; ++lev)
-{
+for (int lev = 0; lev <= nLevels; ++lev) {
int doLoadBalance = false;

// Compute the new distribution mapping
DistributionMapping newdm;
const amrex::Real nboxes = costs[lev]->size();
const amrex::Real nprocs = ParallelContext::NProcsSub();
-const int nmax = static_cast<int>(std::ceil(nboxes/nprocs*load_balance_knapsack_factor));
+const int nmax = static_cast<int>(std::ceil(nboxes / nprocs * load_balance_knapsack_factor));
// These store efficiency (meaning, the average 'cost' over all ranks,
// normalized to max cost) for current and proposed distribution mappings
amrex::Real currentEfficiency = 0.0;
amrex::Real proposedEfficiency = 0.0;

newdm = (load_balance_with_sfc)
? DistributionMapping::makeSFC(*costs[lev],
currentEfficiency, proposedEfficiency,
false,
ParallelDescriptor::IOProcessorNumber())
: DistributionMapping::makeKnapSack(*costs[lev],
currentEfficiency, proposedEfficiency,
nmax,
false,
ParallelDescriptor::IOProcessorNumber());
// As specified in the above calls to makeSFC and makeKnapSack, the new
// distribution mapping is NOT communicated to all ranks; the load-balanced
// dm is up-to-date only on the root rank, and we can decide whether to broadcast
if ((load_balance_efficiency_ratio_threshold > 0.0)
-&& (ParallelDescriptor::MyProc() == ParallelDescriptor::IOProcessorNumber()))
-{
-doLoadBalance = (proposedEfficiency > load_balance_efficiency_ratio_threshold*currentEfficiency);
+&& (ParallelDescriptor::MyProc() == ParallelDescriptor::IOProcessorNumber())) {
+doLoadBalance = (proposedEfficiency >
+                 load_balance_efficiency_ratio_threshold * currentEfficiency);
}

ParallelDescriptor::Bcast(&doLoadBalance, 1,
ParallelDescriptor::IOProcessorNumber());

-if (doLoadBalance)
-{
+if (doLoadBalance) {
Vector<int> pmap;
-if (ParallelDescriptor::MyProc() == ParallelDescriptor::IOProcessorNumber())
-{
+if (ParallelDescriptor::MyProc() == ParallelDescriptor::IOProcessorNumber()) {
pmap = newdm.ProcessorMap();
-} else
-{
+} else {
pmap.resize(static_cast<std::size_t>(nboxes));
}
ParallelDescriptor::Bcast(pmap.data(), pmap.size(), ParallelDescriptor::IOProcessorNumber());

-if (ParallelDescriptor::MyProc() != ParallelDescriptor::IOProcessorNumber())
-{
+if (ParallelDescriptor::MyProc() != ParallelDescriptor::IOProcessorNumber()) {
newdm = DistributionMapping(pmap);
}

@@ -128,6 +124,77 @@ WarpX::LoadBalance ()

loadBalancedAnyLevel = loadBalancedAnyLevel || doLoadBalance;
}

+// record metrics of costs in a graph at load balance steps
+amrex::Graph graph;
+//{
+// loadBalancedAnyLevel
+// currentEfficiency
+// proposedEfficiency
+
+// load balance costs
+for (int lev = 0; lev <= finest_level; ++lev) {
+std::string name = "costs_lev";
+name.append(std::to_string(lev));
+graph.addFab(*costs[lev], name, sizeof(amrex::Real));
+std::vector<double> costs_local(costs[lev]->local_size());
+for (int n=0; n<costs[lev]->local_size(); ++n)
+{
+costs_local[n] = costs[lev]->data()[n];
+}
+double const scaling = 1.0;
+bool const available_locally = false; // costs_local holds only this rank's boxes; the full set is distributed across ranks
+graph.addNodeWeight(name, "cost_value", costs_local, scaling, available_locally);
+}

+// E and B filling patterns (from WarpXComm.cpp)
+for (int lev = 0; lev <= finest_level; ++lev)
+{
+std::array<amrex::MultiFab *, 3> mf;
+amrex::Periodicity period;
+amrex::IntVect ng = guard_cells.ng_alloc_EB;
+// no MR or fine level of MR
+//if (patch_type == PatchType::fine)
+//{
+mf = {Efield_fp[lev][0].get(), Efield_fp[lev][1].get(), Efield_fp[lev][2].get()};
+period = Geom(lev).periodicity();
+//}
+//else // coarse patch (part of MR)
+//{
+// mf = {Efield_cp[lev][0].get(), Efield_cp[lev][1].get(), Efield_cp[lev][2].get()};
+// period = Geom(lev-1).periodicity();
+//}
+int const i = 0; // just the Ex component
+const amrex::IntVect nghost = (safe_guard_cells) ? mf[i]->nGrowVect() : ng;
+//FillBoundary(*mf[i], nghost, WarpX::do_single_precision_comms, period, nodal_sync);
+std::string mf_name = "Efield_fp_lvl";
+mf_name.append(std::to_string(lev));
+double const scaling = 1.0;
+
+graph.addFillBoundary("FillBoundaryE",
+                      mf_name,
+                      scaling,
+                      *mf[i],
+                      nghost,
+                      period);
+
+}

+// PML comm patterns (TODO) - see PML.cpp
+// with and without do_pml_in_domain
+//{
+//graph.addParallelCopy("PML-comm", "tmpregmf", "totpmlmf", 0.0,
+//                      tmpregmf, totpmlmf, 0, 0, 1, IntVect(0), ngr, period);
+//}
+
+// Capture Number of Particles per Box (TODO)
+
+// Capture Particle Comm Patterns (TODO)
+
+std::string graph_dir_name = "comm_data_step";
+graph_dir_name.append(std::to_string(istep[0]+1));
+graph.print_table(graph_dir_name);

if (loadBalancedAnyLevel)
{
mypc->Redistribute();
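The instrumentation above builds a fresh amrex::Graph on every LoadBalance call and writes it to a per-step directory. Condensed, the assumed Graph API looks as follows (available only on the kngott/graphviz AMReX fork; signatures inferred from the calls in this diff, with names taken from the hunk above and assumed to be in scope):

    amrex::Graph graph;

    // One graph node per box, tagged with the per-element payload size.
    graph.addFab(*costs[0], "costs_lev0", sizeof(amrex::Real));

    // Attach per-box weights (the measured costs); available_locally = false
    // indicates each rank holds values only for its own boxes.
    graph.addNodeWeight("costs_lev0", "cost_value", costs_local, 1.0, false);

    // Record a ghost-cell exchange pattern for a field MultiFab.
    graph.addFillBoundary("FillBoundaryE", "Efield_fp_lvl0", 1.0,
                          *Efield_fp[0][0], nghost, period);

    // Write the tables for this step into their own directory.
    graph.print_table("comm_data_step" + std::to_string(istep[0] + 1));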
2 changes: 2 additions & 0 deletions Source/WarpX.H
@@ -58,6 +58,8 @@
#include <AMReX_BaseFwd.H>
#include <AMReX_AmrCoreFwd.H>

+#include <AMReX_Graph.H> // kngott/graphviz

#include <array>
#include <iostream>
#include <limits>
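This header hunk only adds the include; the commented-out hooks in PML.cpp reference a warpx.graph member, so a declaration along the following lines is presumably added elsewhere in WarpX.H (assumed; not shown in this diff):

    amrex::Graph graph; // per-run comm-metrics graph (assumed; kngott/graphviz fork only)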
4 changes: 2 additions & 2 deletions cmake/dependencies/AMReX.cmake
@@ -238,10 +238,10 @@ set(WarpX_amrex_src ""
"Local path to AMReX source directory (preferred if set)")

# Git fetcher
-set(WarpX_amrex_repo "https://github.com/AMReX-Codes/amrex.git"
+set(WarpX_amrex_repo "https://github.com/kngott/amrex.git"
CACHE STRING
"Repository URI to pull and build AMReX from if(WarpX_amrex_internal)")
-set(WarpX_amrex_branch "18d0a2861d31c52c65752a1d5856f54e08699ce3"
+set(WarpX_amrex_branch "kngott/graphviz"
CACHE STRING
"Repository branch for WarpX_amrex_repo if(WarpX_amrex_internal)")

4 changes: 2 additions & 2 deletions run_test.sh
@@ -70,8 +70,8 @@ export SETUPTOOLS_USE_DISTUTILS="stdlib"
python3 -m pip install --upgrade -r warpx/Regression/requirements.txt

# Clone AMReX and warpx-data
-git clone https://github.com/AMReX-Codes/amrex.git
-cd amrex && git checkout --detach 18d0a2861d31c52c65752a1d5856f54e08699ce3 && cd -
+git clone https://github.com/kngott/amrex.git
+cd amrex && git checkout --detach kngott/graphviz && cd -
# warpx-data contains various required data sets
git clone --depth 1 https://github.com/ECP-WarpX/warpx-data.git
