Skip to content

Commit 00c4d2d

Browse files
committed
Use atomic wrappers in RAJA SPH calls, modify evalderiv test
1 parent 7399fe5 commit 00c4d2d

3 files changed

Lines changed: 60 additions & 27 deletions

File tree

src/SPH/SPH_RAJA.cc

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -359,20 +359,20 @@ evaluateDerivativesImpl(const typename Dimension::Scalar time,
359359

360360
// Contribution to the sum density.
361361
if (nodeListi == nodeListj) {
362-
RAJA::atomicAdd<RAJA::auto_atomic>(&rhoSumi, mj*Wi);
363-
RAJA::atomicAdd<RAJA::auto_atomic>(&rhoSumj, mi*Wj);
364-
RAJA::atomicAdd<RAJA::auto_atomic>(&normi, mi/rhoi*Wi);
365-
RAJA::atomicAdd<RAJA::auto_atomic>(&normj, mj/rhoj*Wj);
362+
GPUUtils::AtomicAddOp::apply(&rhoSumi, mj*Wi);
363+
GPUUtils::AtomicAddOp::apply(&rhoSumj, mi*Wj);
364+
GPUUtils::AtomicAddOp::apply(&normi, mi/rhoi*Wi);
365+
GPUUtils::AtomicAddOp::apply(&normj, mj/rhoj*Wj);
366366
}
367367

368368
const auto Qacci = 0.5*(QPiij*gradWQi);
369369
const auto Qaccj = 0.5*(QPiji*gradWQj);
370370
const auto workQi = vij.dot(Qacci);
371371
const auto workQj = vij.dot(Qaccj);
372-
RAJA::atomicMax<RAJA::auto_atomic>(&maxViscousPressurei, Qi);
373-
RAJA::atomicMax<RAJA::auto_atomic>(&maxViscousPressurej, Qj);
374-
RAJA::atomicAdd<RAJA::auto_atomic>(&effViscousPressurei, mj*Qi*WQi/rhoj);
375-
RAJA::atomicAdd<RAJA::auto_atomic>(&effViscousPressurej, mi*Qj*WQj/rhoi);
372+
GPUUtils::AtomicMaxOp::apply(&maxViscousPressurei, Qi);
373+
GPUUtils::AtomicMaxOp::apply(&maxViscousPressurej, Qj);
374+
GPUUtils::AtomicAddOp::apply(&effViscousPressurei, mj*Qi*WQi/rhoj);
375+
GPUUtils::AtomicAddOp::apply(&effViscousPressurej, mi*Qj*WQj/rhoi);
376376

377377
// Determine an effective pressure including a term to fight the tensile instability.
378378
const auto Ri = epsTensile*FastMath::pow4(Wi/(Hdeti*WnPerh))*(Pi < 0.0 ? -Pi : 0.0);
@@ -391,8 +391,8 @@ evaluateDerivativesImpl(const typename Dimension::Scalar time,
391391
if (compatibleEnergy) pairAccelerations[kk] = -mj*deltaDvDt; // Acceleration for i (j anti-symmetric)
392392

393393
// Specific thermal energy evolution.
394-
RAJA::atomicAdd<RAJA::auto_atomic>(&DepsDti, mj*(Prhoi*vij.dot(gradWi) + workQi));
395-
RAJA::atomicAdd<RAJA::auto_atomic>(&DepsDtj, mi*(Prhoj*vij.dot(gradWj) + workQj));
394+
GPUUtils::AtomicAddOp::apply(&DepsDti, mj*(Prhoi*vij.dot(gradWi) + workQi));
395+
GPUUtils::AtomicAddOp::apply(&DepsDtj, mi*(Prhoj*vij.dot(gradWj) + workQj));
396396

397397
// Velocity gradient.
398398
const auto deltaDvDxi = mj*vij.dyad(gradWi);
@@ -407,8 +407,8 @@ evaluateDerivativesImpl(const typename Dimension::Scalar time,
407407
// Estimate of delta v (for XSPH).
408408
if (XSPH and (sameMatij)) {
409409
const auto wXSPHij = 0.5*(mi/rhoi*Wi + mj/rhoj*Wj);
410-
RAJA::atomicAdd<RAJA::auto_atomic>(&XSPHWeightSumi, wXSPHij);
411-
RAJA::atomicAdd<RAJA::auto_atomic>(&XSPHWeightSumj, wXSPHij);
410+
GPUUtils::AtomicAddOp::apply(&XSPHWeightSumi, wXSPHij);
411+
GPUUtils::AtomicAddOp::apply(&XSPHWeightSumj, wXSPHij);
412412
XSPHDeltaVi.atomicSub(wXSPHij*vij);
413413
XSPHDeltaVj.atomicAdd(wXSPHij*vij);
414414
}

src/SPH/SolidSPH_RAJA.cc

Lines changed: 45 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -248,6 +248,24 @@ evaluateDerivativesImpl(const typename Dimension::Scalar /*time*/,
248248
auto DvDxQView = DvDxQ.view();
249249
auto fClQView = fClQ.view();
250250
auto fCqQView = fCqQ.view();
251+
mass.move(chai::GPU);
252+
position.move(chai::GPU);
253+
velocity.move(chai::GPU);
254+
massDensity.move(chai::GPU);
255+
H.move(chai::GPU);
256+
pressure.move(chai::GPU);
257+
soundSpeed.move(chai::GPU);
258+
omega.move(chai::GPU);
259+
S.move(chai::GPU);
260+
mu.move(chai::GPU);
261+
specificThermalEnergy.move(chai::GPU);
262+
damage.move(chai::GPU);
263+
pTypes.move(chai::GPU);
264+
fragID.move(chai::GPU);
265+
DvDxQView.move(chai::GPU);
266+
fClQView.move(chai::GPU);
267+
fCqQView.move(chai::GPU);
268+
251269
CHECK(mass.size() == numNodeLists);
252270
CHECK(position.size() == numNodeLists);
253271
CHECK(velocity.size() == numNodeLists);
@@ -299,6 +317,21 @@ evaluateDerivativesImpl(const typename Dimension::Scalar /*time*/,
299317
auto DSDt = DSDt_v.view();
300318
auto pairAccelerations = pairAccelerations_v.view();
301319
auto rhoSumCorrection = rhoSumCorrection_v.view();
320+
rhoSum.move(chai::GPU);
321+
DxDt.move(chai::GPU);
322+
DrhoDt.move(chai::GPU);
323+
DvDt.move(chai::GPU);
324+
DepsDt.move(chai::GPU);
325+
DvDx.move(chai::GPU);
326+
localDvDx.move(chai::GPU);
327+
M.move(chai::GPU);
328+
maxViscousPressure.move(chai::GPU);
329+
effViscousPressure.move(chai::GPU);
330+
XSPHWeightSum.move(chai::GPU);
331+
XSPHDeltaV.move(chai::GPU);
332+
DSDt.move(chai::GPU);
333+
pairAccelerations.move(chai::GPU);
334+
rhoSumCorrection.move(chai::GPU);
302335
CHECK(rhoSum_v.size() == numNodeLists);
303336
CHECK(DxDt_v.size() == numNodeLists);
304337
CHECK(DrhoDt_v.size() == numNodeLists);
@@ -466,22 +499,22 @@ evaluateDerivativesImpl(const typename Dimension::Scalar /*time*/,
466499
fClQView, fCqQView, DvDxQView);
467500
// Contribution to the sum density (only if the same material).
468501
if (nodeListi == nodeListj) {
469-
RAJA::atomicAdd<RAJA::auto_atomic>(&rhoSumi, mj*Wi);
470-
RAJA::atomicAdd<RAJA::auto_atomic>(&rhoSumj, mi*Wj);
502+
GPUUtils::AtomicAddOp::apply(&rhoSumi, mj*Wi);
503+
GPUUtils::AtomicAddOp::apply(&rhoSumj, mi*Wj);
471504
}
472505

473506
// Contribution to the sum density correction
474-
RAJA::atomicAdd<RAJA::auto_atomic>(&rhoSumCorrectioni, mj*WQi / rhoj);
475-
RAJA::atomicAdd<RAJA::auto_atomic>(&rhoSumCorrectionj, mi*WQj / rhoi);
507+
GPUUtils::AtomicAddOp::apply(&rhoSumCorrectioni, mj*WQi / rhoj);
508+
GPUUtils::AtomicAddOp::apply(&rhoSumCorrectionj, mi*WQj / rhoi);
476509

477510
const auto Qacci = 0.5*(QPiij*gradWQi);
478511
const auto Qaccj = 0.5*(QPiji*gradWQj);
479512
const auto workQi = vij.dot(Qacci);
480513
const auto workQj = vij.dot(Qaccj);
481-
RAJA::atomicMax<RAJA::auto_atomic>(&maxViscousPressurei, Qi);
482-
RAJA::atomicMax<RAJA::auto_atomic>(&maxViscousPressurej, Qj);
483-
RAJA::atomicAdd<RAJA::auto_atomic>(&effViscousPressurei, mj*Qi*WQi/rhoj);
484-
RAJA::atomicAdd<RAJA::auto_atomic>(&effViscousPressurej, mi*Qj*WQj/rhoi);
514+
GPUUtils::AtomicMaxOp::apply(&maxViscousPressurei, Qi);
515+
GPUUtils::AtomicMaxOp::apply(&maxViscousPressurej, Qj);
516+
GPUUtils::AtomicAddOp::apply(&effViscousPressurei, mj*Qi*WQi/rhoj);
517+
GPUUtils::AtomicAddOp::apply(&effViscousPressurej, mi*Qj*WQj/rhoi);
485518
// Compute the stress tensors.
486519
SymTensor sigmai, sigmaj;
487520
if (sameMatij) {
@@ -516,9 +549,9 @@ evaluateDerivativesImpl(const typename Dimension::Scalar /*time*/,
516549
const auto deltaDvDxj = fDij * vij.dyad(gradWGj);
517550

518551
// Specific thermal energy evolution.
519-
RAJA::atomicSub<RAJA::auto_atomic>(&DepsDti,
552+
GPUUtils::AtomicSubOp::apply(&DepsDti,
520553
mj*(sigmarhoi.doubledot(deltaDvDxi.Symmetric()) - workQi));
521-
RAJA::atomicSub<RAJA::auto_atomic>(&DepsDtj,
554+
GPUUtils::AtomicSubOp::apply(&DepsDtj,
522555
mi*(sigmarhoj.doubledot(deltaDvDxj.Symmetric()) - workQj));
523556

524557
// Velocity gradient.
@@ -532,8 +565,8 @@ evaluateDerivativesImpl(const typename Dimension::Scalar /*time*/,
532565
// Estimate of delta v (for XSPH).
533566
if (XSPH and sameMatij) {
534567
const auto wXSPHij = 0.5*(mi/rhoi*Wi + mj/rhoj*Wj);
535-
RAJA::atomicAdd<RAJA::auto_atomic>(&XSPHWeightSumi, wXSPHij);
536-
RAJA::atomicAdd<RAJA::auto_atomic>(&XSPHWeightSumj, wXSPHij);
568+
GPUUtils::AtomicAddOp::apply(&XSPHWeightSumi, wXSPHij);
569+
GPUUtils::AtomicAddOp::apply(&XSPHWeightSumj, wXSPHij);
537570
XSPHDeltaVi.atomicSub(wXSPHij*vij);
538571
XSPHDeltaVj.atomicAdd(wXSPHij*vij);
539572
}

tests/unit/SPH/evalDerivsRun.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
11
#ATS:dim = 3
22
#ATS:dimstr = f"{dim}d"
3-
#ATS:ntotals = [50, 60, 70]
3+
#ATS:ntotals = [50, 60, 70, 80]
44
#ATS:for nxv in ntotals:
55
#ATS: nx = int(nxv**dim)
66
#ATS: test_name = f"EVALDERIV_{nxv}"
77
#ATS: cali_name = f"{test_name}.cali"
88
#ATS: inputs = f"--raja True --ntotal {nx} --testDim {dimstr} --adiakData 'test_name: {test_name}' --caliperFilename {cali_name}"
9-
#ATS: test(SELF, label=test_name, clas=inputs, ngpu=1, np=1, nt=1, caliper_filename=cali_name)
9+
#ATS: test(SELF, label=test_name, clas=inputs, ngpu=1, np=1, nt=1, caliper_filename=cali_name, independent=False)
1010

1111
#-------------------------------------------------------------------------------
1212
# Isolated evaluateDerivatives for performance testing.
@@ -61,7 +61,7 @@
6161
initVel = False,
6262

6363
# Parameters for iterating H.
64-
iterateH = True,
64+
iterateH = False,
6565
maxHIterations = 200,
6666
Htolerance = 1.0e-4
6767
)

0 commit comments

Comments
 (0)