Skip to content

Commit bf7e98c

Browse files
Fix bug in stalling of the GPUs when running out of hitslots (#460)
This fixes a bug in the stalling of the GPU in case the hitBuffer is overflowing. Since the calculation of `nextStepMightFail` is based on unsigned int parameters, the subtraction was causing false negatives in case of wraparounds if `hitBufferSafetyFactor * maxInFlight ` was larger than ` gpuState.fHitScoring->HitCapacity() / numThread`. By changing the condition to an addition ``` bool nextStepMightFail = gpuState.stats->hitBufferOccupancy + hitBufferSafetyFactor * maxInFlight >= gpuState.fHitScoring->HitCapacity() / numThreads; ``` this is safely resolved. Furthermore, as a track can cause multiple (3) hits per track, (one for each secondary, one for the energy it deposits itself), the previous hardcoded hitBufferSafetyFactor of 1.5 was dangerous, so now it is an UI parameter. It doesn't make sense to hardcode a higher (safe) value, as it would slow down the settings where a lower factor is safe. Furthermore, two printouts that are per track are increased to a higher verbosity, as those render larger simulations useless, but other printouts (e.g., whether the hitbuffer is copied) were at the same level.
1 parent 8a5bcb7 commit bf7e98c

File tree

6 files changed

+35
-15
lines changed

6 files changed

+35
-15
lines changed

include/AdePT/core/AdePTConfiguration.hh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ public:
4242
void SetMillionsOfHitSlots(double millionSlots) { fMillionsOfHitSlots = millionSlots; }
4343
void SetHitBufferFlushThreshold(float threshold) { fHitBufferFlushThreshold = threshold; }
4444
void SetCPUCapacityFactor(float CPUCapacityFactor) { fCPUCapacityFactor = CPUCapacityFactor; }
45+
void SetHitBufferSafetyFactor(double HitBufferSafetyFactor) { fHitBufferSafetyFactor = HitBufferSafetyFactor; }
4546
void SetAdePTSeed(int seed) { fAdePTSeed = static_cast<uint64_t>(seed); }
4647

4748
void SetCUDAStackLimit(int limit) { fCUDAStackLimit = limit; }
@@ -81,6 +82,7 @@ public:
8182
double GetWDTKineticEnergyLimit() { return fWDTKineticEnergyLimit; }
8283
float GetHitBufferFlushThreshold() { return fHitBufferFlushThreshold; }
8384
float GetCPUCapacityFactor() { return fCPUCapacityFactor; }
85+
double GetHitBufferSafetyFactor() { return fHitBufferSafetyFactor; }
8486
double GetMillionsOfTrackSlots() { return fMillionsOfTrackSlots; }
8587
double GetMillionsOfLeakSlots() { return fMillionsOfLeakSlots; }
8688
double GetMillionsOfHitSlots() { return fMillionsOfHitSlots; }
@@ -108,6 +110,7 @@ private:
108110
uint64_t fAdePTSeed{1234567};
109111
float fHitBufferFlushThreshold{0.8};
110112
float fCPUCapacityFactor{2.5};
113+
double fHitBufferSafetyFactor{1.5};
111114
double fMillionsOfTrackSlots{1};
112115
double fMillionsOfLeakSlots{1};
113116
double fMillionsOfHitSlots{1};

include/AdePT/core/AsyncAdePTTransport.cuh

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -912,7 +912,8 @@ void HitProcessingLoop(HitProcessingContext *const context, GPUstate &gpuState,
912912
void TransportLoop(int trackCapacity, int leakCapacity, int scoringCapacity, int numThreads, TrackBuffer &trackBuffer,
913913
GPUstate &gpuState, std::vector<std::atomic<EventState>> &eventStates,
914914
std::condition_variable &cvG4Workers, std::vector<AdePTScoring> &scoring, int adeptSeed,
915-
int debugLevel, bool returnAllSteps, bool returnLastStep, unsigned short lastNParticlesOnCPU)
915+
int debugLevel, bool returnAllSteps, bool returnLastStep, unsigned short lastNParticlesOnCPU,
916+
const double hitBufferSafetyFactor)
916917
{
917918
// NVTXTracer tracer{"TransportLoop"};
918919

@@ -1624,9 +1625,11 @@ void TransportLoop(int trackCapacity, int leakCapacity, int scoringCapacity, int
16241625
"/adept/setMillionsOfHitSlots or reduce /run/numberOfThreads!"
16251626
<< std::endl;
16261627

1627-
// next step could fail because there are more tracks in flight than hit slots left..
1628-
bool nextStepMightFail =
1629-
gpuState.stats->hitBufferOccupancy >= gpuState.fHitScoring->HitCapacity() / numThreads - 1.5 * maxInFlight;
1628+
// next step could fail because there are too tracks in flight. A track can cause multiple hits (one for each
1629+
// secondary and one for itself). The safety factor should be as low as possible to prevent stalling, but must
1630+
// be as high as needed to avoid crashes.
1631+
bool nextStepMightFail = gpuState.stats->hitBufferOccupancy + hitBufferSafetyFactor * maxInFlight >=
1632+
gpuState.fHitScoring->HitCapacity() / numThreads;
16301633

16311634
if (!gpuState.fHitScoring->ReadyToSwapBuffers() && !nextStepMightFail) {
16321635
hitProcessing->cv.notify_one();
@@ -1777,7 +1780,7 @@ std::thread LaunchGPUWorker(int trackCapacity, int leakCapacity, int scoringCapa
17771780
TrackBuffer &trackBuffer, GPUstate &gpuState,
17781781
std::vector<std::atomic<EventState>> &eventStates, std::condition_variable &cvG4Workers,
17791782
std::vector<AdePTScoring> &scoring, int adeptSeed, int debugLevel, bool returnAllSteps,
1780-
bool returnLastStep, unsigned short lastNParticlesOnCPU)
1783+
bool returnLastStep, unsigned short lastNParticlesOnCPU, const double hitBufferSafetyFactor)
17811784
{
17821785
return std::thread{&TransportLoop,
17831786
trackCapacity,
@@ -1793,7 +1796,8 @@ std::thread LaunchGPUWorker(int trackCapacity, int leakCapacity, int scoringCapa
17931796
debugLevel,
17941797
returnAllSteps,
17951798
returnLastStep,
1796-
lastNParticlesOnCPU};
1799+
lastNParticlesOnCPU,
1800+
hitBufferSafetyFactor};
17971801
}
17981802

17991803
void FreeGPU(std::unique_ptr<AsyncAdePT::GPUstate, AsyncAdePT::GPUstateDeleter> &gpuState, G4HepEmState &g4hepem_state,

include/AdePT/core/AsyncAdePTTransport.hh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ private:
7878
///< Filling fraction of the ScoringCapacity on host when the hits are copied out and not taken directly by the
7979
///< G4workers
8080
double fCPUCopyFraction{0.5};
81+
///< Needed to stall the GPU, in case the nPartInFlight * fHitBufferSafetyFactor > available HitSlots
82+
double fHitBufferSafetyFactor{1.5};
8183

8284
void Initialize(G4HepEmTrackingManagerSpecialized *hepEmTM);
8385
void InitBVH();

include/AdePT/core/AsyncAdePTTransport.icc

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ std::pair<GPUHit *, GPUHit *> GetGPUHitsFromBuffer(unsigned int, unsigned int, A
5656
void CloseGPUBuffer(unsigned int, AsyncAdePT::GPUstate &, GPUHit *, const bool);
5757
std::thread LaunchGPUWorker(int, int, int, int, AsyncAdePT::TrackBuffer &, AsyncAdePT::GPUstate &,
5858
std::vector<std::atomic<AsyncAdePT::EventState>> &, std::condition_variable &,
59-
std::vector<AdePTScoring> &, int, int, bool, bool, unsigned short);
59+
std::vector<AdePTScoring> &, int, int, bool, bool, unsigned short, const double);
6060
std::unique_ptr<AsyncAdePT::GPUstate, AsyncAdePT::GPUstateDeleter> InitializeGPU(
6161
int trackCapacity, int leakCapacity, int scoringCapacity, int numThreads, AsyncAdePT::TrackBuffer &trackBuffer,
6262
std::vector<AdePTScoring> &scoring, double CPUCapacityFactor, double CPUCopyFraction);
@@ -100,7 +100,8 @@ AsyncAdePTTransport<IntegrationLayer>::AsyncAdePTTransport(AdePTConfiguration &c
100100
fReturnAllSteps{configuration.GetCallUserSteppingAction()},
101101
fReturnFirstAndLastStep{configuration.GetCallUserTrackingAction() || configuration.GetCallUserSteppingAction()},
102102
fBfieldFile{configuration.GetCovfieBfieldFile()}, fCPUCapacityFactor{configuration.GetCPUCapacityFactor()},
103-
fCPUCopyFraction{configuration.GetHitBufferFlushThreshold()}
103+
fCPUCopyFraction{configuration.GetHitBufferFlushThreshold()},
104+
fHitBufferSafetyFactor{configuration.GetHitBufferSafetyFactor()}
104105
{
105106
if (fNThread > kMaxThreads)
106107
throw std::invalid_argument("AsyncAdePTTransport limited to " + std::to_string(kMaxThreads) + " threads");
@@ -157,7 +158,7 @@ void AsyncAdePTTransport<IntegrationLayer>::AddTrack(
157158
static_cast<short>(threadId)};
158159
if (fDebugLevel >= 2) {
159160
fGPUNetEnergy[threadId] += energy;
160-
if (fDebugLevel >= 6) {
161+
if (fDebugLevel >= 8) {
161162
G4cout << "\n[_in," << eventId << "," << trackId << "]: " << track << "\tGPU net energy " << std::setprecision(6)
162163
<< fGPUNetEnergy[threadId] << G4endl;
163164
}
@@ -317,11 +318,12 @@ void AsyncAdePTTransport<IntegrationLayer>::Initialize(G4HepEmTrackingManagerSpe
317318

318319
assert(fBuffer != nullptr);
319320

320-
fGPUstate = async_adept_impl::InitializeGPU(fTrackCapacity, fLeakCapacity, fScoringCapacity, fNThread, *fBuffer,
321-
fScoring, fCPUCapacityFactor, fCPUCopyFraction);
322-
fGPUWorker = async_adept_impl::LaunchGPUWorker(
323-
fTrackCapacity, fLeakCapacity, fScoringCapacity, fNThread, *fBuffer, *fGPUstate, fEventStates, fCV_G4Workers,
324-
fScoring, fAdePTSeed, fDebugLevel, fReturnAllSteps, fReturnFirstAndLastStep, fLastNParticlesOnCPU);
321+
fGPUstate = async_adept_impl::InitializeGPU(fTrackCapacity, fLeakCapacity, fScoringCapacity, fNThread, *fBuffer,
322+
fScoring, fCPUCapacityFactor, fCPUCopyFraction);
323+
fGPUWorker =
324+
async_adept_impl::LaunchGPUWorker(fTrackCapacity, fLeakCapacity, fScoringCapacity, fNThread, *fBuffer, *fGPUstate,
325+
fEventStates, fCV_G4Workers, fScoring, fAdePTSeed, fDebugLevel, fReturnAllSteps,
326+
fReturnFirstAndLastStep, fLastNParticlesOnCPU, fHitBufferSafetyFactor);
325327
}
326328

327329
template <typename IntegrationLayer>
@@ -412,7 +414,7 @@ void AsyncAdePTTransport<IntegrationLayer>::Flush(G4int threadId, G4int eventId)
412414
for (const auto &track : tracks) {
413415

414416
fGPUNetEnergy[threadId] -= track.eKin;
415-
if (fDebugLevel >= 5) {
417+
if (fDebugLevel >= 8) {
416418
G4cout << "\n[out," << track.eventId << "," << trackId++ << "]: " << track << "\tGPU net energy "
417419
<< std::setprecision(6) << fGPUNetEnergy[threadId] << G4endl;
418420
}

include/AdePT/integration/AdePTConfigurationMessenger.hh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ private:
5656
G4UIcmdWithADouble *fSetMillionsOfHitSlotsCmd;
5757
G4UIcmdWithADouble *fSetHitBufferFlushThresholdCmd;
5858
G4UIcmdWithADouble *fSetCPUCapacityFactorCmd;
59+
G4UIcmdWithADouble *fSetHitBufferSafetyFactorCmd;
5960

6061
// Temporary method for setting the VecGeom geometry.
6162
// In the future the geometry will be converted from Geant4 rather than loaded from GDML.

src/AdePTConfigurationMessenger.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,11 @@ AdePTConfigurationMessenger::AdePTConfigurationMessenger(AdePTConfiguration *ade
9292
fSetCPUCapacityFactorCmd->SetParameterName("CPUCapacityFactor", false);
9393
fSetCPUCapacityFactorCmd->SetRange("CPUCapacityFactor>=2.5");
9494

95+
fSetHitBufferSafetyFactorCmd = new G4UIcmdWithADouble("/adept/setHitBufferSafetyFactor", this);
96+
fSetHitBufferSafetyFactorCmd->SetGuidance(
97+
"Sets the HitBuffer safety factor for stalling the GPU. If nParticlesInFlight * HitBufferSafetyFactor > "
98+
"NumHitSlotsLeft, the GPU will stall. Default: 1.5 ");
99+
95100
fSetGDMLCmd = new G4UIcmdWithAString("/adept/setVecGeomGDML", this);
96101
fSetGDMLCmd->SetGuidance("Temporary method for setting the geometry to use with VecGeom");
97102

@@ -155,6 +160,7 @@ AdePTConfigurationMessenger::~AdePTConfigurationMessenger()
155160
delete fSetFinishOnCpuCmd;
156161
delete fSetSpeedOfLightCmd;
157162
delete fSetCPUCapacityFactorCmd;
163+
delete fSetHitBufferSafetyFactorCmd;
158164
delete fSetMultipleStepsInMSCWithTransportationCmd;
159165
delete fSetEnergyLossFluctuationCmd;
160166
delete fSetMaxWDTIterCmd;
@@ -202,6 +208,8 @@ void AdePTConfigurationMessenger::SetNewValue(G4UIcommand *command, G4String new
202208
fAdePTConfiguration->SetHitBufferFlushThreshold(fSetHitBufferFlushThresholdCmd->GetNewDoubleValue(newValue));
203209
} else if (command == fSetCPUCapacityFactorCmd) {
204210
fAdePTConfiguration->SetCPUCapacityFactor(fSetCPUCapacityFactorCmd->GetNewDoubleValue(newValue));
211+
} else if (command == fSetHitBufferSafetyFactorCmd) {
212+
fAdePTConfiguration->SetHitBufferSafetyFactor(fSetHitBufferSafetyFactorCmd->GetNewDoubleValue(newValue));
205213
} else if (command == fSetGDMLCmd) {
206214
fAdePTConfiguration->SetVecGeomGDML(newValue);
207215
} else if (command == fSetCovfieFileCmd) {

0 commit comments

Comments
 (0)