@@ -284,51 +284,57 @@ CUDAService::CUDAService(edm::ParameterSet const& config, edm::ActivityRegistry&
284
284
285
285
// create allocator
286
286
auto const & allocator = config.getUntrackedParameter <edm::ParameterSet>(" allocator" );
287
- auto binGrowth = allocator.getUntrackedParameter <unsigned int >(" binGrowth" );
288
- auto minBin = allocator.getUntrackedParameter <unsigned int >(" minBin" );
289
- auto maxBin = allocator.getUntrackedParameter <unsigned int >(" maxBin" );
290
- size_t maxCachedBytes = allocator.getUntrackedParameter <unsigned int >(" maxCachedBytes" );
291
- auto maxCachedFraction = allocator.getUntrackedParameter <double >(" maxCachedFraction" );
292
- auto debug = allocator.getUntrackedParameter <bool >(" debug" );
293
-
294
- size_t minCachedBytes = std::numeric_limits<size_t >::max ();
295
- int currentDevice;
296
- cudaCheck (cudaGetDevice (¤tDevice));
297
- for (int i = 0 ; i < numberOfDevices_; ++i) {
298
- size_t freeMemory, totalMemory;
299
- cudaCheck (cudaSetDevice (i));
300
- cudaCheck (cudaMemGetInfo (&freeMemory, &totalMemory));
301
- minCachedBytes = std::min (minCachedBytes, static_cast <size_t >(maxCachedFraction * freeMemory));
302
- }
303
- cudaCheck (cudaSetDevice (currentDevice));
304
- if (maxCachedBytes > 0 ) {
305
- minCachedBytes = std::min (minCachedBytes, maxCachedBytes);
306
- }
307
- log << " cub::CachingDeviceAllocator settings\n "
308
- << " bin growth " << binGrowth << " \n "
309
- << " min bin " << minBin << " \n "
310
- << " max bin " << maxBin << " \n "
311
- << " resulting bins:\n " ;
312
- for (auto bin = minBin; bin <= maxBin; ++bin) {
313
- auto binSize = notcub::CachingDeviceAllocator::IntPow (binGrowth, bin);
314
- if (binSize >= (1 <<30 ) and binSize % (1 <<30 ) == 0 ) {
315
- log << " " << std::setw (8 ) << (binSize >> 30 ) << " GB\n " ;
316
- } else if (binSize >= (1 <<20 ) and binSize % (1 <<20 ) == 0 ) {
317
- log << " " << std::setw (8 ) << (binSize >> 20 ) << " MB\n " ;
318
- } else if (binSize >= (1 <<10 ) and binSize % (1 <<10 ) == 0 ) {
319
- log << " " << std::setw (8 ) << (binSize >> 10 ) << " kB\n " ;
320
- } else {
321
- log << " " << std::setw (9 ) << binSize << " B\n " ;
287
+ auto allocatorEnabled = allocator.getUntrackedParameter <bool >(" enabled" );
288
+ if (allocatorEnabled) {
289
+ auto binGrowth = allocator.getUntrackedParameter <unsigned int >(" binGrowth" );
290
+ auto minBin = allocator.getUntrackedParameter <unsigned int >(" minBin" );
291
+ auto maxBin = allocator.getUntrackedParameter <unsigned int >(" maxBin" );
292
+ size_t maxCachedBytes = allocator.getUntrackedParameter <unsigned int >(" maxCachedBytes" );
293
+ auto maxCachedFraction = allocator.getUntrackedParameter <double >(" maxCachedFraction" );
294
+ auto debug = allocator.getUntrackedParameter <bool >(" debug" );
295
+
296
+ size_t minCachedBytes = std::numeric_limits<size_t >::max ();
297
+ int currentDevice;
298
+ cudaCheck (cudaGetDevice (¤tDevice));
299
+ for (int i = 0 ; i < numberOfDevices_; ++i) {
300
+ size_t freeMemory, totalMemory;
301
+ cudaCheck (cudaSetDevice (i));
302
+ cudaCheck (cudaMemGetInfo (&freeMemory, &totalMemory));
303
+ minCachedBytes = std::min (minCachedBytes, static_cast <size_t >(maxCachedFraction * freeMemory));
304
+ }
305
+ cudaCheck (cudaSetDevice (currentDevice));
306
+ if (maxCachedBytes > 0 ) {
307
+ minCachedBytes = std::min (minCachedBytes, maxCachedBytes);
322
308
}
309
+ log << " cub::CachingDeviceAllocator settings\n "
310
+ << " bin growth " << binGrowth << " \n "
311
+ << " min bin " << minBin << " \n "
312
+ << " max bin " << maxBin << " \n "
313
+ << " resulting bins:\n " ;
314
+ for (auto bin = minBin; bin <= maxBin; ++bin) {
315
+ auto binSize = notcub::CachingDeviceAllocator::IntPow (binGrowth, bin);
316
+ if (binSize >= (1 <<30 ) and binSize % (1 <<30 ) == 0 ) {
317
+ log << " " << std::setw (8 ) << (binSize >> 30 ) << " GB\n " ;
318
+ } else if (binSize >= (1 <<20 ) and binSize % (1 <<20 ) == 0 ) {
319
+ log << " " << std::setw (8 ) << (binSize >> 20 ) << " MB\n " ;
320
+ } else if (binSize >= (1 <<10 ) and binSize % (1 <<10 ) == 0 ) {
321
+ log << " " << std::setw (8 ) << (binSize >> 10 ) << " kB\n " ;
322
+ } else {
323
+ log << " " << std::setw (9 ) << binSize << " B\n " ;
324
+ }
325
+ }
326
+ log << " maximum amount of cached memory: " << (minCachedBytes >> 20 ) << " MB\n " ;
327
+
328
+ allocator_ = std::make_unique<Allocator>(notcub::CachingDeviceAllocator::IntPow (binGrowth, maxBin),
329
+ binGrowth, minBin, maxBin, minCachedBytes,
330
+ false , // do not skip cleanup
331
+ debug
332
+ );
333
+ log << " \n " ;
334
+ }
335
+ else {
336
+ log << " cub::CachingDeviceAllocator disabled\n " ;
323
337
}
324
- log << " maximum amount of cached memory: " << (minCachedBytes >> 20 ) << " MB\n " ;
325
-
326
- allocator_ = std::make_unique<Allocator>(notcub::CachingDeviceAllocator::IntPow (binGrowth, maxBin),
327
- binGrowth, minBin, maxBin, minCachedBytes,
328
- false , // do not skip cleanup
329
- debug
330
- );
331
- log << " \n " ;
332
338
333
339
log << " CUDAService fully initialized" ;
334
340
enabled_ = true ;
@@ -341,7 +347,9 @@ CUDAService::CUDAService(edm::ParameterSet const& config, edm::ActivityRegistry&
341
347
CUDAService::~CUDAService () {
342
348
if (enabled_) {
343
349
// Explicitly destruct the allocator before the device resets below
344
- allocator_.reset ();
350
+ if (allocator_) {
351
+ allocator_.reset ();
352
+ }
345
353
346
354
for (int i = 0 ; i < numberOfDevices_; ++i) {
347
355
cudaCheck (cudaSetDevice (i));
@@ -368,6 +376,7 @@ void CUDAService::fillDescriptions(edm::ConfigurationDescriptions & descriptions
368
376
desc.addUntracked <edm::ParameterSetDescription>(" limits" , limits)->setComment (" See the documentation of cudaDeviceSetLimit for more information.\n Setting any of these options to -1 keeps the default value." );
369
377
370
378
edm::ParameterSetDescription allocator;
379
+ allocator.addUntracked <bool >(" enabled" , true )->setComment (" Enable caching. Setting this to false means to call cudaMalloc for each allocation etc." );
371
380
allocator.addUntracked <unsigned int >(" binGrowth" , 8 )->setComment (" Growth factor (bin_growth in cub::CachingDeviceAllocator" );
372
381
allocator.addUntracked <unsigned int >(" minBin" , 1 )->setComment (" Smallest bin, corresponds to binGrowth^minBin bytes (min_bin in cub::CacingDeviceAllocator" );
373
382
allocator.addUntracked <unsigned int >(" maxBin" , 9 )->setComment (" Largest bin, corresponds to binGrowth^maxBin bytes (max_bin in cub::CachingDeviceAllocator). Note that unlike in cub, allocations larger than binGrowth^maxBin are set to fail." );
@@ -431,29 +440,53 @@ struct CUDAService::Allocator {
431
440
};
432
441
433
442
void *CUDAService::allocate_device (int dev, size_t nbytes, cuda::stream_t <>& stream) {
434
- if (nbytes > allocator_->maxAllocation ) {
435
- throw std::runtime_error (" Tried to allocate " +std::to_string (nbytes)+" bytes, but the allocator maximum is " +std::to_string (allocator_->maxAllocation ));
436
- }
437
-
438
443
void *ptr = nullptr ;
439
- cuda::throw_if_error (allocator_->deviceAllocator .DeviceAllocate (dev, &ptr, nbytes, stream.id ()));
444
+ if (allocator_) {
445
+ if (nbytes > allocator_->maxAllocation ) {
446
+ throw std::runtime_error (" Tried to allocate " +std::to_string (nbytes)+" bytes, but the allocator maximum is " +std::to_string (allocator_->maxAllocation ));
447
+ }
448
+
449
+ cuda::throw_if_error (allocator_->deviceAllocator .DeviceAllocate (dev, &ptr, nbytes, stream.id ()));
450
+ }
451
+ else {
452
+ cuda::device::current::scoped_override_t <> setDeviceForThisScope (dev);
453
+ cuda::throw_if_error (cudaMalloc (&ptr, nbytes));
454
+ }
440
455
return ptr;
441
456
}
442
457
443
458
void CUDAService::free_device (int device, void *ptr) {
444
- cuda::throw_if_error (allocator_->deviceAllocator .DeviceFree (device, ptr));
459
+ if (allocator_) {
460
+ cuda::throw_if_error (allocator_->deviceAllocator .DeviceFree (device, ptr));
461
+ }
462
+ else {
463
+ cuda::device::current::scoped_override_t <> setDeviceForThisScope (device);
464
+ cuda::throw_if_error (cudaFree (ptr));
465
+ }
445
466
}
446
467
447
468
void *CUDAService::allocate_host (size_t nbytes, cuda::stream_t <>& stream) {
448
- if (nbytes > allocator_->maxAllocation ) {
449
- throw std::runtime_error (" Tried to allocate " +std::to_string (nbytes)+" bytes, but the allocator maximum is " +std::to_string (allocator_->maxAllocation ));
469
+ void *ptr = nullptr ;
470
+
471
+ if (allocator_) {
472
+ if (nbytes > allocator_->maxAllocation ) {
473
+ throw std::runtime_error (" Tried to allocate " +std::to_string (nbytes)+" bytes, but the allocator maximum is " +std::to_string (allocator_->maxAllocation ));
474
+ }
475
+
476
+ cuda::throw_if_error (allocator_->hostAllocator .HostAllocate (&ptr, nbytes, stream.id ()));
477
+ }
478
+ else {
479
+ cuda::throw_if_error (cudaMallocHost (&ptr, nbytes));
450
480
}
451
481
452
- void *ptr = nullptr ;
453
- cuda::throw_if_error (allocator_->hostAllocator .HostAllocate (&ptr, nbytes, stream.id ()));
454
482
return ptr;
455
483
}
456
484
457
485
void CUDAService::free_host (void *ptr) {
458
- cuda::throw_if_error (allocator_->hostAllocator .HostFree (ptr));
486
+ if (allocator_) {
487
+ cuda::throw_if_error (allocator_->hostAllocator .HostFree (ptr));
488
+ }
489
+ else {
490
+ cuda::throw_if_error (cudaFreeHost (ptr));
491
+ }
459
492
}
0 commit comments