@@ -284,51 +284,57 @@ CUDAService::CUDAService(edm::ParameterSet const& config, edm::ActivityRegistry&
284284
285285 // create allocator
286286 auto const & allocator = config.getUntrackedParameter <edm::ParameterSet>(" allocator" );
287- auto binGrowth = allocator.getUntrackedParameter <unsigned int >(" binGrowth" );
288- auto minBin = allocator.getUntrackedParameter <unsigned int >(" minBin" );
289- auto maxBin = allocator.getUntrackedParameter <unsigned int >(" maxBin" );
290- size_t maxCachedBytes = allocator.getUntrackedParameter <unsigned int >(" maxCachedBytes" );
291- auto maxCachedFraction = allocator.getUntrackedParameter <double >(" maxCachedFraction" );
292- auto debug = allocator.getUntrackedParameter <bool >(" debug" );
293-
294- size_t minCachedBytes = std::numeric_limits<size_t >::max ();
295- int currentDevice;
296- cudaCheck (cudaGetDevice (¤tDevice));
297- for (int i = 0 ; i < numberOfDevices_; ++i) {
298- size_t freeMemory, totalMemory;
299- cudaCheck (cudaSetDevice (i));
300- cudaCheck (cudaMemGetInfo (&freeMemory, &totalMemory));
301- minCachedBytes = std::min (minCachedBytes, static_cast <size_t >(maxCachedFraction * freeMemory));
302- }
303- cudaCheck (cudaSetDevice (currentDevice));
304- if (maxCachedBytes > 0 ) {
305- minCachedBytes = std::min (minCachedBytes, maxCachedBytes);
306- }
307- log << " cub::CachingDeviceAllocator settings\n "
308- << " bin growth " << binGrowth << " \n "
309- << " min bin " << minBin << " \n "
310- << " max bin " << maxBin << " \n "
311- << " resulting bins:\n " ;
312- for (auto bin = minBin; bin <= maxBin; ++bin) {
313- auto binSize = notcub::CachingDeviceAllocator::IntPow (binGrowth, bin);
314- if (binSize >= (1 <<30 ) and binSize % (1 <<30 ) == 0 ) {
315- log << " " << std::setw (8 ) << (binSize >> 30 ) << " GB\n " ;
316- } else if (binSize >= (1 <<20 ) and binSize % (1 <<20 ) == 0 ) {
317- log << " " << std::setw (8 ) << (binSize >> 20 ) << " MB\n " ;
318- } else if (binSize >= (1 <<10 ) and binSize % (1 <<10 ) == 0 ) {
319- log << " " << std::setw (8 ) << (binSize >> 10 ) << " kB\n " ;
320- } else {
321- log << " " << std::setw (9 ) << binSize << " B\n " ;
287+ auto allocatorEnabled = allocator.getUntrackedParameter <bool >(" enabled" );
288+ if (allocatorEnabled) {
289+ auto binGrowth = allocator.getUntrackedParameter <unsigned int >(" binGrowth" );
290+ auto minBin = allocator.getUntrackedParameter <unsigned int >(" minBin" );
291+ auto maxBin = allocator.getUntrackedParameter <unsigned int >(" maxBin" );
292+ size_t maxCachedBytes = allocator.getUntrackedParameter <unsigned int >(" maxCachedBytes" );
293+ auto maxCachedFraction = allocator.getUntrackedParameter <double >(" maxCachedFraction" );
294+ auto debug = allocator.getUntrackedParameter <bool >(" debug" );
295+
296+ size_t minCachedBytes = std::numeric_limits<size_t >::max ();
297+ int currentDevice;
298+ cudaCheck (cudaGetDevice (¤tDevice));
299+ for (int i = 0 ; i < numberOfDevices_; ++i) {
300+ size_t freeMemory, totalMemory;
301+ cudaCheck (cudaSetDevice (i));
302+ cudaCheck (cudaMemGetInfo (&freeMemory, &totalMemory));
303+ minCachedBytes = std::min (minCachedBytes, static_cast <size_t >(maxCachedFraction * freeMemory));
304+ }
305+ cudaCheck (cudaSetDevice (currentDevice));
306+ if (maxCachedBytes > 0 ) {
307+ minCachedBytes = std::min (minCachedBytes, maxCachedBytes);
322308 }
309+ log << " cub::CachingDeviceAllocator settings\n "
310+ << " bin growth " << binGrowth << " \n "
311+ << " min bin " << minBin << " \n "
312+ << " max bin " << maxBin << " \n "
313+ << " resulting bins:\n " ;
314+ for (auto bin = minBin; bin <= maxBin; ++bin) {
315+ auto binSize = notcub::CachingDeviceAllocator::IntPow (binGrowth, bin);
316+ if (binSize >= (1 <<30 ) and binSize % (1 <<30 ) == 0 ) {
317+ log << " " << std::setw (8 ) << (binSize >> 30 ) << " GB\n " ;
318+ } else if (binSize >= (1 <<20 ) and binSize % (1 <<20 ) == 0 ) {
319+ log << " " << std::setw (8 ) << (binSize >> 20 ) << " MB\n " ;
320+ } else if (binSize >= (1 <<10 ) and binSize % (1 <<10 ) == 0 ) {
321+ log << " " << std::setw (8 ) << (binSize >> 10 ) << " kB\n " ;
322+ } else {
323+ log << " " << std::setw (9 ) << binSize << " B\n " ;
324+ }
325+ }
326+ log << " maximum amount of cached memory: " << (minCachedBytes >> 20 ) << " MB\n " ;
327+
328+ allocator_ = std::make_unique<Allocator>(notcub::CachingDeviceAllocator::IntPow (binGrowth, maxBin),
329+ binGrowth, minBin, maxBin, minCachedBytes,
330+ false , // do not skip cleanup
331+ debug
332+ );
333+ log << " \n " ;
334+ }
335+ else {
336+ log << " cub::CachingDeviceAllocator disabled\n " ;
323337 }
324- log << " maximum amount of cached memory: " << (minCachedBytes >> 20 ) << " MB\n " ;
325-
326- allocator_ = std::make_unique<Allocator>(notcub::CachingDeviceAllocator::IntPow (binGrowth, maxBin),
327- binGrowth, minBin, maxBin, minCachedBytes,
328- false , // do not skip cleanup
329- debug
330- );
331- log << " \n " ;
332338
333339 log << " CUDAService fully initialized" ;
334340 enabled_ = true ;
@@ -341,7 +347,9 @@ CUDAService::CUDAService(edm::ParameterSet const& config, edm::ActivityRegistry&
341347CUDAService::~CUDAService () {
342348 if (enabled_) {
343349 // Explicitly destruct the allocator before the device resets below
344- allocator_.reset ();
350+ if (allocator_) {
351+ allocator_.reset ();
352+ }
345353
346354 for (int i = 0 ; i < numberOfDevices_; ++i) {
347355 cudaCheck (cudaSetDevice (i));
@@ -368,6 +376,7 @@ void CUDAService::fillDescriptions(edm::ConfigurationDescriptions & descriptions
368376 desc.addUntracked <edm::ParameterSetDescription>(" limits" , limits)->setComment (" See the documentation of cudaDeviceSetLimit for more information.\n Setting any of these options to -1 keeps the default value." );
369377
370378 edm::ParameterSetDescription allocator;
379+ allocator.addUntracked <bool >(" enabled" , true )->setComment (" Enable caching. Setting this to false means to call cudaMalloc for each allocation etc." );
371380 allocator.addUntracked <unsigned int >(" binGrowth" , 8 )->setComment (" Growth factor (bin_growth in cub::CachingDeviceAllocator" );
372381 allocator.addUntracked <unsigned int >(" minBin" , 1 )->setComment (" Smallest bin, corresponds to binGrowth^minBin bytes (min_bin in cub::CacingDeviceAllocator" );
373382 allocator.addUntracked <unsigned int >(" maxBin" , 9 )->setComment (" Largest bin, corresponds to binGrowth^maxBin bytes (max_bin in cub::CachingDeviceAllocator). Note that unlike in cub, allocations larger than binGrowth^maxBin are set to fail." );
@@ -431,29 +440,53 @@ struct CUDAService::Allocator {
431440};
432441
433442void *CUDAService::allocate_device (int dev, size_t nbytes, cuda::stream_t <>& stream) {
434- if (nbytes > allocator_->maxAllocation ) {
435- throw std::runtime_error (" Tried to allocate " +std::to_string (nbytes)+" bytes, but the allocator maximum is " +std::to_string (allocator_->maxAllocation ));
436- }
437-
438443 void *ptr = nullptr ;
439- cuda::throw_if_error (allocator_->deviceAllocator .DeviceAllocate (dev, &ptr, nbytes, stream.id ()));
444+ if (allocator_) {
445+ if (nbytes > allocator_->maxAllocation ) {
446+ throw std::runtime_error (" Tried to allocate " +std::to_string (nbytes)+" bytes, but the allocator maximum is " +std::to_string (allocator_->maxAllocation ));
447+ }
448+
449+ cuda::throw_if_error (allocator_->deviceAllocator .DeviceAllocate (dev, &ptr, nbytes, stream.id ()));
450+ }
451+ else {
452+ cuda::device::current::scoped_override_t <> setDeviceForThisScope (dev);
453+ cuda::throw_if_error (cudaMalloc (&ptr, nbytes));
454+ }
440455 return ptr;
441456}
442457
443458void CUDAService::free_device (int device, void *ptr) {
444- cuda::throw_if_error (allocator_->deviceAllocator .DeviceFree (device, ptr));
459+ if (allocator_) {
460+ cuda::throw_if_error (allocator_->deviceAllocator .DeviceFree (device, ptr));
461+ }
462+ else {
463+ cuda::device::current::scoped_override_t <> setDeviceForThisScope (device);
464+ cuda::throw_if_error (cudaFree (ptr));
465+ }
445466}
446467
447468void *CUDAService::allocate_host (size_t nbytes, cuda::stream_t <>& stream) {
448- if (nbytes > allocator_->maxAllocation ) {
449- throw std::runtime_error (" Tried to allocate " +std::to_string (nbytes)+" bytes, but the allocator maximum is " +std::to_string (allocator_->maxAllocation ));
469+ void *ptr = nullptr ;
470+
471+ if (allocator_) {
472+ if (nbytes > allocator_->maxAllocation ) {
473+ throw std::runtime_error (" Tried to allocate " +std::to_string (nbytes)+" bytes, but the allocator maximum is " +std::to_string (allocator_->maxAllocation ));
474+ }
475+
476+ cuda::throw_if_error (allocator_->hostAllocator .HostAllocate (&ptr, nbytes, stream.id ()));
477+ }
478+ else {
479+ cuda::throw_if_error (cudaMallocHost (&ptr, nbytes));
450480 }
451481
452- void *ptr = nullptr ;
453- cuda::throw_if_error (allocator_->hostAllocator .HostAllocate (&ptr, nbytes, stream.id ()));
454482 return ptr;
455483}
456484
457485void CUDAService::free_host (void *ptr) {
458- cuda::throw_if_error (allocator_->hostAllocator .HostFree (ptr));
486+ if (allocator_) {
487+ cuda::throw_if_error (allocator_->hostAllocator .HostFree (ptr));
488+ }
489+ else {
490+ cuda::throw_if_error (cudaFreeHost (ptr));
491+ }
459492}
0 commit comments