Skip to content

Commit 59fe318

Browse files
makortelfwyzard
authored andcommitted
Add a flag to disable caching for the allocators (#205)
1 parent 7067416 commit 59fe318

File tree

1 file changed

+88
-55
lines changed

1 file changed

+88
-55
lines changed

HeterogeneousCore/CUDAServices/src/CUDAService.cc

+88-55
Original file line numberDiff line numberDiff line change
@@ -284,51 +284,57 @@ CUDAService::CUDAService(edm::ParameterSet const& config, edm::ActivityRegistry&
284284

285285
// create allocator
286286
auto const& allocator = config.getUntrackedParameter<edm::ParameterSet>("allocator");
287-
auto binGrowth = allocator.getUntrackedParameter<unsigned int>("binGrowth");
288-
auto minBin = allocator.getUntrackedParameter<unsigned int>("minBin");
289-
auto maxBin = allocator.getUntrackedParameter<unsigned int>("maxBin");
290-
size_t maxCachedBytes = allocator.getUntrackedParameter<unsigned int>("maxCachedBytes");
291-
auto maxCachedFraction = allocator.getUntrackedParameter<double>("maxCachedFraction");
292-
auto debug = allocator.getUntrackedParameter<bool>("debug");
293-
294-
size_t minCachedBytes = std::numeric_limits<size_t>::max();
295-
int currentDevice;
296-
cudaCheck(cudaGetDevice(&currentDevice));
297-
for (int i = 0; i < numberOfDevices_; ++i) {
298-
size_t freeMemory, totalMemory;
299-
cudaCheck(cudaSetDevice(i));
300-
cudaCheck(cudaMemGetInfo(&freeMemory, &totalMemory));
301-
minCachedBytes = std::min(minCachedBytes, static_cast<size_t>(maxCachedFraction * freeMemory));
302-
}
303-
cudaCheck(cudaSetDevice(currentDevice));
304-
if (maxCachedBytes > 0) {
305-
minCachedBytes = std::min(minCachedBytes, maxCachedBytes);
306-
}
307-
log << "cub::CachingDeviceAllocator settings\n"
308-
<< " bin growth " << binGrowth << "\n"
309-
<< " min bin " << minBin << "\n"
310-
<< " max bin " << maxBin << "\n"
311-
<< " resulting bins:\n";
312-
for (auto bin = minBin; bin <= maxBin; ++bin) {
313-
auto binSize = notcub::CachingDeviceAllocator::IntPow(binGrowth, bin);
314-
if (binSize >= (1<<30) and binSize % (1<<30) == 0) {
315-
log << " " << std::setw(8) << (binSize >> 30) << " GB\n";
316-
} else if (binSize >= (1<<20) and binSize % (1<<20) == 0) {
317-
log << " " << std::setw(8) << (binSize >> 20) << " MB\n";
318-
} else if (binSize >= (1<<10) and binSize % (1<<10) == 0) {
319-
log << " " << std::setw(8) << (binSize >> 10) << " kB\n";
320-
} else {
321-
log << " " << std::setw(9) << binSize << " B\n";
287+
auto allocatorEnabled = allocator.getUntrackedParameter<bool>("enabled");
288+
if(allocatorEnabled) {
289+
auto binGrowth = allocator.getUntrackedParameter<unsigned int>("binGrowth");
290+
auto minBin = allocator.getUntrackedParameter<unsigned int>("minBin");
291+
auto maxBin = allocator.getUntrackedParameter<unsigned int>("maxBin");
292+
size_t maxCachedBytes = allocator.getUntrackedParameter<unsigned int>("maxCachedBytes");
293+
auto maxCachedFraction = allocator.getUntrackedParameter<double>("maxCachedFraction");
294+
auto debug = allocator.getUntrackedParameter<bool>("debug");
295+
296+
size_t minCachedBytes = std::numeric_limits<size_t>::max();
297+
int currentDevice;
298+
cudaCheck(cudaGetDevice(&currentDevice));
299+
for (int i = 0; i < numberOfDevices_; ++i) {
300+
size_t freeMemory, totalMemory;
301+
cudaCheck(cudaSetDevice(i));
302+
cudaCheck(cudaMemGetInfo(&freeMemory, &totalMemory));
303+
minCachedBytes = std::min(minCachedBytes, static_cast<size_t>(maxCachedFraction * freeMemory));
304+
}
305+
cudaCheck(cudaSetDevice(currentDevice));
306+
if (maxCachedBytes > 0) {
307+
minCachedBytes = std::min(minCachedBytes, maxCachedBytes);
322308
}
309+
log << "cub::CachingDeviceAllocator settings\n"
310+
<< " bin growth " << binGrowth << "\n"
311+
<< " min bin " << minBin << "\n"
312+
<< " max bin " << maxBin << "\n"
313+
<< " resulting bins:\n";
314+
for (auto bin = minBin; bin <= maxBin; ++bin) {
315+
auto binSize = notcub::CachingDeviceAllocator::IntPow(binGrowth, bin);
316+
if (binSize >= (1<<30) and binSize % (1<<30) == 0) {
317+
log << " " << std::setw(8) << (binSize >> 30) << " GB\n";
318+
} else if (binSize >= (1<<20) and binSize % (1<<20) == 0) {
319+
log << " " << std::setw(8) << (binSize >> 20) << " MB\n";
320+
} else if (binSize >= (1<<10) and binSize % (1<<10) == 0) {
321+
log << " " << std::setw(8) << (binSize >> 10) << " kB\n";
322+
} else {
323+
log << " " << std::setw(9) << binSize << " B\n";
324+
}
325+
}
326+
log << " maximum amount of cached memory: " << (minCachedBytes >> 20) << " MB\n";
327+
328+
allocator_ = std::make_unique<Allocator>(notcub::CachingDeviceAllocator::IntPow(binGrowth, maxBin),
329+
binGrowth, minBin, maxBin, minCachedBytes,
330+
false, // do not skip cleanup
331+
debug
332+
);
333+
log << "\n";
334+
}
335+
else {
336+
log << "cub::CachingDeviceAllocator disabled\n";
323337
}
324-
log << " maximum amount of cached memory: " << (minCachedBytes >> 20) << " MB\n";
325-
326-
allocator_ = std::make_unique<Allocator>(notcub::CachingDeviceAllocator::IntPow(binGrowth, maxBin),
327-
binGrowth, minBin, maxBin, minCachedBytes,
328-
false, // do not skip cleanup
329-
debug
330-
);
331-
log << "\n";
332338

333339
log << "CUDAService fully initialized";
334340
enabled_ = true;
@@ -341,7 +347,9 @@ CUDAService::CUDAService(edm::ParameterSet const& config, edm::ActivityRegistry&
341347
CUDAService::~CUDAService() {
342348
if (enabled_) {
343349
// Explicitly destruct the allocator before the device resets below
344-
allocator_.reset();
350+
if(allocator_) {
351+
allocator_.reset();
352+
}
345353

346354
for (int i = 0; i < numberOfDevices_; ++i) {
347355
cudaCheck(cudaSetDevice(i));
@@ -368,6 +376,7 @@ void CUDAService::fillDescriptions(edm::ConfigurationDescriptions & descriptions
368376
desc.addUntracked<edm::ParameterSetDescription>("limits", limits)->setComment("See the documentation of cudaDeviceSetLimit for more information.\nSetting any of these options to -1 keeps the default value.");
369377

370378
edm::ParameterSetDescription allocator;
379+
allocator.addUntracked<bool>("enabled", true)->setComment("Enable caching. Setting this to false means to call cudaMalloc for each allocation etc.");
371380
allocator.addUntracked<unsigned int>("binGrowth", 8)->setComment("Growth factor (bin_growth in cub::CachingDeviceAllocator");
372381
allocator.addUntracked<unsigned int>("minBin", 1)->setComment("Smallest bin, corresponds to binGrowth^minBin bytes (min_bin in cub::CacingDeviceAllocator");
373382
allocator.addUntracked<unsigned int>("maxBin", 9)->setComment("Largest bin, corresponds to binGrowth^maxBin bytes (max_bin in cub::CachingDeviceAllocator). Note that unlike in cub, allocations larger than binGrowth^maxBin are set to fail.");
@@ -431,29 +440,53 @@ struct CUDAService::Allocator {
431440
};
432441

433442
void *CUDAService::allocate_device(int dev, size_t nbytes, cuda::stream_t<>& stream) {
434-
if(nbytes > allocator_->maxAllocation) {
435-
throw std::runtime_error("Tried to allocate "+std::to_string(nbytes)+" bytes, but the allocator maximum is "+std::to_string(allocator_->maxAllocation));
436-
}
437-
438443
void *ptr = nullptr;
439-
cuda::throw_if_error(allocator_->deviceAllocator.DeviceAllocate(dev, &ptr, nbytes, stream.id()));
444+
if(allocator_) {
445+
if(nbytes > allocator_->maxAllocation) {
446+
throw std::runtime_error("Tried to allocate "+std::to_string(nbytes)+" bytes, but the allocator maximum is "+std::to_string(allocator_->maxAllocation));
447+
}
448+
449+
cuda::throw_if_error(allocator_->deviceAllocator.DeviceAllocate(dev, &ptr, nbytes, stream.id()));
450+
}
451+
else {
452+
cuda::device::current::scoped_override_t<> setDeviceForThisScope(dev);
453+
cuda::throw_if_error(cudaMalloc(&ptr, nbytes));
454+
}
440455
return ptr;
441456
}
442457

443458
void CUDAService::free_device(int device, void *ptr) {
444-
cuda::throw_if_error(allocator_->deviceAllocator.DeviceFree(device, ptr));
459+
if(allocator_) {
460+
cuda::throw_if_error(allocator_->deviceAllocator.DeviceFree(device, ptr));
461+
}
462+
else {
463+
cuda::device::current::scoped_override_t<> setDeviceForThisScope(device);
464+
cuda::throw_if_error(cudaFree(ptr));
465+
}
445466
}
446467

447468
void *CUDAService::allocate_host(size_t nbytes, cuda::stream_t<>& stream) {
448-
if(nbytes > allocator_->maxAllocation) {
449-
throw std::runtime_error("Tried to allocate "+std::to_string(nbytes)+" bytes, but the allocator maximum is "+std::to_string(allocator_->maxAllocation));
469+
void *ptr = nullptr;
470+
471+
if(allocator_) {
472+
if(nbytes > allocator_->maxAllocation) {
473+
throw std::runtime_error("Tried to allocate "+std::to_string(nbytes)+" bytes, but the allocator maximum is "+std::to_string(allocator_->maxAllocation));
474+
}
475+
476+
cuda::throw_if_error(allocator_->hostAllocator.HostAllocate(&ptr, nbytes, stream.id()));
477+
}
478+
else {
479+
cuda::throw_if_error(cudaMallocHost(&ptr, nbytes));
450480
}
451481

452-
void *ptr = nullptr;
453-
cuda::throw_if_error(allocator_->hostAllocator.HostAllocate(&ptr, nbytes, stream.id()));
454482
return ptr;
455483
}
456484

457485
void CUDAService::free_host(void *ptr) {
458-
cuda::throw_if_error(allocator_->hostAllocator.HostFree(ptr));
486+
if(allocator_) {
487+
cuda::throw_if_error(allocator_->hostAllocator.HostFree(ptr));
488+
}
489+
else {
490+
cuda::throw_if_error(cudaFreeHost(ptr));
491+
}
459492
}

0 commit comments

Comments
 (0)