Open
Description
Currently, the program has hardcoded max grid and block dimensions.
However, these limits should be queried from the device at runtime, to support different GPU architectures.
As an example, the limits can be read with Numba:
from numba import cuda
# Look up the CUDA device bound to the current context and report its
# launch-configuration limits, one "label = value" line per attribute.
device = cuda.get_current_device()
print(f"name = {device.name}")

# Upper bound on the number of threads in a single block.
print(f"maxThreadsPerBlock = {device.MAX_THREADS_PER_BLOCK}")

# Per-axis block dimension limits.
print(f"maxBlockDimX = {device.MAX_BLOCK_DIM_X}")
print(f"maxBlockDimY = {device.MAX_BLOCK_DIM_Y}")
print(f"maxBlockDimZ = {device.MAX_BLOCK_DIM_Z}")

# Per-axis grid dimension limits.
print(f"maxGridDimX = {device.MAX_GRID_DIM_X}")
print(f"maxGridDimY = {device.MAX_GRID_DIM_Y}")
print(f"maxGridDimZ = {device.MAX_GRID_DIM_Z}")