|
| 1 | +import math |
| 2 | +import os |
| 3 | + |
| 4 | +import kvikio |
| 5 | +import kvikio.defaults |
| 6 | +import numpy as np |
| 7 | +from cupyx.profiler import benchmark |
| 8 | +from tifffile import TiffFile |
| 9 | + |
| 10 | +from demo_implementation import read_openslide, read_tifffile, read_tiled, get_n_tiles, get_tile_buffers |
| 11 | + |
# Locate the input image. The directory is taken from the WHOLE_SLIDE_DATA_DIR
# environment variable when it is set; otherwise it falls back to the
# directory that contains this script.
# NOTE: the fallback has to use the ``__file__`` variable. The string literal
# '__file__' has an empty dirname, which silently resolved to the current
# working directory instead of the script's location.
data_dir = os.environ.get(
    'WHOLE_SLIDE_DATA_DIR',
    os.path.dirname(os.path.abspath(__file__)),
)
fname = os.path.join(data_dir, 'resize.tiff')

# Fail early with a clear message rather than deep inside a reader.
if not os.path.exists(fname):
    raise RuntimeError(f"Could not find data file: {fname}")
| 16 | + |
# Benchmark configuration: the TIFF page (resolution level) to read and the
# value passed as ``max_duration`` to every ``benchmark`` call below.
level, max_duration = 0, 8

# Open the file once up front to collect the geometry of the selected page.
with TiffFile(fname) as tif:
    page = tif.pages[level]
    page_shape = page.shape
    tile_shape = (page.tilelength, page.tilewidth, page.samplesperpixel)
    # get_n_tiles(page) yields per-axis counts; their product is the total.
    tiles_per_axis = get_n_tiles(page)
    total_tiles = math.prod(tiles_per_axis)

# Report what is about to be benchmarked.
print(f"Resolution level {level}\n")
print(f"\tshape: {page_shape}")
print(f"\tstored as {total_tiles} tiles of shape {tile_shape}")
| 28 | + |
# Turn compatibility mode off so that cuFile is used: when compat_mode() is
# True, POSIX I/O is used instead of libcufile.so.
kvikio.defaults.compat_mode_reset(False)
assert not kvikio.defaults.compat_mode()

# Number of threads kvikio should use.
kvikio.defaults.num_threads_reset(16)

# Echo the effective kvikio settings alongside the page information.
print(f"\t{kvikio.defaults.compat_mode() = }")
print(f"\t{kvikio.defaults.get_num_threads() = }")

# Optionally allocate the tile buffers once and register them with kvikio
# before any timing starts. When disabled, ``tile_buffers`` stays None and is
# passed through to ``read_tiled`` as-is.
preregister_buffers = False
tile_buffers = None
if preregister_buffers:
    tile_buffers = get_tile_buffers(fname, level, n_buffer=256)
    for b in tile_buffers:
        kvikio.memory_register(b)
| 48 | + |
| 49 | +# print(f"\tkvikio task size = {kvikio.defaults.task_size()/1024**2} MB") |
| 50 | + |
# Accumulators shared by every benchmark in this script: the mean of
# ``gpu_times`` for each method and the matching human-readable label.
times = []
labels = []

# Baseline readers: both are timed the same way (no warm-up run), so they are
# driven from a single loop and the results are bound to their names afterwards.
_baseline_perf = {}
for _label, _reader in (('openslide', read_openslide),
                        ('tifffile', read_tifffile)):
    _baseline_perf[_label] = benchmark(
        _reader,
        (fname, level),
        n_warmup=0,
        n_repeat=100,
        max_duration=max_duration,
    )
    times.append(_baseline_perf[_label].gpu_times.mean())
    labels.append(_label)
    print(f"duration ({labels[-1]}) = {times[-1]}")

perf_openslide = _baseline_perf['openslide']
perf_tifffile = _baseline_perf['tifffile']
| 74 | + |
def _bench_tiled(backend, **extra_kwargs):
    """Benchmark ``read_tiled`` on ``(fname, [level])`` for one backend.

    ``extra_kwargs`` are forwarded to ``read_tiled`` together with the
    backend name and the shared ``tile_buffers``.
    """
    return benchmark(
        read_tiled,
        (fname, [level]),
        kwargs=dict(backend=backend, **extra_kwargs, tile_buffers=tile_buffers),
        n_warmup=1,
        n_repeat=100,
        max_duration=max_duration,
    )


def _record(perf, label):
    """Append the mean GPU time and its label, then print the new entry."""
    times.append(perf.gpu_times.mean())
    labels.append(label)
    print(f"duration ({labels[-1]}) = {times[-1]}")


# Run the full kvikio suite twice: first in compatibility mode (GDS off),
# then with GDS enabled.
for gds_enabled in [False, True]:
    kvikio.defaults.compat_mode_reset(not gds_enabled)
    assert kvikio.defaults.compat_mode() == (not gds_enabled)

    # --- raw_read backend -------------------------------------------------
    p = _bench_tiled('kvikio-raw_read')
    if gds_enabled:
        perf_kvikio_raw = p
    else:
        perf_kvikio_raw_nogds = p
    _record(p, f"kvikio-read_raw ({gds_enabled=})")

    # --- read backend, swept over task sizes ------------------------------
    # Task size is 4096 * n_blocks bytes, i.e. 32, 64, 128 and 256 kB.
    for n_blocks in [8, 16, 32, 64]:
        kvikio.defaults.task_size_reset(4096 * n_blocks)

        p = _bench_tiled('kvikio-read')
        # NOTE(review): only the result for the last task size stays bound
        # to this name; earlier ones survive only through times/labels.
        if gds_enabled:
            perf_kvikio_read = p
        else:
            perf_kvikio_read_nogds = p
        _record(
            p,
            f"kvikio-read (task size={kvikio.defaults.task_size() // 1024} kB)"
            f" ({gds_enabled=})",
        )

    # --- pread backend, swept over the number of buffers ------------------
    # Use a fixed 512 kB task size (512 * 1024 bytes) for the pread runs.
    kvikio.defaults.task_size_reset(512 * 1024)
    pread_results = []
    if gds_enabled:
        perf_kvikio_pread = pread_results
    else:
        perf_kvikio_pread_nogds = pread_results
    n_buffers = [1, 4, 16, 64, 256]
    for n_buffer in n_buffers:
        p = _bench_tiled('kvikio-pread', n_buffer=n_buffer)
        pread_results.append(p)
        _record(p, f"kvikio-pread ({n_buffer=}) ({gds_enabled=})")
| 142 | + |
# Undo the optional buffer registration performed before the benchmarks.
if preregister_buffers:
    for b in tile_buffers:
        kvikio.memory_deregister(b)

# Leave kvikio with compatibility mode switched off.
kvikio.defaults.compat_mode_reset(False)

# Pick an output filename that does not exist yet so earlier results are never
# overwritten: read_times.npz, then read_times1.npz, read_times2.npz, ...
out_name = 'read_times.npz'
suffix = 0
while os.path.exists(out_name):
    suffix += 1
    out_name = f'read_times{suffix}.npz'

# Store the timings and their labels side by side in one archive.
times_arr = np.asarray(times)
labels_arr = np.asarray(labels)
np.savez(out_name, times=times_arr, labels=labels_arr)
| 156 | + |
| 157 | + |
| 158 | +""" |
| 159 | +Resolution level 0 with Cache clearing, but reads are not 4096-byte aligned |
| 160 | +
|
| 161 | + shape: (26420, 19920, 3) |
| 162 | + stored as 2028 tiles of shape (512, 512, 3) |
| 163 | + kvikio.defaults.compat_mode() = False |
| 164 | + kvikio.defaults.get_num_threads() = 18 |
| 165 | + kvikio task size = 4.0 MB |
| 166 | +duration (openslide) = 28.921716796875 |
| 167 | +duration (tifffile) = 3.818202718098958 |
| 168 | +duration (tiled-tifffile) = 3.885939778645833 |
| 169 | +duration (kvikio-read_raw (gds_enabled=False)) = 3.4184929199218748 |
| 170 | +duration (kvikio-read (gds_enabled=False)) = 3.813303955078125 |
| 171 | +duration (kvikio-pread (n_buffer=1) (gds_enabled=False)) = 3.9369333496093746 |
| 172 | +duration (kvikio-pread (n_buffer=2) (gds_enabled=False)) = 4.028409342447917 |
| 173 | +duration (kvikio-pread (n_buffer=4) (gds_enabled=False)) = 2.785054626464844 |
| 174 | +duration (kvikio-pread (n_buffer=8) (gds_enabled=False)) = 1.7379150390625 |
| 175 | +duration (kvikio-pread (n_buffer=16) (gds_enabled=False)) = 1.2908187103271485 |
| 176 | +duration (kvikio-pread (n_buffer=32) (gds_enabled=False)) = 1.0635023193359374 |
| 177 | +duration (kvikio-pread (n_buffer=64) (gds_enabled=False)) = 0.9369119762073862 |
| 178 | +duration (kvikio-pread (n_buffer=128) (gds_enabled=False)) = 0.8773154449462891 |
| 179 | +duration (kvikio-read_raw (gds_enabled=True)) = 3.4003018391927085 |
| 180 | +duration (kvikio-read (gds_enabled=True)) = 3.763134847005208 |
| 181 | +duration (kvikio-pread (n_buffer=1) (gds_enabled=True)) = 3.7581602376302086 |
| 182 | +duration (kvikio-pread (n_buffer=2) (gds_enabled=True)) = 4.107709065755208 |
| 183 | +duration (kvikio-pread (n_buffer=4) (gds_enabled=True)) = 2.609207336425781 |
| 184 | +duration (kvikio-pread (n_buffer=8) (gds_enabled=True)) = 1.744682902018229 |
| 185 | +duration (kvikio-pread (n_buffer=16) (gds_enabled=True)) = 1.2838030700683594 |
| 186 | +duration (kvikio-pread (n_buffer=32) (gds_enabled=True)) = 1.05522587890625 |
| 187 | +duration (kvikio-pread (n_buffer=64) (gds_enabled=True)) = 0.9214399691495029 |
| 188 | +duration (kvikio-pread (n_buffer=128) (gds_enabled=True)) = 0.8695069885253907 |
| 189 | +
|
| 190 | +
|
| 191 | +Resolution level 0 with 4096-byte aligned reads |
| 192 | +
|
| 193 | + shape: (26420, 19920, 3) |
| 194 | + stored as 2028 tiles of shape (512, 512, 3) |
| 195 | + kvikio.defaults.compat_mode() = False |
| 196 | + kvikio.defaults.get_num_threads() = 18 |
| 197 | + kvikio task size = 4.0 MB |
| 198 | +duration (kvikio-read_raw (gds_enabled=False)) = 3.4100815429687494 |
| 199 | +duration (kvikio-read (gds_enabled=False)) = 3.8238279622395837 |
| 200 | +duration (kvikio-pread (n_buffer=1) (gds_enabled=False)) = 3.740669270833333 |
| 201 | +duration (kvikio-pread (n_buffer=4) (gds_enabled=False)) = 2.672812255859375 |
| 202 | +duration (kvikio-pread (n_buffer=16) (gds_enabled=False)) = 1.3131573791503905 |
| 203 | +duration (kvikio-pread (n_buffer=64) (gds_enabled=False)) = 0.9273524225408379 |
| 204 | +duration (kvikio-pread (n_buffer=256) (gds_enabled=False)) = 0.8461123250325521 |
| 205 | +duration (kvikio-read_raw (gds_enabled=True)) = 4.179492513020834 |
| 206 | +duration (kvikio-read (gds_enabled=True)) = 4.889711263020834 |
| 207 | +duration (kvikio-pread (n_buffer=1) (gds_enabled=True)) = 4.816523600260417 |
| 208 | +duration (kvikio-pread (n_buffer=4) (gds_enabled=True)) = 2.2351694824218753 |
| 209 | +duration (kvikio-pread (n_buffer=16) (gds_enabled=True)) = 1.1082978149414064 |
| 210 | +duration (kvikio-pread (n_buffer=64) (gds_enabled=True)) = 0.670870166015625 |
| 211 | +duration (kvikio-pread (n_buffer=256) (gds_enabled=True)) = 0.5998859683766086 |
| 212 | +
|
| 213 | +
|
| 214 | + pread with default 4MB "task size" |
| 215 | + Resolution level 0 |
| 216 | + shape: (26420, 19920, 3) |
| 217 | + stored as 2028 tiles of shape (512, 512, 3) |
| 218 | + kvikio.defaults.compat_mode() = False |
| 219 | + kvikio.defaults.get_num_threads() = 18 |
| 220 | + kvikio task size = 4 MB |
| 221 | + duration (kvikio-pread (n_buffer=1) (gds_enabled=True)) = 4.8583107096354174 |
| 222 | + duration (kvikio-pread (n_buffer=4) (gds_enabled=True)) = 2.1224323242187504 |
| 223 | + duration (kvikio-pread (n_buffer=16) (gds_enabled=True)) = 1.1164629991319446 |
| 224 | + duration (kvikio-pread (n_buffer=64) (gds_enabled=True)) = 0.6734547526041668 |
| 225 | + duration (kvikio-pread (n_buffer=256) (gds_enabled=True)) = 0.601566697064568 |
| 226 | + (cucim) grelee@grelee-dt:~/Dropbox/NVIDIA/demos/gds/gds-cucim-demo$ python benchmark_read.py |
| 227 | + Resolution level 0 |
| 228 | +
|
| 229 | + pread with 64kB "task size" |
| 230 | + shape: (26420, 19920, 3) |
| 231 | + stored as 2028 tiles of shape (512, 512, 3) |
| 232 | + kvikio.defaults.compat_mode() = False |
| 233 | + kvikio.defaults.get_num_threads() = 18 |
| 234 | + kvikio task size = 0.064 MB |
| 235 | + duration (kvikio-pread (n_buffer=1) (gds_enabled=True)) = 3.0912179565429687 |
| 236 | + duration (kvikio-pread (n_buffer=4) (gds_enabled=True)) = 1.3932305145263673 |
| 237 | + duration (kvikio-pread (n_buffer=16) (gds_enabled=True)) = 0.9027577819824221 |
| 238 | + duration (kvikio-pread (n_buffer=64) (gds_enabled=True)) = 0.7827104492187501 |
| 239 | + duration (kvikio-pread (n_buffer=256) (gds_enabled=True)) = 0.756464599609375 |
| 240 | +""" |
0 commit comments