Skip to content

Commit dd7ed83

Browse files
committed
Merge pull request #313 from boazmohar/readTiffPage
tiffs whose page count is not divisible by nplanes will give a warning instead of an error
2 parents 00c19ce + 05bf7cb commit dd7ed83

File tree

3 files changed

+40
-20
lines changed

3 files changed

+40
-20
lines changed

test/test_images_io.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,16 @@ def test_from_tif_multi_planes(eng):
114114
assert [x.sum() for x in data.toarray()] == [1140006, 1119161, 1098917]
115115

116116

117+
def test_from_tif_multi_planes_discard_extra(eng):
118+
path = os.path.join(resources, 'multilayer_tif', 'dotdotdot_lzw.tif')
119+
data = fromtif(path, nplanes=2, engine=eng, discard_extra=True)
120+
assert data.shape[0] == 1
121+
assert data.shape[1] == 2
122+
with pytest.raises(BaseException) as error_msg:
123+
data = fromtif(path, nplanes=2, engine=eng, discard_extra=False)
124+
assert 'nplanes' in str(error_msg.value)
125+
126+
117127
def test_from_tif_multi_planes_many(eng):
118128
path = os.path.join(resources, 'multilayer_tif', 'dotdotdot_lzw*.tif')
119129
data = fromtif(path, nplanes=3, engine=eng)

thunder/images/readers.py

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import itertools
2+
import logging
23
from io import BytesIO
34
from numpy import frombuffer, prod, random, asarray, expand_dims
45

@@ -283,8 +284,8 @@ def frombinary(path, shape=None, dtype=None, ext='bin', start=None, stop=None, r
283284
raise ValueError("Last dimension '%d' must be divisible by nplanes '%d'" %
284285
(shape[-1], nplanes))
285286

286-
def getarray(idxAndBuf):
287-
idx, buf = idxAndBuf
287+
def getarray(idx_buffer_filename):
288+
idx, buf, _ = idx_buffer_filename
288289
ary = frombuffer(buf, dtype=dtype, count=int(prod(shape))).reshape(shape, order=order)
289290
if nplanes is None:
290291
yield (idx,), ary
@@ -294,17 +295,17 @@ def getarray(idxAndBuf):
294295
if shape[-1] % nplanes:
295296
npoints += 1
296297
timepoint = 0
297-
lastPlane = 0
298-
curPlane = 1
299-
while curPlane < ary.shape[-1]:
300-
if curPlane % nplanes == 0:
301-
slices = [slice(None)] * (ary.ndim - 1) + [slice(lastPlane, curPlane)]
298+
last_plane = 0
299+
current_plane = 1
300+
while current_plane < ary.shape[-1]:
301+
if current_plane % nplanes == 0:
302+
slices = [slice(None)] * (ary.ndim - 1) + [slice(last_plane, current_plane)]
302303
yield idx*npoints + timepoint, ary[slices].squeeze()
303304
timepoint += 1
304-
lastPlane = curPlane
305-
curPlane += 1
305+
last_plane = current_plane
306+
current_plane += 1
306307
# yield remaining planes
307-
slices = [slice(None)] * (ary.ndim - 1) + [slice(lastPlane, ary.shape[-1])]
308+
slices = [slice(None)] * (ary.ndim - 1) + [slice(last_plane, ary.shape[-1])]
308309
yield (idx*npoints + timepoint,), ary[slices].squeeze()
309310

310311
recount = False if nplanes is None else True
@@ -315,7 +316,7 @@ def getarray(idxAndBuf):
315316
dims=newdims, dtype=dtype, labels=labels, recount=recount,
316317
engine=engine, credentials=credentials)
317318

318-
def fromtif(path, ext='tif', start=None, stop=None, recursive=False, nplanes=None, npartitions=None, labels=None, engine=None, credentials=None):
319+
def fromtif(path, ext='tif', start=None, stop=None, recursive=False, nplanes=None, npartitions=None, labels=None, engine=None, credentials=None, discard_extra=False):
319320
"""
320321
Loads images from single or multi-page TIF files.
321322
@@ -346,29 +347,38 @@ def fromtif(path, ext='tif', start=None, stop=None, recursive=False, nplanes=Non
346347
347348
labels : array, optional, default = None
348349
Labels for records. If provided, should be one-dimensional.
350+
351+
discard_extra : boolean, optional, default = False
352+
If True and the number of pages in a multi-page tiff is not divisible by nplanes, the remainder will
353+
be discarded and a warning will be logged. If False, a ValueError will be raised.
349354
"""
350355
import skimage.external.tifffile as tifffile
351356

352357
if nplanes is not None and nplanes <= 0:
353358
raise ValueError('nplanes must be positive if passed, got %d' % nplanes)
354359

355-
def getarray(idxAndBuf):
356-
idx, buf = idxAndBuf
360+
def getarray(idx_buffer_filename):
361+
idx, buf, fname = idx_buffer_filename
357362
fbuf = BytesIO(buf)
358363
tfh = tifffile.TiffFile(fbuf)
359364
ary = tfh.asarray()
360365
pageCount = ary.shape[0]
361366
if nplanes is not None:
362-
values = [ary[i:(i+nplanes)] for i in range(0, ary.shape[0], nplanes)]
367+
extra = pageCount % nplanes
368+
if extra:
369+
if discard_extra:
370+
pageCount = pageCount - extra
371+
logging.getLogger('thunder').warn('Ignored %d pages in file %s' % (extra, fname))
372+
else:
373+
raise ValueError("nplanes '%d' does not evenly divide '%d'" % (nplanes, pageCount))
374+
values = [ary[i:(i+nplanes)] for i in range(0, pageCount, nplanes)]
363375
else:
364376
values = [ary]
365377
tfh.close()
366378

367379
if ary.ndim == 3:
368380
values = [val.squeeze() for val in values]
369381

370-
if nplanes and (pageCount % nplanes):
371-
raise ValueError("nplanes '%d' does not evenly divide '%d'" % (nplanes, pageCount))
372382
nvals = len(values)
373383
keys = [(idx*nvals + timepoint,) for timepoint in range(nvals)]
374384
return zip(keys, values)
@@ -408,8 +418,8 @@ def frompng(path, ext='png', start=None, stop=None, recursive=False, npartitions
408418
"""
409419
from scipy.misc import imread
410420

411-
def getarray(idxAndBuf):
412-
idx, buf = idxAndBuf
421+
def getarray(idx_buffer_filename):
422+
idx, buf, _ = idx_buffer_filename
413423
fbuf = BytesIO(buf)
414424
yield (idx,), imread(fbuf)
415425

thunder/readers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,9 @@ def read(self, path, ext=None, start=None, stop=None, recursive=False, npartitio
149149
if spark and isinstance(self.engine, spark):
150150
npartitions = min(npartitions, nfiles) if npartitions else nfiles
151151
rdd = self.engine.parallelize(enumerate(files), npartitions)
152-
return rdd.map(lambda kv: (kv[0], readlocal(kv[1])))
152+
return rdd.map(lambda kv: (kv[0], readlocal(kv[1]), kv[1]))
153153
else:
154-
return [(k, readlocal(v)) for k, v in enumerate(files)]
154+
return [(k, readlocal(v), v) for k, v in enumerate(files)]
155155

156156

157157
class LocalFileReader(object):

0 commit comments

Comments
 (0)