Commit b773576

Merge pull request #833 from effigies/nf/get_scaled
NF: Enable data scaling within the target dtype
2 parents 6cf363d + 8be3a0e

9 files changed: +236 -69 lines

nibabel/arrayproxy.py
+63 -22

@@ -33,7 +33,7 @@
 
 from .deprecated import deprecate_with_version
 from .volumeutils import array_from_file, apply_read_scaling
-from .fileslice import fileslice
+from .fileslice import fileslice, canonical_slicers
 from .keywordonly import kw_only_meth
 from . import openers
 
@@ -336,36 +336,77 @@ def _get_fileobj(self):
                     self.file_like, keep_open=False) as opener:
                 yield opener
 
-    def get_unscaled(self):
-        """ Read of data from file
-
-        This is an optional part of the proxy API
-        """
-        with self._get_fileobj() as fileobj, self._lock:
-            raw_data = array_from_file(self._shape,
+    def _get_unscaled(self, slicer):
+        if canonical_slicers(slicer, self._shape, False) == \
+                canonical_slicers((), self._shape, False):
+            with self._get_fileobj() as fileobj, self._lock:
+                return array_from_file(self._shape,
                                        self._dtype,
                                        fileobj,
                                        offset=self._offset,
                                        order=self.order,
                                        mmap=self._mmap)
-        return raw_data
+        with self._get_fileobj() as fileobj:
+            return fileslice(fileobj,
+                             slicer,
+                             self._shape,
+                             self._dtype,
+                             self._offset,
+                             order=self.order,
+                             lock=self._lock)
+
+    def _get_scaled(self, dtype, slicer):
+        # Ensure scale factors have dtypes
+        scl_slope = np.asanyarray(self._slope)
+        scl_inter = np.asanyarray(self._inter)
+        use_dtype = scl_slope.dtype if dtype is None else dtype
+        slope = scl_slope.astype(use_dtype)
+        inter = scl_inter.astype(use_dtype)
+        # Read array and upcast as necessary for big slopes, intercepts
+        scaled = apply_read_scaling(self._get_unscaled(slicer=slicer), slope, inter)
+        if dtype is not None:
+            scaled = scaled.astype(np.promote_types(scaled.dtype, dtype), copy=False)
+        return scaled
+
+    def get_unscaled(self):
+        """ Read data from file
+
+        This is an optional part of the proxy API
+        """
+        return self._get_unscaled(slicer=())
+
+    def get_scaled(self, dtype=None):
+        """ Read data from file and apply scaling
+
+        The dtype of the returned array is the narrowest dtype that can
+        represent the data without overflow, and is at least as wide as
+        the dtype parameter.
+
+        If dtype is unspecified, it is the wider of the dtypes of the slope
+        or intercept. This will generally be determined by the parameter
+        size in the image header, and so should be consistent for a given
+        image format, but may vary across formats. Notably, these factors
+        are single-precision (32-bit) floats for NIfTI-1 and double-precision
+        (64-bit) floats for NIfTI-2.
+
+        Parameters
+        ----------
+        dtype : numpy dtype specifier
+            A numpy dtype specifier specifying the narrowest acceptable
+            dtype.
+
+        Returns
+        -------
+        array
+            Scaled of image data of data type `dtype`.
+        """
+        return self._get_scaled(dtype=dtype, slicer=())
 
     def __array__(self):
-        # Read array and scale
-        raw_data = self.get_unscaled()
-        return apply_read_scaling(raw_data, self._slope, self._inter)
+        return self._get_scaled(dtype=None, slicer=())
 
     def __getitem__(self, slicer):
-        with self._get_fileobj() as fileobj:
-            raw_data = fileslice(fileobj,
-                                 slicer,
-                                 self._shape,
-                                 self._dtype,
-                                 self._offset,
-                                 order=self.order,
-                                 lock=self._lock)
-        # Upcast as necessary for big slopes, intercepts
-        return apply_read_scaling(raw_data, self._slope, self._inter)
+        return self._get_scaled(dtype=None, slicer=slicer)
 
     def reshape(self, shape):
        """ Return an ArrayProxy with a new shape, without modifying data """

nibabel/brikhead.py
+18 -13

@@ -262,19 +262,24 @@ def __init__(self, file_like, header, mmap=True, keep_file_open=None):
     def scaling(self):
         return self._scaling
 
-    def __array__(self):
-        raw_data = self.get_unscaled()
-        # datatype may change if applying self._scaling
-        return raw_data if self.scaling is None else raw_data * self.scaling
-
-    def __getitem__(self, slicer):
-        raw_data = super(AFNIArrayProxy, self).__getitem__(slicer)
-        # apply volume specific scaling (may change datatype!)
-        if self.scaling is not None:
-            fake_data = strided_scalar(self._shape)
-            _, scaling = np.broadcast_arrays(fake_data, self.scaling)
-            raw_data = raw_data * scaling[slicer]
-        return raw_data
+    def _get_scaled(self, dtype, slicer):
+        raw_data = self._get_unscaled(slicer=slicer)
+        if self.scaling is None:
+            if dtype is None:
+                return raw_data
+            final_type = np.promote_types(raw_data.dtype, dtype)
+            return raw_data.astype(final_type, copy=False)
+
+        # Broadcast scaling to shape of original data
+        fake_data = strided_scalar(self._shape)
+        _, scaling = np.broadcast_arrays(fake_data, self.scaling)
+
+        final_type = np.result_type(raw_data, scaling)
+        if dtype is not None:
+            final_type = np.promote_types(final_type, dtype)
+
+        # Slice scaling to give output shape
+        return raw_data * scaling[slicer].astype(final_type)
 
 
 class AFNIHeader(SpatialHeader):
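
For the AFNI per-volume scaling above, the output dtype combines the raw data, the scale factors, and (if given) the caller's requested dtype. A small illustrative check of that dtype arithmetic with plain NumPy (shapes and values are made up, not taken from the commit):

    import numpy as np

    raw = np.zeros((3, 2), dtype=np.int16)                  # stand-in for unscaled data
    scaling = np.array([2.0, 0.5], dtype=np.float32)        # stand-in per-volume factors
    final_type = np.result_type(raw, scaling)               # float32: data vs. factors
    final_type = np.promote_types(final_type, np.float64)   # widen for a requested float64
    print((raw * scaling).astype(final_type, copy=False).dtype)   # float64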

nibabel/dataobj_images.py
+9 -1

@@ -10,6 +10,7 @@
 
 import numpy as np
 
+from .arrayproxy import is_proxy
 from .filebasedimages import FileBasedImage
 from .keywordonly import kw_only_meth
 from .deprecated import deprecate_with_version
@@ -350,7 +351,14 @@ def get_fdata(self, caching='fill', dtype=np.float64):
         if self._fdata_cache is not None:
             if self._fdata_cache.dtype.type == dtype.type:
                 return self._fdata_cache
-        data = np.asanyarray(self._dataobj).astype(dtype, copy=False)
+        dataobj = self._dataobj
+        # Attempt to confine data array to dtype during scaling
+        # On overflow, may still upcast
+        if is_proxy(dataobj):
+            dataobj = dataobj.get_scaled(dtype=dtype)
+        # Always return requested data type
+        # For array proxies, will only copy on overflow
+        data = np.asanyarray(dataobj, dtype=dtype)
         if caching == 'fill':
             self._fdata_cache = data
         return data
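
From the caller's side, the effect of this change is that get_fdata(dtype=...) can keep the scaling inside the requested dtype instead of always scaling in float64 and casting afterwards. A hedged usage sketch ('example.nii' is a hypothetical file name, not from the commit):

    import numpy as np
    import nibabel as nib

    img = nib.load('example.nii')                 # hypothetical scaled NIfTI image
    data32 = img.get_fdata(dtype=np.float32)      # scaling confined to float32 where possible
    data64 = img.get_fdata()                      # default float64 behaviour unchanged
    print(data32.dtype, data64.dtype)             # float32 float64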

nibabel/ecat.py
+26 -0

@@ -704,6 +704,32 @@ def __array__(self):
                                  frame_mapping[i][0])
         return data
 
+    def get_scaled(self, dtype=None):
+        """ Read data from file and apply scaling
+
+        The dtype of the returned array is the narrowest dtype that can
+        represent the data without overflow, and is at least as wide as
+        the dtype parameter.
+
+        If dtype is unspecified, it is automatically determined.
+
+        Parameters
+        ----------
+        dtype : numpy dtype specifier
+            A numpy dtype specifier specifying the narrowest acceptable
+            dtype.
+
+        Returns
+        -------
+        array
+            Scaled of image data of data type `dtype`.
+        """
+        data = self.__array__()
+        if dtype is None:
+            return data
+        final_type = np.promote_types(data.dtype, dtype)
+        return data.astype(final_type, copy=False)
+
     def __getitem__(self, sliceobj):
         """ Return slice `sliceobj` from ECAT data, optimizing if possible
         """

nibabel/minc1.py
+31 -2

@@ -261,13 +261,42 @@ def ndim(self):
     def is_proxy(self):
         return True
 
+    def _get_scaled(self, dtype, slicer):
+        data = self.minc_file.get_scaled_data(slicer)
+        if dtype is None:
+            return data
+        final_type = np.promote_types(data.dtype, dtype)
+        return data.astype(final_type, copy=False)
+
+    def get_scaled(self, dtype=None):
+        """ Read data from file and apply scaling
+
+        The dtype of the returned array is the narrowest dtype that can
+        represent the data without overflow, and is at least as wide as
+        the dtype parameter.
+
+        If dtype is unspecified, it is automatically determined.
+
+        Parameters
+        ----------
+        dtype : numpy dtype specifier
+            A numpy dtype specifier specifying the narrowest acceptable
+            dtype.
+
+        Returns
+        -------
+        array
+            Scaled of image data of data type `dtype`.
+        """
+        return self._get_scaled(dtype=dtype, slicer=())
+
     def __array__(self):
         ''' Read of data from file '''
-        return self.minc_file.get_scaled_data()
+        return self._get_scaled(dtype=None, slicer=())
 
     def __getitem__(self, sliceobj):
         """ Read slice `sliceobj` of data from file """
-        return self.minc_file.get_scaled_data(sliceobj)
+        return self._get_scaled(dtype=None, slicer=sliceobj)
 
 
 class MincHeader(SpatialHeader):
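
With get_scaled now defined on the MINC proxy as well, callers can treat any proxy uniformly through the proxy API. A hedged usage sketch ('example.mnc' is a hypothetical file name):

    import numpy as np
    import nibabel as nib
    from nibabel.arrayproxy import is_proxy

    img = nib.load('example.mnc')                 # hypothetical MINC1 image
    if is_proxy(img.dataobj):
        arr = img.dataobj.get_scaled(dtype=np.float32)
        print(arr.dtype)                          # at least float32, wider only on overflow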

nibabel/parrec.py
+63 -24

@@ -633,38 +633,77 @@ def dtype(self):
     def is_proxy(self):
         return True
 
-    def get_unscaled(self):
-        with ImageOpener(self.file_like) as fileobj:
-            return _data_from_rec(fileobj, self._rec_shape, self._dtype,
-                                  self._slice_indices, self._shape,
-                                  mmap=self._mmap)
-
-    def __array__(self):
-        with ImageOpener(self.file_like) as fileobj:
-            return _data_from_rec(fileobj,
-                                  self._rec_shape,
-                                  self._dtype,
-                                  self._slice_indices,
-                                  self._shape,
-                                  scalings=self._slice_scaling,
-                                  mmap=self._mmap)
-
-    def __getitem__(self, slicer):
+    def _get_unscaled(self, slicer):
         indices = self._slice_indices
-        if indices[0] != 0 or np.any(np.diff(indices) != 1):
+        if slicer == ():
+            with ImageOpener(self.file_like) as fileobj:
+                rec_data = array_from_file(self._rec_shape, self._dtype, fileobj, mmap=self._mmap)
+            rec_data = rec_data[..., indices]
+            return rec_data.reshape(self._shape, order='F')
+        elif indices[0] != 0 or np.any(np.diff(indices) != 1):
             # We can't load direct from REC file, use inefficient slicing
-            return np.asanyarray(self)[slicer]
+            return self._get_unscaled(())[slicer]
+
         # Slices all sequential from zero, can use fileslice
         # This gives more efficient volume by volume loading, for example
         with ImageOpener(self.file_like) as fileobj:
-            raw_data = fileslice(fileobj, slicer, self._shape, self._dtype, 0,
-                                 'F')
+            return fileslice(fileobj, slicer, self._shape, self._dtype, 0, 'F')
+
+    def _get_scaled(self, dtype, slicer):
+        raw_data = self._get_unscaled(slicer)
+        if self._slice_scaling is None:
+            if dtype is None:
+                return raw_data
+            final_type = np.promote_types(raw_data.dtype, dtype)
+            return raw_data.astype(final_type, copy=False)
+
         # Broadcast scaling to shape of original data
-        slopes, inters = self._slice_scaling
         fake_data = strided_scalar(self._shape)
-        _, slopes, inters = np.broadcast_arrays(fake_data, slopes, inters)
+        _, slopes, inters = np.broadcast_arrays(fake_data, *self._slice_scaling)
+
+        final_type = np.result_type(raw_data, slopes, inters)
+        if dtype is not None:
+            final_type = np.promote_types(final_type, dtype)
+
         # Slice scaling to give output shape
-        return raw_data * slopes[slicer] + inters[slicer]
+        return raw_data * slopes[slicer].astype(final_type) + inters[slicer].astype(final_type)
+
+
+    def get_unscaled(self):
+        """ Read data from file
+
+        This is an optional part of the proxy API
+        """
+        return self._get_unscaled(slicer=())
+
+    def get_scaled(self, dtype=None):
+        """ Read data from file and apply scaling
+
+        The dtype of the returned array is the narrowest dtype that can
+        represent the data without overflow, and is at least as wide as
+        the dtype parameter.
+
+        If dtype is unspecified, it is the wider of the dtypes of the slopes
+        or intercepts
+
+        Parameters
+        ----------
+        dtype : numpy dtype specifier
+            A numpy dtype specifier specifying the narrowest acceptable
+            dtype.
+
+        Returns
+        -------
+        array
+            Scaled of image data of data type `dtype`.
+        """
+        return self._get_scaled(dtype=dtype, slicer=())
+
+    def __array__(self):
+        return self._get_scaled(dtype=None, slicer=())
+
+    def __getitem__(self, slicer):
+        return self._get_scaled(dtype=None, slicer=slicer)
 
 
 class PARRECHeader(SpatialHeader):
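
The PAR/REC path uses the same trick as brikhead.py: broadcast per-slice slopes and intercepts against a zero-strided stand-in of the full data shape, then index them with the caller's slicer so they line up with the sliced data. Roughly, in plain NumPy (hypothetical shapes; nibabel's strided_scalar helper is replaced here by as_strided):

    import numpy as np
    from numpy.lib.stride_tricks import as_strided

    shape = (4, 4, 3)                          # (x, y, slice)
    slopes = np.array([1.0, 2.0, 4.0])         # one slope per slice
    fake_data = as_strided(np.zeros(1), shape=shape, strides=(0, 0, 0))  # no memory cost
    _, slopes_full = np.broadcast_arrays(fake_data, slopes)
    slicer = (slice(None), slice(None), 1)     # e.g. take slice index 1
    print(slopes_full[slicer].shape)           # (4, 4), matching the sliced data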

nibabel/tests/test_image_api.py
+7 -4

@@ -44,7 +44,7 @@
 from nose import SkipTest
 from nose.tools import (assert_true, assert_false, assert_raises, assert_equal)
 
-from numpy.testing import assert_almost_equal, assert_array_equal, assert_warns
+from numpy.testing import assert_almost_equal, assert_array_equal, assert_warns, assert_allclose
 from ..testing import clear_and_catch_warnings
 from ..tmpdirs import InTemporaryDirectory
 
@@ -314,18 +314,21 @@ def _check_proxy_interface(self, imaker, meth_name):
         # New data dtype, no caching, doesn't use or alter cache
         fdata_new_dt = img.get_fdata(caching='unchanged', dtype='f4')
         # We get back the original read, not the modified cache
-        assert_array_equal(fdata_new_dt, proxy_data.astype('f4'))
+        # Allow for small rounding error when the data is scaled with 32-bit
+        # factors, rather than 64-bit factors and then cast to float-32
+        # Use rtol/atol from numpy.allclose
+        assert_allclose(fdata_new_dt, proxy_data.astype('f4'), rtol=1e-05, atol=1e-08)
         assert_equal(fdata_new_dt.dtype, np.float32)
         # The original cache stays in place, for default float64
         assert_array_equal(img.get_fdata(), 42)
         # And for not-default float32, because we haven't cached
         fdata_new_dt[:] = 43
         fdata_new_dt = img.get_fdata(caching='unchanged', dtype='f4')
-        assert_array_equal(fdata_new_dt, proxy_data.astype('f4'))
+        assert_allclose(fdata_new_dt, proxy_data.astype('f4'), rtol=1e-05, atol=1e-08)
         # Until we reset with caching='fill', at which point we
         # drop the original float64 cache, and have a float32 cache
         fdata_new_dt = img.get_fdata(caching='fill', dtype='f4')
-        assert_array_equal(fdata_new_dt, proxy_data.astype('f4'))
+        assert_allclose(fdata_new_dt, proxy_data.astype('f4'), rtol=1e-05, atol=1e-08)
         # We're using the cache, for dtype='f4' reads
         fdata_new_dt[:] = 43
         assert_array_equal(img.get_fdata(dtype='f4'), 43)
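
The explicit rtol/atol in the test are simply numpy.allclose's defaults, spelled out so the tolerance is visible at the call site: scaling with 32-bit factors versus scaling in 64 bits and then casting can disagree in the last bits of a float32. A standalone check of that tolerance with made-up values:

    import numpy as np
    from numpy.testing import assert_allclose

    scaled_32 = np.float32(12345) * np.float32(1e-4)       # scaled with a 32-bit factor
    scaled_64 = np.float32(np.float64(12345) * 1e-4)       # scaled in 64 bits, then cast
    assert_allclose(scaled_32, scaled_64, rtol=1e-05, atol=1e-08)
    print('difference within numpy.allclose defaults')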
