Commit b773576

Merge pull request #833 from effigies/nf/get_scaled
NF: Enable data scaling within the target dtype
2 parents 6cf363d + 8be3a0e

9 files changed: +236 -69 lines

nibabel/arrayproxy.py
+63 -22

@@ -33,7 +33,7 @@
 
 from .deprecated import deprecate_with_version
 from .volumeutils import array_from_file, apply_read_scaling
-from .fileslice import fileslice
+from .fileslice import fileslice, canonical_slicers
 from .keywordonly import kw_only_meth
 from . import openers
 
@@ -336,36 +336,77 @@ def _get_fileobj(self):
                     self.file_like, keep_open=False) as opener:
                 yield opener
 
-    def get_unscaled(self):
-        """ Read of data from file
-
-        This is an optional part of the proxy API
-        """
-        with self._get_fileobj() as fileobj, self._lock:
-            raw_data = array_from_file(self._shape,
+    def _get_unscaled(self, slicer):
+        if canonical_slicers(slicer, self._shape, False) == \
+                canonical_slicers((), self._shape, False):
+            with self._get_fileobj() as fileobj, self._lock:
+                return array_from_file(self._shape,
                                        self._dtype,
                                        fileobj,
                                        offset=self._offset,
                                        order=self.order,
                                        mmap=self._mmap)
-        return raw_data
+        with self._get_fileobj() as fileobj:
+            return fileslice(fileobj,
+                             slicer,
+                             self._shape,
+                             self._dtype,
+                             self._offset,
+                             order=self.order,
+                             lock=self._lock)
+
+    def _get_scaled(self, dtype, slicer):
+        # Ensure scale factors have dtypes
+        scl_slope = np.asanyarray(self._slope)
+        scl_inter = np.asanyarray(self._inter)
+        use_dtype = scl_slope.dtype if dtype is None else dtype
+        slope = scl_slope.astype(use_dtype)
+        inter = scl_inter.astype(use_dtype)
+        # Read array and upcast as necessary for big slopes, intercepts
+        scaled = apply_read_scaling(self._get_unscaled(slicer=slicer), slope, inter)
+        if dtype is not None:
+            scaled = scaled.astype(np.promote_types(scaled.dtype, dtype), copy=False)
+        return scaled
+
+    def get_unscaled(self):
+        """ Read data from file
+
+        This is an optional part of the proxy API
+        """
+        return self._get_unscaled(slicer=())
+
+    def get_scaled(self, dtype=None):
+        """ Read data from file and apply scaling
+
+        The dtype of the returned array is the narrowest dtype that can
+        represent the data without overflow, and is at least as wide as
+        the dtype parameter.
+
+        If dtype is unspecified, it is the wider of the dtypes of the slope
+        or intercept. This will generally be determined by the parameter
+        size in the image header, and so should be consistent for a given
+        image format, but may vary across formats. Notably, these factors
+        are single-precision (32-bit) floats for NIfTI-1 and double-precision
+        (64-bit) floats for NIfTI-2.
+
+        Parameters
+        ----------
+        dtype : numpy dtype specifier
+            A numpy dtype specifier specifying the narrowest acceptable
+            dtype.
+
+        Returns
+        -------
+        array
+            Scaled of image data of data type `dtype`.
+        """
+        return self._get_scaled(dtype=dtype, slicer=())
 
     def __array__(self):
-        # Read array and scale
-        raw_data = self.get_unscaled()
-        return apply_read_scaling(raw_data, self._slope, self._inter)
+        return self._get_scaled(dtype=None, slicer=())
 
     def __getitem__(self, slicer):
-        with self._get_fileobj() as fileobj:
-            raw_data = fileslice(fileobj,
-                                 slicer,
-                                 self._shape,
-                                 self._dtype,
-                                 self._offset,
-                                 order=self.order,
-                                 lock=self._lock)
-        # Upcast as necessary for big slopes, intercepts
-        return apply_read_scaling(raw_data, self._slope, self._inter)
+        return self._get_scaled(dtype=None, slicer=slicer)
 
     def reshape(self, shape):
        """ Return an ArrayProxy with a new shape, without modifying data """

nibabel/brikhead.py
+18 -13

@@ -262,19 +262,24 @@ def __init__(self, file_like, header, mmap=True, keep_file_open=None):
     def scaling(self):
         return self._scaling
 
-    def __array__(self):
-        raw_data = self.get_unscaled()
-        # datatype may change if applying self._scaling
-        return raw_data if self.scaling is None else raw_data * self.scaling
-
-    def __getitem__(self, slicer):
-        raw_data = super(AFNIArrayProxy, self).__getitem__(slicer)
-        # apply volume specific scaling (may change datatype!)
-        if self.scaling is not None:
-            fake_data = strided_scalar(self._shape)
-            _, scaling = np.broadcast_arrays(fake_data, self.scaling)
-            raw_data = raw_data * scaling[slicer]
-        return raw_data
+    def _get_scaled(self, dtype, slicer):
+        raw_data = self._get_unscaled(slicer=slicer)
+        if self.scaling is None:
+            if dtype is None:
+                return raw_data
+            final_type = np.promote_types(raw_data.dtype, dtype)
+            return raw_data.astype(final_type, copy=False)
+
+        # Broadcast scaling to shape of original data
+        fake_data = strided_scalar(self._shape)
+        _, scaling = np.broadcast_arrays(fake_data, self.scaling)
+
+        final_type = np.result_type(raw_data, scaling)
+        if dtype is not None:
+            final_type = np.promote_types(final_type, dtype)
+
+        # Slice scaling to give output shape
+        return raw_data * scaling[slicer].astype(final_type)
 
 
 class AFNIHeader(SpatialHeader):
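
For the AFNI per-volume scaling above, the output dtype combines the raw data, the scale factors, and (if given) the caller's requested dtype. A small illustrative check of that dtype arithmetic with plain NumPy (shapes and values are made up, not taken from the commit):

    import numpy as np

    raw = np.zeros((3, 2), dtype=np.int16)                  # stand-in for unscaled data
    scaling = np.array([2.0, 0.5], dtype=np.float32)        # stand-in per-volume factors
    final_type = np.result_type(raw, scaling)               # float32: data vs. factors
    final_type = np.promote_types(final_type, np.float64)   # widen for a requested float64
    print((raw * scaling).astype(final_type, copy=False).dtype)   # float64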

nibabel/dataobj_images.py
+9 -1

@@ -10,6 +10,7 @@
 
 import numpy as np
 
+from .arrayproxy import is_proxy
 from .filebasedimages import FileBasedImage
 from .keywordonly import kw_only_meth
 from .deprecated import deprecate_with_version
@@ -350,7 +351,14 @@ def get_fdata(self, caching='fill', dtype=np.float64):
         if self._fdata_cache is not None:
             if self._fdata_cache.dtype.type == dtype.type:
                 return self._fdata_cache
-        data = np.asanyarray(self._dataobj).astype(dtype, copy=False)
+        dataobj = self._dataobj
+        # Attempt to confine data array to dtype during scaling
+        # On overflow, may still upcast
+        if is_proxy(dataobj):
+            dataobj = dataobj.get_scaled(dtype=dtype)
+        # Always return requested data type
+        # For array proxies, will only copy on overflow
+        data = np.asanyarray(dataobj, dtype=dtype)
         if caching == 'fill':
             self._fdata_cache = data
         return data
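
From the caller's side, the effect of this change is that get_fdata(dtype=...) can keep the scaling inside the requested dtype instead of always scaling in float64 and casting afterwards. A hedged usage sketch ('example.nii' is a hypothetical file name, not from the commit):

    import numpy as np
    import nibabel as nib

    img = nib.load('example.nii')                 # hypothetical scaled NIfTI image
    data32 = img.get_fdata(dtype=np.float32)      # scaling confined to float32 where possible
    data64 = img.get_fdata()                      # default float64 behaviour unchanged
    print(data32.dtype, data64.dtype)             # float32 float64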

nibabel/ecat.py
+26 -0

@@ -704,6 +704,32 @@ def __array__(self):
                                  frame_mapping[i][0])
         return data
 
+    def get_scaled(self, dtype=None):
+        """ Read data from file and apply scaling
+
+        The dtype of the returned array is the narrowest dtype that can
+        represent the data without overflow, and is at least as wide as
+        the dtype parameter.
+
+        If dtype is unspecified, it is automatically determined.
+
+        Parameters
+        ----------
+        dtype : numpy dtype specifier
+            A numpy dtype specifier specifying the narrowest acceptable
+            dtype.
+
+        Returns
+        -------
+        array
+            Scaled of image data of data type `dtype`.
+        """
+        data = self.__array__()
+        if dtype is None:
+            return data
+        final_type = np.promote_types(data.dtype, dtype)
+        return data.astype(final_type, copy=False)
+
     def __getitem__(self, sliceobj):
         """ Return slice `sliceobj` from ECAT data, optimizing if possible
         """

nibabel/minc1.py
+31 -2

@@ -261,13 +261,42 @@ def ndim(self):
     def is_proxy(self):
         return True
 
+    def _get_scaled(self, dtype, slicer):
+        data = self.minc_file.get_scaled_data(slicer)
+        if dtype is None:
+            return data
+        final_type = np.promote_types(data.dtype, dtype)
+        return data.astype(final_type, copy=False)
+
+    def get_scaled(self, dtype=None):
+        """ Read data from file and apply scaling
+
+        The dtype of the returned array is the narrowest dtype that can
+        represent the data without overflow, and is at least as wide as
+        the dtype parameter.
+
+        If dtype is unspecified, it is automatically determined.
+
+        Parameters
+        ----------
+        dtype : numpy dtype specifier
+            A numpy dtype specifier specifying the narrowest acceptable
+            dtype.
+
+        Returns
+        -------
+        array
+            Scaled of image data of data type `dtype`.
+        """
+        return self._get_scaled(dtype=dtype, slicer=())
+
     def __array__(self):
         ''' Read of data from file '''
-        return self.minc_file.get_scaled_data()
+        return self._get_scaled(dtype=None, slicer=())
 
     def __getitem__(self, sliceobj):
         """ Read slice `sliceobj` of data from file """
-        return self.minc_file.get_scaled_data(sliceobj)
+        return self._get_scaled(dtype=None, slicer=sliceobj)
 
 
 class MincHeader(SpatialHeader):
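
With get_scaled now defined on the MINC proxy as well, callers can treat any proxy uniformly through the proxy API. A hedged usage sketch ('example.mnc' is a hypothetical file name):

    import numpy as np
    import nibabel as nib
    from nibabel.arrayproxy import is_proxy

    img = nib.load('example.mnc')                 # hypothetical MINC1 image
    if is_proxy(img.dataobj):
        arr = img.dataobj.get_scaled(dtype=np.float32)
        print(arr.dtype)                          # at least float32, wider only on overflow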

nibabel/parrec.py
+63 -24

@@ -633,38 +633,77 @@ def dtype(self):
     def is_proxy(self):
         return True
 
-    def get_unscaled(self):
-        with ImageOpener(self.file_like) as fileobj:
-            return _data_from_rec(fileobj, self._rec_shape, self._dtype,
-                                  self._slice_indices, self._shape,
-                                  mmap=self._mmap)
-
-    def __array__(self):
-        with ImageOpener(self.file_like) as fileobj:
-            return _data_from_rec(fileobj,
-                                  self._rec_shape,
-                                  self._dtype,
-                                  self._slice_indices,
-                                  self._shape,
-                                  scalings=self._slice_scaling,
-                                  mmap=self._mmap)
-
-    def __getitem__(self, slicer):
+    def _get_unscaled(self, slicer):
         indices = self._slice_indices
-        if indices[0] != 0 or np.any(np.diff(indices) != 1):
+        if slicer == ():
+            with ImageOpener(self.file_like) as fileobj:
+                rec_data = array_from_file(self._rec_shape, self._dtype, fileobj, mmap=self._mmap)
+            rec_data = rec_data[..., indices]
+            return rec_data.reshape(self._shape, order='F')
+        elif indices[0] != 0 or np.any(np.diff(indices) != 1):
             # We can't load direct from REC file, use inefficient slicing
-            return np.asanyarray(self)[slicer]
+            return self._get_unscaled(())[slicer]
+
         # Slices all sequential from zero, can use fileslice
         # This gives more efficient volume by volume loading, for example
         with ImageOpener(self.file_like) as fileobj:
-            raw_data = fileslice(fileobj, slicer, self._shape, self._dtype, 0,
-                                 'F')
+            return fileslice(fileobj, slicer, self._shape, self._dtype, 0, 'F')
+
+    def _get_scaled(self, dtype, slicer):
+        raw_data = self._get_unscaled(slicer)
+        if self._slice_scaling is None:
+            if dtype is None:
+                return raw_data
+            final_type = np.promote_types(raw_data.dtype, dtype)
+            return raw_data.astype(final_type, copy=False)
+
         # Broadcast scaling to shape of original data
-        slopes, inters = self._slice_scaling
         fake_data = strided_scalar(self._shape)
-        _, slopes, inters = np.broadcast_arrays(fake_data, slopes, inters)
+        _, slopes, inters = np.broadcast_arrays(fake_data, *self._slice_scaling)
+
+        final_type = np.result_type(raw_data, slopes, inters)
+        if dtype is not None:
+            final_type = np.promote_types(final_type, dtype)
+
         # Slice scaling to give output shape
-        return raw_data * slopes[slicer] + inters[slicer]
+        return raw_data * slopes[slicer].astype(final_type) + inters[slicer].astype(final_type)
+
+
+    def get_unscaled(self):
+        """ Read data from file
+
+        This is an optional part of the proxy API
+        """
+        return self._get_unscaled(slicer=())
+
+    def get_scaled(self, dtype=None):
+        """ Read data from file and apply scaling
+
+        The dtype of the returned array is the narrowest dtype that can
+        represent the data without overflow, and is at least as wide as
+        the dtype parameter.
+
+        If dtype is unspecified, it is the wider of the dtypes of the slopes
+        or intercepts
+
+        Parameters
+        ----------
+        dtype : numpy dtype specifier
+            A numpy dtype specifier specifying the narrowest acceptable
+            dtype.
+
+        Returns
+        -------
+        array
+            Scaled of image data of data type `dtype`.
+        """
+        return self._get_scaled(dtype=dtype, slicer=())
+
+    def __array__(self):
+        return self._get_scaled(dtype=None, slicer=())
+
+    def __getitem__(self, slicer):
+        return self._get_scaled(dtype=None, slicer=slicer)
 
 
 class PARRECHeader(SpatialHeader):
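
The PAR/REC path uses the same trick as brikhead.py: broadcast per-slice slopes and intercepts against a zero-strided stand-in of the full data shape, then index them with the caller's slicer so they line up with the sliced data. Roughly, in plain NumPy (hypothetical shapes; nibabel's strided_scalar helper is replaced here by as_strided):

    import numpy as np
    from numpy.lib.stride_tricks import as_strided

    shape = (4, 4, 3)                          # (x, y, slice)
    slopes = np.array([1.0, 2.0, 4.0])         # one slope per slice
    fake_data = as_strided(np.zeros(1), shape=shape, strides=(0, 0, 0))  # no memory cost
    _, slopes_full = np.broadcast_arrays(fake_data, slopes)
    slicer = (slice(None), slice(None), 1)     # e.g. take slice index 1
    print(slopes_full[slicer].shape)           # (4, 4), matching the sliced data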

nibabel/tests/test_image_api.py
+7 -4

@@ -44,7 +44,7 @@
 from nose import SkipTest
 from nose.tools import (assert_true, assert_false, assert_raises, assert_equal)
 
-from numpy.testing import assert_almost_equal, assert_array_equal, assert_warns
+from numpy.testing import assert_almost_equal, assert_array_equal, assert_warns, assert_allclose
 from ..testing import clear_and_catch_warnings
 from ..tmpdirs import InTemporaryDirectory
 
@@ -314,18 +314,21 @@ def _check_proxy_interface(self, imaker, meth_name):
         # New data dtype, no caching, doesn't use or alter cache
         fdata_new_dt = img.get_fdata(caching='unchanged', dtype='f4')
         # We get back the original read, not the modified cache
-        assert_array_equal(fdata_new_dt, proxy_data.astype('f4'))
+        # Allow for small rounding error when the data is scaled with 32-bit
+        # factors, rather than 64-bit factors and then cast to float-32
+        # Use rtol/atol from numpy.allclose
+        assert_allclose(fdata_new_dt, proxy_data.astype('f4'), rtol=1e-05, atol=1e-08)
         assert_equal(fdata_new_dt.dtype, np.float32)
         # The original cache stays in place, for default float64
         assert_array_equal(img.get_fdata(), 42)
         # And for not-default float32, because we haven't cached
         fdata_new_dt[:] = 43
         fdata_new_dt = img.get_fdata(caching='unchanged', dtype='f4')
-        assert_array_equal(fdata_new_dt, proxy_data.astype('f4'))
+        assert_allclose(fdata_new_dt, proxy_data.astype('f4'), rtol=1e-05, atol=1e-08)
         # Until we reset with caching='fill', at which point we
         # drop the original float64 cache, and have a float32 cache
         fdata_new_dt = img.get_fdata(caching='fill', dtype='f4')
-        assert_array_equal(fdata_new_dt, proxy_data.astype('f4'))
+        assert_allclose(fdata_new_dt, proxy_data.astype('f4'), rtol=1e-05, atol=1e-08)
         # We're using the cache, for dtype='f4' reads
         fdata_new_dt[:] = 43
         assert_array_equal(img.get_fdata(dtype='f4'), 43)
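
The explicit rtol/atol in the test are simply numpy.allclose's defaults, spelled out so the tolerance is visible at the call site: scaling with 32-bit factors versus scaling in 64 bits and then casting can disagree in the last bits of a float32. A standalone check of that tolerance with made-up values:

    import numpy as np
    from numpy.testing import assert_allclose

    scaled_32 = np.float32(12345) * np.float32(1e-4)       # scaled with a 32-bit factor
    scaled_64 = np.float32(np.float64(12345) * 1e-4)       # scaled in 64 bits, then cast
    assert_allclose(scaled_32, scaled_64, rtol=1e-05, atol=1e-08)
    print('difference within numpy.allclose defaults')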
