Merge pull request #844 from effigies/fix/arraylike_dtype

effigies · web-flow · commit f1e100839e8a · 2019-12-11T08:15:59.000-05:00
FIX: Accept dtype parameter to ArrayProxy.__array__
diff --git a/Changelog b/Changelog
@@ -30,10 +30,13 @@ References like "pr/298" refer to github pull request numbers.
 
 New features
 ------------
-* ArrayProxy method ``get_scaled()`` scales data with a dtype of a
-  specified precision, promoting as necessary to avoid overflow. This
-  is to used in ``img.get_fdata()`` to control memory usage. (pr/833)
-  (CM, reviewed by Ross Markello)
+* ArrayProxy ``__array__()`` now accepts a ``dtype`` parameter, allowing
+  ``numpy.array(dataobj, dtype=...)`` calls, as well as casting directly
+  with a dtype (for example, ``numpy.float32(dataobj)``) to control the
+  output type. Scale factors (slope, intercept) are applied, but may be
+  cast to narrower types, to control memory usage. This is now the basis
+  of ``img.get_fdata()``, which will scale data in single precision if
+  the output type is ``float32``. (pr/844) (CM, reviewed by ...)
 * GiftiImage method ``agg_data()`` to return usable data arrays (pr/793)
   (Hao-Ting Wang, reviewed by CM)
 * Accept ``os.PathLike`` objects in place of filenames (pr/610) (Cameron
diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py
@@ -378,35 +378,31 @@ def get_unscaled(self):
         """
         return self._get_unscaled(slicer=())
 
-    def get_scaled(self, dtype=None):
-        """ Read data from file and apply scaling
+    def __array__(self, dtype=None):
+        """ Read data from file and apply scaling, casting to ``dtype``
 
-        The dtype of the returned array is the narrowest dtype that can
-        represent the data without overflow, and is at least as wide as
-        the dtype parameter.
+        If ``dtype`` is unspecified, the dtype of the returned array is the
+        narrowest dtype that can represent the data without overflow.
+        Generally, it is the wider of the dtypes of the slopes or intercepts.
 
-        If dtype is unspecified, it is the wider of the dtypes of the slope
-        or intercept. This will generally be determined by the parameter
-        size in the image header, and so should be consistent for a given
-        image format, but may vary across formats. Notably, these factors
-        are single-precision (32-bit) floats for NIfTI-1 and double-precision
-        (64-bit) floats for NIfTI-2.
+        The types of the scale factors will generally be determined by the
+        parameter size in the image header, and so should be consistent for a
+        given image format, but may vary across formats.
 
         Parameters
         ----------
-        dtype : numpy dtype specifier
-            A numpy dtype specifier specifying the narrowest acceptable
-            dtype.
+        dtype : numpy dtype specifier, optional
+            A numpy dtype specifier specifying the type of the returned array.
 
         Returns
         -------
         array
-            Scaled of image data of data type `dtype`.
+            Scaled image data with type `dtype`.
         """
-        return self._get_scaled(dtype=dtype, slicer=())
-
-    def __array__(self):
-        return self._get_scaled(dtype=None, slicer=())
+        arr = self._get_scaled(dtype=dtype, slicer=())
+        if dtype is not None:
+            arr = arr.astype(dtype, copy=False)
+        return arr
 
     def __getitem__(self, slicer):
         return self._get_scaled(dtype=None, slicer=slicer)
diff --git a/nibabel/dataobj_images.py b/nibabel/dataobj_images.py
@@ -10,7 +10,6 @@
 
 import numpy as np
 
-from .arrayproxy import is_proxy
 from .filebasedimages import FileBasedImage
 from .keywordonly import kw_only_meth
 from .deprecated import deprecate_with_version
@@ -351,14 +350,10 @@ def get_fdata(self, caching='fill', dtype=np.float64):
         if self._fdata_cache is not None:
             if self._fdata_cache.dtype.type == dtype.type:
                 return self._fdata_cache
-        dataobj = self._dataobj
-        # Attempt to confine data array to dtype during scaling
-        # On overflow, may still upcast
-        if is_proxy(dataobj):
-            dataobj = dataobj.get_scaled(dtype=dtype)
         # Always return requested data type
-        # For array proxies, will only copy on overflow
-        data = np.asanyarray(dataobj, dtype=dtype)
+        # For array proxies, will attempt to confine data array to dtype
+        # during scaling
+        data = np.asanyarray(self._dataobj, dtype=dtype)
         if caching == 'fill':
             self._fdata_cache = data
         return data
diff --git a/nibabel/ecat.py b/nibabel/ecat.py
@@ -689,46 +689,33 @@ def ndim(self):
     def is_proxy(self):
         return True
 
-    def __array__(self):
+    def __array__(self, dtype=None):
         ''' Read of data from file
 
         This reads ALL FRAMES into one array, can be memory expensive.
 
         If you want to read only some slices, use the slicing syntax
         (``__getitem__``) below, or ``subheader.data_from_fileobj(frame)``
-        '''
-        data = np.empty(self.shape)
-        frame_mapping = get_frame_order(self._subheader._mlist)
-        for i in sorted(frame_mapping):
-            data[:, :, :, i] = self._subheader.data_from_fileobj(
-                frame_mapping[i][0])
-        return data
-
-    def get_scaled(self, dtype=None):
-        """ Read data from file and apply scaling
-
-        The dtype of the returned array is the narrowest dtype that can
-        represent the data without overflow, and is at least as wide as
-        the dtype parameter.
-
-        If dtype is unspecified, it is automatically determined.
 
         Parameters
         ----------
-        dtype : numpy dtype specifier
-            A numpy dtype specifier specifying the narrowest acceptable
-            dtype.
+        dtype : numpy dtype specifier, optional
+            A numpy dtype specifier specifying the type of the returned array.
 
         Returns
         -------
         array
-            Scaled of image data of data type `dtype`.
-        """
-        data = self.__array__()
-        if dtype is None:
-            return data
-        final_type = np.promote_types(data.dtype, dtype)
-        return data.astype(final_type, copy=False)
+            Scaled image data with type `dtype`.
+        '''
+        # dtype=None is interpreted as float64
+        data = np.empty(self.shape)
+        frame_mapping = get_frame_order(self._subheader._mlist)
+        for i in sorted(frame_mapping):
+            data[:, :, :, i] = self._subheader.data_from_fileobj(
+                frame_mapping[i][0])
+        if dtype is not None:
+            data = data.astype(dtype, copy=False)
+        return data
 
     def __getitem__(self, sliceobj):
         """ Return slice `sliceobj` from ECAT data, optimizing if possible
diff --git a/nibabel/minc1.py b/nibabel/minc1.py
@@ -261,42 +261,29 @@ def ndim(self):
     def is_proxy(self):
         return True
 
-    def _get_scaled(self, dtype, slicer):
-        data = self.minc_file.get_scaled_data(slicer)
-        if dtype is None:
-            return data
-        final_type = np.promote_types(data.dtype, dtype)
-        return data.astype(final_type, copy=False)
-
-    def get_scaled(self, dtype=None):
-        """ Read data from file and apply scaling
+    def __array__(self, dtype=None):
+        """ Read data from file and apply scaling, casting to ``dtype``
 
-        The dtype of the returned array is the narrowest dtype that can
-        represent the data without overflow, and is at least as wide as
-        the dtype parameter.
-
-        If dtype is unspecified, it is automatically determined.
+        If ``dtype`` is unspecified, the dtype is automatically determined.
 
         Parameters
         ----------
-        dtype : numpy dtype specifier
-            A numpy dtype specifier specifying the narrowest acceptable
-            dtype.
+        dtype : numpy dtype specifier, optional
+            A numpy dtype specifier specifying the type of the returned array.
 
         Returns
         -------
         array
-            Scaled of image data of data type `dtype`.
+            Scaled image data with type `dtype`.
         """
-        return self._get_scaled(dtype=dtype, slicer=())
-
-    def __array__(self):
-        ''' Read of data from file '''
-        return self._get_scaled(dtype=None, slicer=())
+        arr = self.minc_file.get_scaled_data(sliceobj=())
+        if dtype is not None:
+            arr = arr.astype(dtype, copy=False)
+        return arr
 
     def __getitem__(self, sliceobj):
         """ Read slice `sliceobj` of data from file """
-        return self._get_scaled(dtype=None, slicer=sliceobj)
+        return self.minc_file.get_scaled_data(sliceobj)
 
 
 class MincHeader(SpatialHeader):
diff --git a/nibabel/parrec.py b/nibabel/parrec.py
@@ -676,31 +676,27 @@ def get_unscaled(self):
         """
         return self._get_unscaled(slicer=())
 
-    def get_scaled(self, dtype=None):
-        """ Read data from file and apply scaling
+    def __array__(self, dtype=None):
+        """ Read data from file and apply scaling, casting to ``dtype``
 
-        The dtype of the returned array is the narrowest dtype that can
-        represent the data without overflow, and is at least as wide as
-        the dtype parameter.
-
-        If dtype is unspecified, it is the wider of the dtypes of the slopes
-        or intercepts
+        If ``dtype`` is unspecified, the dtype of the returned array is the
+        narrowest dtype that can represent the data without overflow.
+        Generally, it is the wider of the dtypes of the slopes or intercepts.
 
         Parameters
         ----------
-        dtype : numpy dtype specifier
-            A numpy dtype specifier specifying the narrowest acceptable
-            dtype.
+        dtype : numpy dtype specifier, optional
+            A numpy dtype specifier specifying the type of the returned array.
 
         Returns
         -------
         array
-            Scaled of image data of data type `dtype`.
+            Scaled image data with type `dtype`.
         """
-        return self._get_scaled(dtype=dtype, slicer=())
-
-    def __array__(self):
-        return self._get_scaled(dtype=None, slicer=())
+        arr = self._get_scaled(dtype=dtype, slicer=())
+        if dtype is not None:
+            arr = arr.astype(dtype, copy=False)
+        return arr
 
     def __getitem__(self, slicer):
         return self._get_scaled(dtype=None, slicer=slicer)
diff --git a/nibabel/tests/test_api_validators.py b/nibabel/tests/test_api_validators.py
@@ -19,7 +19,7 @@ def meth(self):
                 for imaker, params in self.obj_params():
                     validator(self, imaker, params)
             meth.__name__ = 'test_' + name[len('validate_'):]
-            meth.__doc__ = 'autogenerated test from ' + name
+            meth.__doc__ = 'autogenerated test from {}.{}'.format(klass.__name__, name)
             return meth
         for name in dir(klass):
             if not name.startswith('validate_'):
diff --git a/nibabel/tests/test_proxy_api.py b/nibabel/tests/test_proxy_api.py
@@ -57,7 +57,7 @@
 
 from numpy.testing import assert_almost_equal, assert_array_equal, assert_allclose
 
-from ..testing import data_path as DATA_PATH, assert_dt_equal
+from ..testing import data_path as DATA_PATH, assert_dt_equal, clear_and_catch_warnings
 
 from ..tmpdirs import InTemporaryDirectory
 
@@ -132,24 +132,38 @@ def validate_asarray(self, pmaker, params):
         # Shape matches expected shape
         assert_equal(out.shape, params['shape'])
 
-    def validate_get_scaled(self, pmaker, params):
+    def validate_array_interface_with_dtype(self, pmaker, params):
         # Check proxy returns expected array from asarray
         prox, fio, hdr = pmaker()
-        out = prox.get_scaled()
-        assert_array_equal(out, params['arr_out'])
-        assert_dt_equal(out.dtype, params['dtype_out'])
-        # Shape matches expected shape
-        assert_equal(out.shape, params['shape'])
+        orig = np.array(prox, dtype=None)
+        assert_array_equal(orig, params['arr_out'])
+        assert_dt_equal(orig.dtype, params['dtype_out'])
+
+        context = None
+        if np.issubdtype(orig.dtype, np.complexfloating):
+            context = clear_and_catch_warnings()
+            context.__enter__()
+            warnings.simplefilter('ignore', np.ComplexWarning)
 
         for dtype in np.sctypes['float'] + np.sctypes['int'] + np.sctypes['uint']:
-            out = prox.get_scaled(dtype=dtype)
+            # Directly coerce with a dtype
+            direct = dtype(prox)
             # Half-precision is imprecise. Obviously. It's a bad idea, but don't break
             # the test over it.
             rtol = 1e-03 if dtype == np.float16 else 1e-05
-            assert_allclose(out, params['arr_out'].astype(out.dtype), rtol=rtol, atol=1e-08)
-            assert_greater_equal(out.dtype, np.dtype(dtype))
-            # Shape matches expected shape
-            assert_equal(out.shape, params['shape'])
+            assert_allclose(direct, orig.astype(dtype), rtol=rtol, atol=1e-08)
+            assert_dt_equal(direct.dtype, np.dtype(dtype))
+            assert_equal(direct.shape, params['shape'])
+            # All three methods should produce equivalent results
+            for arrmethod in (np.array, np.asarray, np.asanyarray):
+                out = arrmethod(prox, dtype=dtype)
+                assert_array_equal(out, direct)
+                assert_dt_equal(out.dtype, np.dtype(dtype))
+                # Shape matches expected shape
+                assert_equal(out.shape, params['shape'])
+
+        if context is not None:
+            context.__exit__()
 
     def validate_header_isolated(self, pmaker, params):
         # Confirm altering input header has no effect