diff --git a/test/test_images.py b/test/test_images.py index 6660b4dc..36dc26a6 100644 --- a/test/test_images.py +++ b/test/test_images.py @@ -1,7 +1,8 @@ import pytest -from numpy import arange, allclose, array, mean, apply_along_axis, float64 +from numpy import arange, allclose, array, mean, apply_along_axis, float64, \ + nanmean, nan, nansum, nanvar, nanmin, nanmax, nanstd -from thunder.images.readers import fromlist, fromarray +from thunder.images.readers import fromlist from thunder.images.images import Images from thunder.series.series import Series @@ -33,6 +34,7 @@ def test_sample(eng): assert allclose(data.sample(1).shape, (1, 2, 2)) assert allclose(data.filter(lambda x: x.max() > 5).sample(1).toarray(), [[1, 10], [1, 10]]) + def test_labels(eng): x = arange(10).reshape(10, 1, 1) data = fromlist(x, labels=range(10), engine=eng) @@ -159,6 +161,60 @@ def test_mean(eng): assert allclose(data.mean().toarray(), original.mean(axis=0)) +def test_nanmean(eng): + original = arange(24).reshape((2, 3, 4)).astype(float64) + data = fromlist(list(original), engine=eng) + assert allclose(data.nanmean().shape, (1, 3, 4)) + assert allclose(data.nanmean().toarray(), nanmean(original, axis=0)) + + original[0, 2, 3] = nan + original[1, 0, 2] = nan + original[1, 2, 2] = nan + data = fromlist(list(original), engine=eng) + assert allclose(data.nanmean().shape, (1, 3, 4)) + assert allclose(data.nanmean().toarray(), nanmean(original, axis=0)) + + +def test_min(eng): + original = arange(24).reshape((2, 3, 4)) + data = fromlist(list(original), engine=eng) + assert allclose(data.min().shape, (1, 3, 4)) + assert allclose(data.min().toarray(), original.min(axis=0)) + + +def test_nanmin(eng): + original = arange(24).reshape((2, 3, 4)).astype(float64) + data = fromlist(list(original), engine=eng) + assert allclose(data.nanmin().shape, (1, 3, 4)) + assert allclose(data.nanmin().toarray(), original.min(axis=0)) + original[0, 2, 3] = nan + original[1, 0, 2] = nan + original[1, 2, 2] = nan + 
data = fromlist(list(original), engine=eng) + assert allclose(data.nanmin().shape, (1, 3, 4)) + assert allclose(data.nanmin().toarray(), nanmin(original, axis=0)) + + +def test_max(eng): + original = arange(24).reshape((2, 3, 4)) + data = fromlist(list(original), engine=eng) + assert allclose(data.max().shape, (1, 3, 4)) + assert allclose(data.max().toarray(), original.max(axis=0)) + + +def test_nanmax(eng): + original = arange(24).reshape((2, 3, 4)).astype(float64) + data = fromlist(list(original), engine=eng) + assert allclose(data.nanmax().shape, (1, 3, 4)) + assert allclose(data.nanmax().toarray(), original.max(axis=0)) + original[0, 2, 3] = nan + original[1, 0, 2] = nan + original[1, 2, 2] = nan + data = fromlist(list(original), engine=eng) + assert allclose(data.nanmax().shape, (1, 3, 4)) + assert allclose(data.nanmax().toarray(), nanmax(original, axis=0)) + + def test_sum(eng): original = arange(24).reshape((2, 3, 4)) data = fromlist(list(original), engine=eng) @@ -166,6 +222,20 @@ def test_sum(eng): assert allclose(data.sum().toarray(), original.sum(axis=0)) +def test_nansum(eng): + original = arange(24).reshape((2, 3, 4)).astype(float64) + data = fromlist(list(original), engine=eng) + assert allclose(data.nansum().shape, (1, 3, 4)) + assert allclose(data.nansum().toarray(), nansum(original, axis=0)) + + original[0, 2, 3] = nan + original[1, 0, 2] = nan + original[1, 2, 2] = nan + data = fromlist(list(original), engine=eng) + assert allclose(data.nansum().shape, (1, 3, 4)) + assert allclose(data.nansum().toarray(), nansum(original, axis=0)) + + def test_var(eng): original = arange(24).reshape((2, 3, 4)) data = fromlist(list(original), engine=eng) @@ -173,6 +243,41 @@ def test_var(eng): assert allclose(data.var().toarray(), original.var(axis=0)) +def test_nanvar(eng): + original = arange(24).reshape((2, 3, 4)).astype(float64) + data = fromlist(list(original), engine=eng) + assert allclose(data.nanvar().shape, (1, 3, 4)) + assert 
allclose(data.nanvar().toarray(), nanvar(original, axis=0)) + + original[0, 2, 3] = nan + original[1, 0, 2] = nan + original[1, 2, 2] = nan + data = fromlist(list(original), engine=eng) + assert allclose(data.nanvar().shape, (1, 3, 4)) + assert allclose(data.nanvar().toarray(), nanvar(original, axis=0)) + + +def test_std(eng): + original = arange(24).reshape((2, 3, 4)) + data = fromlist(list(original), engine=eng) + assert allclose(data.std().shape, (1, 3, 4)) + assert allclose(data.std().toarray(), original.std(axis=0)) + + +def test_nanstd(eng): + original = arange(24).reshape((2, 3, 4)).astype(float64) + data = fromlist(list(original), engine=eng) + assert allclose(data.nanstd().shape, (1, 3, 4)) + assert allclose(data.nanstd().toarray(), nanstd(original, axis=0)) + + original[0, 2, 3] = nan + original[1, 0, 2] = nan + original[1, 2, 2] = nan + data = fromlist(list(original), engine=eng) + assert allclose(data.nanstd().shape, (1, 3, 4)) + assert allclose(data.nanstd().toarray(), nanstd(original, axis=0)) + + def test_subtract(eng): original = arange(24).reshape((4, 6)) data = fromlist([original], engine=eng) @@ -180,6 +285,7 @@ def test_subtract(eng): sub = arange(24).reshape((4, 6)) assert allclose(data.subtract(sub).toarray(), original - sub) + def test_map_as_series(eng): original = arange(4*4).reshape(4, 4) data = fromlist(5*[original], engine=eng) diff --git a/test/test_series.py b/test/test_series.py index c197bc34..a7da07b4 100644 --- a/test/test_series.py +++ b/test/test_series.py @@ -1,5 +1,6 @@ import pytest -from numpy import allclose, arange, array, asarray, dot, cov, corrcoef, float64 +from numpy import allclose, arange, array, asarray, dot, cov, corrcoef, float64, \ + nanmean, nan, nansum, nanvar, nanmin, nanmax, nanstd from thunder.series.readers import fromlist, fromarray from thunder.images.readers import fromlist as img_fromlist @@ -127,6 +128,23 @@ def test_mean(eng): assert str(val.dtype) == 'float64' +def test_nanmean(eng): + arr = 
array([arange(8), arange(8)]).astype(float64) + data = fromarray(arr, engine=eng) + val = data.nanmean().toarray() + expected = nanmean(data.toarray(), axis=0) + assert allclose(val, expected) + assert str(val.dtype) == 'float64' + arr[0, 4] = nan + arr[1, 3] = nan + arr[1, 4] = nan + data = fromarray(arr, engine=eng) + val = data.nanmean().toarray() + expected = nanmean(data.toarray(), axis=0) + assert allclose(val, expected, equal_nan=True) + assert str(val.dtype) == 'float64' + + def test_sum(eng): data = fromlist([arange(8), arange(8)], engine=eng) val = data.sum().toarray() @@ -135,6 +153,23 @@ def test_sum(eng): assert str(val.dtype) == 'int64' +def test_nansum(eng): + arr = array([arange(8), arange(8)]).astype(float64) + data = fromarray(arr, engine=eng) + val = data.nansum().toarray() + expected = nansum(data.toarray(), axis=0) + assert allclose(val, expected) + assert str(val.dtype) == 'float64' + arr[0, 4] = nan + arr[1, 3] = nan + arr[1, 4] = nan + data = fromarray(arr, engine=eng) + val = data.nansum().toarray() + expected = nansum(data.toarray(), axis=0) + assert allclose(val, expected, equal_nan=True) + assert str(val.dtype) == 'float64' + + def test_var(eng): data = fromlist([arange(8), arange(8)], engine=eng) val = data.var().toarray() @@ -143,6 +178,23 @@ def test_var(eng): assert str(val.dtype) == 'float64' +def test_nanvar(eng): + arr = array([arange(8), arange(8)]).astype(float64) + data = fromarray(arr, engine=eng) + val = data.nanvar().toarray() + expected = nanvar(data.toarray(), axis=0) + assert allclose(val, expected) + assert str(val.dtype) == 'float64' + arr[0, 4] = nan + arr[1, 3] = nan + arr[1, 4] = nan + data = fromarray(arr, engine=eng) + val = data.nanvar().toarray() + expected = nanvar(data.toarray(), axis=0) + assert allclose(val, expected, equal_nan=True) + assert str(val.dtype) == 'float64' + + def test_std(eng): data = fromlist([arange(8), arange(8)], engine=eng) val = data.std().toarray() @@ -151,6 +203,23 @@ def test_std(eng): 
assert str(val.dtype) == 'float64' +def test_nanstd(eng): + arr = array([arange(8), arange(8)]).astype(float64) + data = fromarray(arr, engine=eng) + val = data.nanstd().toarray() + expected = nanstd(data.toarray(), axis=0) + assert allclose(val, expected) + assert str(val.dtype) == 'float64' + arr[0, 4] = nan + arr[1, 3] = nan + arr[1, 4] = nan + data = fromarray(arr, engine=eng) + val = data.nanstd().toarray() + expected = nanstd(data.toarray(), axis=0) + assert allclose(val, expected, equal_nan=True) + assert str(val.dtype) == 'float64' + + def test_max(eng): data = fromlist([arange(8), arange(8)], engine=eng) val = data.max().toarray() @@ -158,12 +227,47 @@ def test_max(eng): assert allclose(val, expected) +def test_nanmax(eng): + arr = array([arange(8), arange(8)]).astype(float64) + data = fromarray(arr, engine=eng) + val = data.nanmax().toarray() + expected = nanmax(data.toarray(), axis=0) + assert allclose(val, expected) + assert str(val.dtype) == 'float64' + arr[0, 4] = nan + arr[1, 3] = nan + arr[1, 4] = nan + data = fromarray(arr, engine=eng) + val = data.nanmax().toarray() + expected = nanmax(data.toarray(), axis=0) + assert allclose(val, expected, equal_nan=True) + assert str(val.dtype) == 'float64' + + def test_min(eng): data = fromlist([arange(8), arange(8)], engine=eng) val = data.min().toarray() expected = data.toarray().min(axis=0) assert allclose(val, expected) + +def test_nanmin(eng): + arr = array([arange(8), arange(8)]).astype(float64) + data = fromarray(arr, engine=eng) + val = data.nanmin().toarray() + expected = nanmin(data.toarray(), axis=0) + assert allclose(val, expected) + assert str(val.dtype) == 'float64' + arr[0, 4] = nan + arr[1, 3] = nan + arr[1, 4] = nan + data = fromarray(arr, engine=eng) + val = data.nanmin().toarray() + expected = nanmin(data.toarray(), axis=0) + assert allclose(val, expected, equal_nan=True) + assert str(val.dtype) == 'float64' + + def test_labels(eng): x = [array([0, 1]), array([2, 3]), array([4, 5]), 
array([6, 7])] data = fromlist(x, labels=[0, 1, 2, 3], engine=eng) diff --git a/thunder/base.py b/thunder/base.py index 8b50fb8d..71e01d34 100644 --- a/thunder/base.py +++ b/thunder/base.py @@ -323,6 +323,42 @@ def min(self): """ raise NotImplementedError + def nanmean(self): + """ + Mean of values computed along the appropriate dimension ignoring NaNs. + """ + raise NotImplementedError + + def nansum(self): + """ + Sum of values computed along the appropriate dimension ignoring NaNs. + """ + raise NotImplementedError + + def nanvar(self): + """ + Variance of values computed along the appropriate dimension ignoring NaNs. + """ + raise NotImplementedError + + def nanstd(self): + """ + Standard deviation of values computed along the appropriate dimension ignoring NaNs. + """ + raise NotImplementedError + + def nanmax(self): + """ + Maximum of values computed along the appropriate dimension ignoring NaNs. + """ + raise NotImplementedError + + def nanmin(self): + """ + Minimum of values computed along the appropriate dimension ignoring NaNs. + """ + raise NotImplementedError + def map(self, func, **kwargs): """ Map a function over elements. 
diff --git a/thunder/images/images.py b/thunder/images/images.py index fe49849a..a62dd2f4 100644 --- a/thunder/images/images.py +++ b/thunder/images/images.py @@ -1,8 +1,6 @@ import logging from numpy import ndarray, arange, amax, amin, size, asarray, random, prod, \ - apply_along_axis -from itertools import product - + apply_along_axis, nanmean, nanstd, nanmin, nanmax, nansum, nanvar, expand_dims from ..base import Data @@ -244,6 +242,60 @@ def min(self): """ return self._constructor(self.values.min(axis=0, keepdims=True)) + def nanmean(self): + """ + Compute the mean across images ignoring the NaNs + """ + if self.mode == 'spark': + return self._constructor(self.values.nanmean(axis=0, keepdims=True)) + else: + return self._constructor(expand_dims(nanmean(self.values, axis=0), axis=0)) + + def nanmax(self): + """ + Compute the max across images ignoring the NaNs + """ + if self.mode == 'spark': + return self._constructor(self.values.nanmax(axis=0, keepdims=True)) + else: + return self._constructor(expand_dims(nanmax(self.values, axis=0), axis=0)) + + def nanmin(self): + """ + Compute the min across images ignoring the NaNs + """ + if self.mode == 'spark': + return self._constructor(self.values.nanmin(axis=0, keepdims=True)) + else: + return self._constructor(expand_dims(nanmin(self.values, axis=0), axis=0)) + + def nanstd(self): + """ + Compute the standard deviation across images ignoring the NaNs + """ + if self.mode == 'spark': + return self._constructor(self.values.nanstd(axis=0, keepdims=True)) + else: + return self._constructor(expand_dims(nanstd(self.values, axis=0), axis=0)) + + def nansum(self): + """ + Compute the sum across images ignoring the NaNs + """ + if self.mode == 'spark': + return self._constructor(self.values.nansum(axis=0, keepdims=True)) + else: + return self._constructor(expand_dims(nansum(self.values, axis=0), axis=0)) + + def nanvar(self): + """ + Compute the variance across images ignoring the NaNs + """ + if self.mode == 'spark': + return 
self._constructor(self.values.nanvar(axis=0, keepdims=True)) + else: + return self._constructor(expand_dims(nanvar(self.values, axis=0), axis=0)) + def squeeze(self): """ Remove single-dimensional axes from images. diff --git a/thunder/series/series.py b/thunder/series/series.py index 346ba8ef..ffb7bf3a 100755 --- a/thunder/series/series.py +++ b/thunder/series/series.py @@ -2,7 +2,8 @@ asarray, zeros, corrcoef, where, unique, array_equal, delete, \ ravel, logical_not, unravel_index, prod, random, shape, \ dot, outer, expand_dims, ScalarType, ndarray, sqrt, pi, angle, fft, \ - roll, polyfit, polyval, ceil, float64, fix + roll, polyfit, polyval, ceil, float64, fix, \ + nanmean, nanstd, nanmin, nanmax, nansum, nanvar import logging from itertools import product from bolt.utils import tupleize @@ -239,6 +240,60 @@ def min(self): """ return self._constructor(self.values.min(axis=self.baseaxes, keepdims=True)) + def nanmean(self): + """ + Compute the mean across records ignoring NaNs. + """ + if self.mode == 'spark': + return self._constructor(self.values.nanmean(axis=self.baseaxes, keepdims=True)) + else: + return self._constructor(nanmean(self.values, axis=self.baseaxes, keepdims=True)) + + def nanvar(self): + """ + Compute the variance across records ignoring NaNs. + """ + if self.mode == 'spark': + return self._constructor(self.values.nanvar(axis=self.baseaxes, keepdims=True)) + else: + return self._constructor(nanvar(self.values, axis=self.baseaxes, keepdims=True)) + + def nanstd(self): + """ + Compute the standard deviation across records ignoring NaNs. + """ + if self.mode == 'spark': + return self._constructor(self.values.nanstd(axis=self.baseaxes, keepdims=True)) + else: + return self._constructor(nanstd(self.values, axis=self.baseaxes, keepdims=True)) + + def nansum(self): + """ + Compute the sum across records ignoring NaNs. 
+ """ + if self.mode == 'spark': + return self._constructor(self.values.nansum(axis=self.baseaxes, keepdims=True)) + else: + return self._constructor(nansum(self.values, axis=self.baseaxes, keepdims=True)) + + def nanmax(self): + """ + Compute the max across records ignoring NaNs. + """ + if self.mode == 'spark': + return self._constructor(self.values.nanmax(axis=self.baseaxes, keepdims=True)) + else: + return self._constructor(nanmax(self.values, axis=self.baseaxes, keepdims=True)) + + def nanmin(self): + """ + Compute the min across records ignoring NaNs. + """ + if self.mode == 'spark': + return self._constructor(self.values.nanmin(axis=self.baseaxes, keepdims=True)) + else: + return self._constructor(nanmin(self.values, axis=self.baseaxes, keepdims=True)) + def between(self, left, right): """ Select subset of values within the given index range.