From b36db20699b266ee5dff68396bb1571e6904a4ee Mon Sep 17 00:00:00 2001 From: Steve Varner Date: Wed, 4 May 2016 16:46:23 -0400 Subject: [PATCH 1/8] Adding code to correctly handle NaN values in images. --- thunder/images/images.py | 147 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 146 insertions(+), 1 deletion(-) diff --git a/thunder/images/images.py b/thunder/images/images.py index 9c1872ef..215ca60a 100644 --- a/thunder/images/images.py +++ b/thunder/images/images.py @@ -1,11 +1,57 @@ import logging from numpy import ndarray, arange, amax, amin, size, asarray, random, prod, \ - apply_along_axis + apply_along_axis, nanmean, nanstd, nanmin, nanmax, nansum, nanmedian, inf, subtract + from itertools import product from ..base import Data from ..blocks.local import LocalBlocks +class Dimensions(object): + """ Class for estimating and storing dimensions of data based on the keys """ + + def __init__(self, values=[], n=3): + self.min = tuple(map(lambda i: inf, range(0, n))) + self.max = tuple(map(lambda i: -inf, range(0, n))) + + for v in values: + self.merge(v) + + def merge(self, value): + self.min = tuple(map(min, self.min, value)) + self.max = tuple(map(max, self.max, value)) + return self + + def mergeDims(self, other): + self.min = tuple(map(min, self.min, other.min)) + self.max = tuple(map(max, self.max, other.max)) + return self + + @property + def count(self): + return tuple(map(lambda x: x + 1, map(subtract, self.max, self.min))) + + @classmethod + def fromTuple(cls, tup): + """ Generates a Dimensions object from the passed tuple. 
""" + mx = [v-1 for v in tup] + mn = [0] * len(tup) + return cls(values=[mx, mn], n=len(tup)) + + def __str__(self): + return str(self.count) + + def __repr__(self): + return str(self.count) + + def __len__(self): + return len(self.min) + + def __iter__(self): + return iter(self.count) + + def __getitem__(self, item): + return self.count[item] class Images(Data): """ @@ -173,6 +219,56 @@ def sample(self, nsamples=100, seed=None): return self._constructor(result) + + + def crop(self, minbound, maxbound): + """ + Crop a spatial region from 2D or 3D data. + + Parameters + ---------- + minbound : list or tuple + Minimum of crop region (x,y) or (x,y,z) + + maxbound : list or tuple + Maximum of crop region (x,y) or (x,y,z) + + Returns + ------- + Images object with cropped images / volume + """ + dims = self.dims + dims = Dimensions.fromTuple(dims) + ndims = len(dims) + dimsCount = dims.count + + if ndims < 2 or ndims > 3: + raise Exception("Cropping only supported on 2D or 3D image data.") + + dimMinMaxTuples = zip(dimsCount, minbound, maxbound) + if len(dimMinMaxTuples) != ndims: + raise ValueError("Number of specified bounds (%d) must equal image dimensionality (%d)" % + (len(dimMinMaxTuples), ndims)) + slices = [] + newdims = [] + for dim, minb, maxb in dimMinMaxTuples: + if maxb > dim: + raise ValueError("Maximum bound (%d) may not exceed image size (%d)" % (maxb, dim)) + if minb < 0: + raise ValueError("Minumum bound (%d) must be positive" % minb) + if minb < maxb: + slise = slice(minb, maxb) + newdims.append(maxb - minb) + elif minb == maxb: + slise = minb # just an integer index, not a slice; this squeezes out singleton dimensions + # don't append to newdims, this dimension will be squeezed out + else: + raise ValueError("Minimum bound (%d) must be <= max bound (%d)" % (minb, maxb)) + slices.append(slise) + + return self.map(lambda v: v[slices], dims=newdims) + + def map(self, func, dims=None, with_keys=False): """ Map an array -> array function over each image. 
@@ -237,6 +333,55 @@ def min(self): """ return self._constructor(self.values.min(axis=0, keepdims=True)) + + def nanmean(self): + """ + Compute the mean across images ignoring the NaNs + """ + return self._constructor(self.values.nanmean(axis=0, keepdims=True)) + + def nancount(self): + """ + Compute the mean across images ignoring the NaNs + """ + return self._constructor(self.values.nancount(axis=0, keepdims=True)) + + def nanmax(self): + """ + Compute the max across images ignoring the NaNs + """ + return self._constructor(self.values.nanmax(axis=0, keepdims=True)) + + def nanmin(self): + """ + Compute the min across images ignoring the NaNs + """ + return self._constructor(self.values.nanmin(axis=0, keepdims=True)) + + def nanstd(self): + """ + Compute the standard deviation across images ignoring the NaNs + """ + return self._constructor(self.values.nanstd(axis=0, keepdims=True)) + + def nansum(self): + """ + Compute the sum across images ignoring the NaNs + """ + return self._constructor(self.values.nansum(axis=0, keepdims=True)) + + def nanvariance(self): + """ + Compute the sum across images ignoring the NaNs + """ + return self._constructor(self.values.nanvar(axis=0, keepdims=True)) + + def nanmedian(self): + """ + Compute the median across images ignoring the NaNs + """ + return self._constructor(nanmedian(self.values, axis=0)) + def squeeze(self): """ Remove single-dimensional axes from images. From ae713a695a14b0ef913e46535a9814c8f8e8bac4 Mon Sep 17 00:00:00 2001 From: Steve Varner Date: Wed, 4 May 2016 16:58:11 -0400 Subject: [PATCH 2/8] Removing crop function that was mistakenly added back in. 
--- thunder/images/images.py | 48 ---------------------------------------- 1 file changed, 48 deletions(-) diff --git a/thunder/images/images.py b/thunder/images/images.py index 215ca60a..17637944 100644 --- a/thunder/images/images.py +++ b/thunder/images/images.py @@ -221,54 +221,6 @@ def sample(self, nsamples=100, seed=None): - def crop(self, minbound, maxbound): - """ - Crop a spatial region from 2D or 3D data. - - Parameters - ---------- - minbound : list or tuple - Minimum of crop region (x,y) or (x,y,z) - - maxbound : list or tuple - Maximum of crop region (x,y) or (x,y,z) - - Returns - ------- - Images object with cropped images / volume - """ - dims = self.dims - dims = Dimensions.fromTuple(dims) - ndims = len(dims) - dimsCount = dims.count - - if ndims < 2 or ndims > 3: - raise Exception("Cropping only supported on 2D or 3D image data.") - - dimMinMaxTuples = zip(dimsCount, minbound, maxbound) - if len(dimMinMaxTuples) != ndims: - raise ValueError("Number of specified bounds (%d) must equal image dimensionality (%d)" % - (len(dimMinMaxTuples), ndims)) - slices = [] - newdims = [] - for dim, minb, maxb in dimMinMaxTuples: - if maxb > dim: - raise ValueError("Maximum bound (%d) may not exceed image size (%d)" % (maxb, dim)) - if minb < 0: - raise ValueError("Minumum bound (%d) must be positive" % minb) - if minb < maxb: - slise = slice(minb, maxb) - newdims.append(maxb - minb) - elif minb == maxb: - slise = minb # just an integer index, not a slice; this squeezes out singleton dimensions - # don't append to newdims, this dimension will be squeezed out - else: - raise ValueError("Minimum bound (%d) must be <= max bound (%d)" % (minb, maxb)) - slices.append(slise) - - return self.map(lambda v: v[slices], dims=newdims) - - def map(self, func, dims=None, with_keys=False): """ Map an array -> array function over each image. 
From 5400570887b29c7ec4af502fc9b83491b99ac5a9 Mon Sep 17 00:00:00 2001 From: "Mohar, Boaz" Date: Tue, 17 May 2016 23:57:38 -0400 Subject: [PATCH 3/8] fixed nan handling in local mode for images added tests --- test/test_images.py | 110 ++++++++++++++++++++++++++++++++++++++- thunder/images/images.py | 95 +++++++++------------------------ 2 files changed, 134 insertions(+), 71 deletions(-) diff --git a/test/test_images.py b/test/test_images.py index 6660b4dc..8c445871 100644 --- a/test/test_images.py +++ b/test/test_images.py @@ -1,7 +1,8 @@ import pytest -from numpy import arange, allclose, array, mean, apply_along_axis, float64 +from numpy import arange, allclose, array, mean, apply_along_axis, float64, nanmean, nan, nansum, nanvar,\ + nanmin, nanmax, nanstd -from thunder.images.readers import fromlist, fromarray +from thunder.images.readers import fromlist from thunder.images.images import Images from thunder.series.series import Series @@ -33,6 +34,7 @@ def test_sample(eng): assert allclose(data.sample(1).shape, (1, 2, 2)) assert allclose(data.filter(lambda x: x.max() > 5).sample(1).toarray(), [[1, 10], [1, 10]]) + def test_labels(eng): x = arange(10).reshape(10, 1, 1) data = fromlist(x, labels=range(10), engine=eng) @@ -159,6 +161,60 @@ def test_mean(eng): assert allclose(data.mean().toarray(), original.mean(axis=0)) +def test_nanmean(eng): + original = arange(24).reshape((2, 3, 4)).astype(float64) + data = fromlist(list(original), engine=eng) + assert allclose(data.nanmean().shape, (1, 3, 4)) + assert allclose(data.nanmean().toarray(), nanmean(original, axis=0)) + + original[0, 2, 3] = nan + original[1, 0, 2] = nan + original[1, 2, 2] = nan + data = fromlist(list(original), engine=eng) + assert allclose(data.nanmean().shape, (1, 3, 4)) + assert allclose(data.nanmean().toarray(), nanmean(original, axis=0)) + + +def test_min(eng): + original = arange(24).reshape((2, 3, 4)) + data = fromlist(list(original), engine=eng) + assert allclose(data.min().shape, (1, 3, 
4)) + assert allclose(data.min().toarray(), original.min(axis=0)) + + +def test_nanmin(eng): + original = arange(24).reshape((2, 3, 4)).astype(float64) + data = fromlist(list(original), engine=eng) + assert allclose(data.nanmin().shape, (1, 3, 4)) + assert allclose(data.nanmin().toarray(), original.min(axis=0)) + original[0, 2, 3] = nan + original[1, 0, 2] = nan + original[1, 2, 2] = nan + data = fromlist(list(original), engine=eng) + assert allclose(data.nanmin().shape, (1, 3, 4)) + assert allclose(data.nanmin().toarray(), nanmin(original, axis=0)) + + +def test_max(eng): + original = arange(24).reshape((2, 3, 4)) + data = fromlist(list(original), engine=eng) + assert allclose(data.max().shape, (1, 3, 4)) + assert allclose(data.max().toarray(), original.max(axis=0)) + + +def test_nanmax(eng): + original = arange(24).reshape((2, 3, 4)).astype(float64) + data = fromlist(list(original), engine=eng) + assert allclose(data.nanmax().shape, (1, 3, 4)) + assert allclose(data.nanmax().toarray(), original.max(axis=0)) + original[0, 2, 3] = nan + original[1, 0, 2] = nan + original[1, 2, 2] = nan + data = fromlist(list(original), engine=eng) + assert allclose(data.nanmax().shape, (1, 3, 4)) + assert allclose(data.nanmax().toarray(), nanmax(original, axis=0)) + + def test_sum(eng): original = arange(24).reshape((2, 3, 4)) data = fromlist(list(original), engine=eng) @@ -166,6 +222,20 @@ def test_sum(eng): assert allclose(data.sum().toarray(), original.sum(axis=0)) +def test_nansum(eng): + original = arange(24).reshape((2, 3, 4)).astype(float64) + data = fromlist(list(original), engine=eng) + assert allclose(data.nansum().shape, (1, 3, 4)) + assert allclose(data.nansum().toarray(), nansum(original, axis=0)) + + original[0, 2, 3] = nan + original[1, 0, 2] = nan + original[1, 2, 2] = nan + data = fromlist(list(original), engine=eng) + assert allclose(data.nansum().shape, (1, 3, 4)) + assert allclose(data.nansum().toarray(), nansum(original, axis=0)) + + def test_var(eng): original 
= arange(24).reshape((2, 3, 4)) data = fromlist(list(original), engine=eng) @@ -173,6 +243,41 @@ def test_var(eng): assert allclose(data.var().toarray(), original.var(axis=0)) +def test_nanvar(eng): + original = arange(24).reshape((2, 3, 4)).astype(float64) + data = fromlist(list(original), engine=eng) + assert allclose(data.nanvar().shape, (1, 3, 4)) + assert allclose(data.nanvar().toarray(), nanvar(original, axis=0)) + + original[0, 2, 3] = nan + original[1, 0, 2] = nan + original[1, 2, 2] = nan + data = fromlist(list(original), engine=eng) + assert allclose(data.nanvar().shape, (1, 3, 4)) + assert allclose(data.nanvar().toarray(), nanvar(original, axis=0)) + + +def test_std(eng): + original = arange(24).reshape((2, 3, 4)) + data = fromlist(list(original), engine=eng) + assert allclose(data.std().shape, (1, 3, 4)) + assert allclose(data.std().toarray(), original.std(axis=0)) + + +def test_nanstd(eng): + original = arange(24).reshape((2, 3, 4)).astype(float64) + data = fromlist(list(original), engine=eng) + assert allclose(data.nanstd().shape, (1, 3, 4)) + assert allclose(data.nanstd().toarray(), nanstd(original, axis=0)) + + original[0, 2, 3] = nan + original[1, 0, 2] = nan + original[1, 2, 2] = nan + data = fromlist(list(original), engine=eng) + assert allclose(data.nanstd().shape, (1, 3, 4)) + assert allclose(data.nanstd().toarray(), nanstd(original, axis=0)) + + def test_subtract(eng): original = arange(24).reshape((4, 6)) data = fromlist([original], engine=eng) @@ -180,6 +285,7 @@ def test_subtract(eng): sub = arange(24).reshape((4, 6)) assert allclose(data.subtract(sub).toarray(), original - sub) + def test_map_as_series(eng): original = arange(4*4).reshape(4, 4) data = fromlist(5*[original], engine=eng) diff --git a/thunder/images/images.py b/thunder/images/images.py index f07280ef..a62dd2f4 100644 --- a/thunder/images/images.py +++ b/thunder/images/images.py @@ -1,56 +1,8 @@ import logging from numpy import ndarray, arange, amax, amin, size, asarray, 
random, prod, \ - apply_along_axis, nanmean, nanstd, nanmin, nanmax, nansum, nanmedian, inf, subtract - -from itertools import product - + apply_along_axis, nanmean, nanstd, nanmin, nanmax, nansum, nanvar, expand_dims from ..base import Data -class Dimensions(object): - """ Class for estimating and storing dimensions of data based on the keys """ - - def __init__(self, values=[], n=3): - self.min = tuple(map(lambda i: inf, range(0, n))) - self.max = tuple(map(lambda i: -inf, range(0, n))) - - for v in values: - self.merge(v) - - def merge(self, value): - self.min = tuple(map(min, self.min, value)) - self.max = tuple(map(max, self.max, value)) - return self - - def mergeDims(self, other): - self.min = tuple(map(min, self.min, other.min)) - self.max = tuple(map(max, self.max, other.max)) - return self - - @property - def count(self): - return tuple(map(lambda x: x + 1, map(subtract, self.max, self.min))) - - @classmethod - def fromTuple(cls, tup): - """ Generates a Dimensions object from the passed tuple. 
""" - mx = [v-1 for v in tup] - mn = [0] * len(tup) - return cls(values=[mx, mn], n=len(tup)) - - def __str__(self): - return str(self.count) - - def __repr__(self): - return str(self.count) - - def __len__(self): - return len(self.min) - - def __iter__(self): - return iter(self.count) - - def __getitem__(self, item): - return self.count[item] class Images(Data): """ @@ -290,54 +242,59 @@ def min(self): """ return self._constructor(self.values.min(axis=0, keepdims=True)) - def nanmean(self): """ Compute the mean across images ignoring the NaNs """ - return self._constructor(self.values.nanmean(axis=0, keepdims=True)) - - def nancount(self): - """ - Compute the mean across images ignoring the NaNs - """ - return self._constructor(self.values.nancount(axis=0, keepdims=True)) + if self.mode == 'spark': + return self._constructor(self.values.nanmean(axis=0, keepdims=True)) + else: + return self._constructor(expand_dims(nanmean(self.values, axis=0), axis=0)) def nanmax(self): """ Compute the max across images ignoring the NaNs """ - return self._constructor(self.values.nanmax(axis=0, keepdims=True)) + if self.mode == 'spark': + return self._constructor(self.values.nanmax(axis=0, keepdims=True)) + else: + return self._constructor(expand_dims(nanmax(self.values, axis=0), axis=0)) def nanmin(self): """ Compute the min across images ignoring the NaNs """ - return self._constructor(self.values.nanmin(axis=0, keepdims=True)) + if self.mode == 'spark': + return self._constructor(self.values.nanmin(axis=0, keepdims=True)) + else: + return self._constructor(expand_dims(nanmin(self.values, axis=0), axis=0)) def nanstd(self): """ Compute the standard deviation across images ignoring the NaNs """ - return self._constructor(self.values.nanstd(axis=0, keepdims=True)) + if self.mode == 'spark': + return self._constructor(self.values.nanmstd(axis=0, keepdims=True)) + else: + return self._constructor(expand_dims(nanstd(self.values, axis=0), axis=0)) def nansum(self): """ Compute the sum 
across images ignoring the NaNs """ - return self._constructor(self.values.nansum(axis=0, keepdims=True)) + if self.mode == 'spark': + return self._constructor(self.values.nanmsum(axis=0, keepdims=True)) + else: + return self._constructor(expand_dims(nansum(self.values, axis=0), axis=0)) - def nanvariance(self): + def nanvar(self): """ Compute the sum across images ignoring the NaNs """ - return self._constructor(self.values.nanvar(axis=0, keepdims=True)) - - def nanmedian(self): - """ - Compute the median across images ignoring the NaNs - """ - return self._constructor(nanmedian(self.values, axis=0)) + if self.mode == 'spark': + return self._constructor(self.values.nanvar(axis=0, keepdims=True)) + else: + return self._constructor(expand_dims(nanvar(self.values, axis=0), axis=0)) def squeeze(self): """ From 20e1b57909aa35be09c5cf54847b330034223649 Mon Sep 17 00:00:00 2001 From: "Mohar, Boaz" Date: Wed, 18 May 2016 10:17:45 -0400 Subject: [PATCH 4/8] Added series nan handling and tests --- test/test_images.py | 4 +- test/test_series.py | 106 ++++++++++++++++++++++++++++++++++++++- thunder/series/series.py | 57 ++++++++++++++++++++- 3 files changed, 163 insertions(+), 4 deletions(-) diff --git a/test/test_images.py b/test/test_images.py index 8c445871..36dc26a6 100644 --- a/test/test_images.py +++ b/test/test_images.py @@ -1,6 +1,6 @@ import pytest -from numpy import arange, allclose, array, mean, apply_along_axis, float64, nanmean, nan, nansum, nanvar,\ - nanmin, nanmax, nanstd +from numpy import arange, allclose, array, mean, apply_along_axis, float64, \ + nanmean, nan, nansum, nanvar, nanmin, nanmax, nanstd from thunder.images.readers import fromlist from thunder.images.images import Images diff --git a/test/test_series.py b/test/test_series.py index c197bc34..a7da07b4 100644 --- a/test/test_series.py +++ b/test/test_series.py @@ -1,5 +1,6 @@ import pytest -from numpy import allclose, arange, array, asarray, dot, cov, corrcoef, float64 +from numpy import allclose, 
arange, array, asarray, dot, cov, corrcoef, float64, \ + nanmean, nan, nansum, nanvar, nanmin, nanmax, nanstd from thunder.series.readers import fromlist, fromarray from thunder.images.readers import fromlist as img_fromlist @@ -127,6 +128,23 @@ def test_mean(eng): assert str(val.dtype) == 'float64' +def test_nanmean(eng): + arr = array([arange(8), arange(8)]).astype(float64) + data = fromarray(arr, engine=eng) + val = data.nanmean().toarray() + expected = nanmean(data.toarray(), axis=0) + assert allclose(val, expected) + assert str(val.dtype) == 'float64' + arr[0, 4] = nan + arr[1, 3] = nan + arr[1, 4] = nan + data = fromarray(arr, engine=eng) + val = data.nanmean().toarray() + expected = nanmean(data.toarray(), axis=0) + assert allclose(val, expected, equal_nan=True) + assert str(val.dtype) == 'float64' + + def test_sum(eng): data = fromlist([arange(8), arange(8)], engine=eng) val = data.sum().toarray() @@ -135,6 +153,23 @@ def test_sum(eng): assert str(val.dtype) == 'int64' +def test_nansum(eng): + arr = array([arange(8), arange(8)]).astype(float64) + data = fromarray(arr, engine=eng) + val = data.nansum().toarray() + expected = nansum(data.toarray(), axis=0) + assert allclose(val, expected) + assert str(val.dtype) == 'float64' + arr[0, 4] = nan + arr[1, 3] = nan + arr[1, 4] = nan + data = fromarray(arr, engine=eng) + val = data.nansum().toarray() + expected = nansum(data.toarray(), axis=0) + assert allclose(val, expected, equal_nan=True) + assert str(val.dtype) == 'float64' + + def test_var(eng): data = fromlist([arange(8), arange(8)], engine=eng) val = data.var().toarray() @@ -143,6 +178,23 @@ def test_var(eng): assert str(val.dtype) == 'float64' +def test_nanvar(eng): + arr = array([arange(8), arange(8)]).astype(float64) + data = fromarray(arr, engine=eng) + val = data.nanvar().toarray() + expected = nanvar(data.toarray(), axis=0) + assert allclose(val, expected) + assert str(val.dtype) == 'float64' + arr[0, 4] = nan + arr[1, 3] = nan + arr[1, 4] = nan + data 
= fromarray(arr, engine=eng) + val = data.nanvar().toarray() + expected = nanvar(data.toarray(), axis=0) + assert allclose(val, expected, equal_nan=True) + assert str(val.dtype) == 'float64' + + def test_std(eng): data = fromlist([arange(8), arange(8)], engine=eng) val = data.std().toarray() @@ -151,6 +203,23 @@ def test_std(eng): assert str(val.dtype) == 'float64' +def test_nanstd(eng): + arr = array([arange(8), arange(8)]).astype(float64) + data = fromarray(arr, engine=eng) + val = data.nanstd().toarray() + expected = nanstd(data.toarray(), axis=0) + assert allclose(val, expected) + assert str(val.dtype) == 'float64' + arr[0, 4] = nan + arr[1, 3] = nan + arr[1, 4] = nan + data = fromarray(arr, engine=eng) + val = data.nanstd().toarray() + expected = nanstd(data.toarray(), axis=0) + assert allclose(val, expected, equal_nan=True) + assert str(val.dtype) == 'float64' + + def test_max(eng): data = fromlist([arange(8), arange(8)], engine=eng) val = data.max().toarray() @@ -158,12 +227,47 @@ def test_max(eng): assert allclose(val, expected) +def test_nanmax(eng): + arr = array([arange(8), arange(8)]).astype(float64) + data = fromarray(arr, engine=eng) + val = data.nanmax().toarray() + expected = nanmax(data.toarray(), axis=0) + assert allclose(val, expected) + assert str(val.dtype) == 'float64' + arr[0, 4] = nan + arr[1, 3] = nan + arr[1, 4] = nan + data = fromarray(arr, engine=eng) + val = data.nanmax().toarray() + expected = nanmax(data.toarray(), axis=0) + assert allclose(val, expected, equal_nan=True) + assert str(val.dtype) == 'float64' + + def test_min(eng): data = fromlist([arange(8), arange(8)], engine=eng) val = data.min().toarray() expected = data.toarray().min(axis=0) assert allclose(val, expected) + +def test_nanmin(eng): + arr = array([arange(8), arange(8)]).astype(float64) + data = fromarray(arr, engine=eng) + val = data.nanmin().toarray() + expected = nanmin(data.toarray(), axis=0) + assert allclose(val, expected) + assert str(val.dtype) == 'float64' + 
arr[0, 4] = nan + arr[1, 3] = nan + arr[1, 4] = nan + data = fromarray(arr, engine=eng) + val = data.nanmin().toarray() + expected = nanmin(data.toarray(), axis=0) + assert allclose(val, expected, equal_nan=True) + assert str(val.dtype) == 'float64' + + def test_labels(eng): x = [array([0, 1]), array([2, 3]), array([4, 5]), array([6, 7])] data = fromlist(x, labels=[0, 1, 2, 3], engine=eng) diff --git a/thunder/series/series.py b/thunder/series/series.py index 346ba8ef..8bded51c 100755 --- a/thunder/series/series.py +++ b/thunder/series/series.py @@ -2,7 +2,8 @@ asarray, zeros, corrcoef, where, unique, array_equal, delete, \ ravel, logical_not, unravel_index, prod, random, shape, \ dot, outer, expand_dims, ScalarType, ndarray, sqrt, pi, angle, fft, \ - roll, polyfit, polyval, ceil, float64, fix + roll, polyfit, polyval, ceil, float64, fix, \ + nanmean, nanstd, nanmin, nanmax, nansum, nanvar import logging from itertools import product from bolt.utils import tupleize @@ -239,6 +240,60 @@ def min(self): """ return self._constructor(self.values.min(axis=self.baseaxes, keepdims=True)) + def nanmean(self): + """ + Compute the mean across records ignoring NaNs + """ + if self.mode == 'spark': + return self._constructor(self.values.nanmean(axis=self.baseaxes, keepdims=True)) + else: + return self._constructor(expand_dims(nanmean(self.values, axis=self.baseaxes), axis=self.baseaxes)) + + def nanvar(self): + """ + Compute the variance across records ignoring NaNs + """ + if self.mode == 'spark': + return self._constructor(self.values.nanvar(axis=self.baseaxes, keepdims=True)) + else: + return self._constructor(expand_dims(nanvar(self.values, axis=self.baseaxes), axis=self.baseaxes)) + + def nanstd(self): + """ + Compute the standard deviation across records ignoring NaNs.
+ """ + if self.mode == 'spark': + return self._constructor(self.values.nanstd(axis=self.baseaxes, keepdims=True)) + else: + return self._constructor(expand_dims(nanstd(self.values, axis=self.baseaxes), axis=self.baseaxes)) + + def nansum(self): + """ + Compute the sum across records ignoring NaNs. + """ + if self.mode == 'spark': + return self._constructor(self.values.nansum(axis=self.baseaxes, keepdims=True)) + else: + return self._constructor(expand_dims(nansum(self.values, axis=self.baseaxes), axis=self.baseaxes)) + + def nanmax(self): + """ + Compute the max across records ignoring NaNs. + """ + if self.mode == 'spark': + return self._constructor(self.values.nanmax(axis=self.baseaxes, keepdims=True)) + else: + return self._constructor(expand_dims(nanmax(self.values, axis=self.baseaxes), axis=self.baseaxes)) + + def nanmin(self): + """ + Compute the min across records ignoring NaNs. + """ + if self.mode == 'spark': + return self._constructor(self.values.nanmin(axis=self.baseaxes, keepdims=True)) + else: + return self._constructor(expand_dims(nanmin(self.values, axis=self.baseaxes), axis=self.baseaxes)) + def between(self, left, right): """ Select subset of values within the given index range.
From b501e894dbfdab628aac79a72245514cdf30ba9f Mon Sep 17 00:00:00 2001 From: "Mohar, Boaz" Date: Wed, 18 May 2016 10:39:43 -0400 Subject: [PATCH 5/8] bug fix for local nan functions --- thunder/series/series.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/thunder/series/series.py b/thunder/series/series.py index 8bded51c..ffb7bf3a 100755 --- a/thunder/series/series.py +++ b/thunder/series/series.py @@ -247,7 +247,7 @@ def nanmean(self): if self.mode == 'spark': return self._constructor(self.values.nanmean(axis=self.baseaxes, keepdims=True)) else: - return self._constructor(expand_dims(nanmean(self.values, axis=self.baseaxes), axis=self.baseaxes)) + return self._constructor(expand_dims(nanmean(self.values, axis=self.baseaxes), axis=self.baseaxes[0])) def nanvar(self): """ @@ -256,7 +256,7 @@ def nanvar(self): if self.mode == 'spark': return self._constructor(self.values.nanvar(axis=self.baseaxes, keepdims=True)) else: - return self._constructor(expand_dims(nanvar(self.values, axis=self.baseaxes), axis=self.baseaxes)) + return self._constructor(expand_dims(nanvar(self.values, axis=self.baseaxes), axis=self.baseaxes[0])) def nanstd(self): """ @@ -265,7 +265,7 @@ def nanstd(self): if self.mode == 'spark': return self._constructor(self.values.nanstd(axis=self.baseaxes, keepdims=True)) else: - return self._constructor(expand_dims(nanstd(self.values, axis=self.baseaxes), axis=self.baseaxes)) + return self._constructor(expand_dims(nanstd(self.values, axis=self.baseaxes), axis=self.baseaxes[0])) def nansum(self): """ @@ -274,7 +274,7 @@ def nansum(self): if self.mode == 'spark': return self._constructor(self.values.nansum(axis=self.baseaxes, keepdims=True)) else: - return self._constructor(expand_dims(nansum(self.values, axis=self.baseaxes), axis=self.baseaxes)) + return self._constructor(expand_dims(nansum(self.values, axis=self.baseaxes), axis=self.baseaxes[0])) def nanmax(self): """ @@ -283,7 +283,7 @@ def nanmax(self): if self.mode == 
'spark': return self._constructor(self.values.nanmax(axis=self.baseaxes, keepdims=True)) else: - return self._constructor(expand_dims(nanmax(self.values, axis=self.baseaxes), axis=self.baseaxes)) + return self._constructor(expand_dims(nanmax(self.values, axis=self.baseaxes), axis=self.baseaxes[0])) def nanmin(self): """ @@ -292,7 +292,7 @@ def nanmin(self): if self.mode == 'spark': return self._constructor(self.values.nanmin(axis=self.baseaxes, keepdims=True)) else: - return self._constructor(expand_dims(nanmin(self.values, axis=self.baseaxes), axis=self.baseaxes)) + return self._constructor(expand_dims(nanmin(self.values, axis=self.baseaxes), axis=self.baseaxes[0])) def between(self, left, right): """ From d97481d3291dfe0c93dd7c0106a159ad2fb596ab Mon Sep 17 00:00:00 2001 From: "Mohar, Boaz" Date: Wed, 18 May 2016 11:02:53 -0400 Subject: [PATCH 6/8] added new methods to base --- thunder/base.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/thunder/base.py b/thunder/base.py index 8b50fb8d..71e01d34 100644 --- a/thunder/base.py +++ b/thunder/base.py @@ -323,6 +323,42 @@ def min(self): """ raise NotImplementedError + def nanmean(self): + """ + Mean of values computed along the appropriate dimension ignoring NaNs. + """ + raise NotImplementedError + + def nansum(self): + """ + Sum of values computed along the appropriate dimension ignoring NaNs. + """ + raise NotImplementedError + + def nanvar(self): + """ + Variance of values computed along the appropriate dimension ignoring NaNs. + """ + raise NotImplementedError + + def nanstd(self): + """ + Standard deviation of values computed along the appropriate dimension ignoring NaNs. + """ + raise NotImplementedError + + def nanmax(self): + """ + Maximum of values computed along the appropriate dimension ignoring NaNs. + """ + raise NotImplementedError + + def nanmin(self): + """ + Minimum of values computed along the appropriate dimension ignoring NaNs.
+ """ + raise NotImplementedError + def map(self, func, **kwargs): """ Map a function over elements. From 26510cfe8049a56da40ce7e11c071585757c9de8 Mon Sep 17 00:00:00 2001 From: Mark Heppner Date: Wed, 21 Dec 2016 14:10:19 -0500 Subject: [PATCH 7/8] Fix tests and merge errors --- test/test_images.py | 2 +- thunder/series/series.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/test/test_images.py b/test/test_images.py index 7b994dda..18e52e84 100644 --- a/test/test_images.py +++ b/test/test_images.py @@ -308,7 +308,7 @@ def f(x): assert allclose(data.map_as_series(f, chunk_size=size, value_size=4).toarray(), result) def test_reshape_values(eng): - original = fromarray(arange(72).reshape(2, 6, 6), engine=eng) + original = fromlist(arange(72).reshape(2, 6, 6), engine=eng) arr = original.toarray() assert allclose(arr.reshape(2, 12, 3), original.reshape(2, 12, 3).toarray()) diff --git a/thunder/series/series.py b/thunder/series/series.py index 30afded9..5bcdfcaa 100755 --- a/thunder/series/series.py +++ b/thunder/series/series.py @@ -2,7 +2,7 @@ asarray, zeros, corrcoef, where, unique, array_equal, delete, \ ravel, logical_not, unravel_index, prod, random, shape, \ dot, outer, expand_dims, ScalarType, ndarray, sqrt, pi, angle, fft, \ - roll, polyfit, polyval, ceil, float64, fix, floor \ + roll, polyfit, polyval, ceil, float64, fix, floor, \ nanmean, nanstd, nanmin, nanmax, nansum, nanvar import logging from itertools import product @@ -302,7 +302,7 @@ def nanmin(self): return self._constructor(self.values.nanmin(axis=self.baseaxes, keepdims=True)) else: return self._constructor(expand_dims(nanmin(self.values, axis=self.baseaxes), axis=self.baseaxes[0])) - + def reshape(self, *shape): """ Reshape the Series object @@ -326,7 +326,6 @@ def reshape(self, *shape): newlabels = None return self._constructor(self.values.reshape(shape), labels=newlabels).__finalize__(self, noprop=('labels',)) ->>>>>>> 967ff8f3e7c2fabe1705743d95eb2746d4329786 def 
between(self, left, right): """ From 5066e108ad76a5ce1e330185448f998d3c236ec2 Mon Sep 17 00:00:00 2001 From: Mark Heppner Date: Wed, 21 Dec 2016 15:16:52 -0500 Subject: [PATCH 8/8] Fix nan calls in Spark mode --- thunder/images/images.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thunder/images/images.py b/thunder/images/images.py index 69b14fc8..a037c0f8 100644 --- a/thunder/images/images.py +++ b/thunder/images/images.py @@ -258,7 +258,7 @@ def nanstd(self): Compute the standard deviation across images ignoring the NaNs """ if self.mode == 'spark': - return self._constructor(self.values.nanmstd(axis=0, keepdims=True)) + return self._constructor(self.values.nanstd(axis=0, keepdims=True)) else: return self._constructor(expand_dims(nanstd(self.values, axis=0), axis=0)) @@ -267,7 +267,7 @@ def nansum(self): Compute the sum across images ignoring the NaNs """ if self.mode == 'spark': - return self._constructor(self.values.nanmsum(axis=0, keepdims=True)) + return self._constructor(self.values.nansum(axis=0, keepdims=True)) else: return self._constructor(expand_dims(nansum(self.values, axis=0), axis=0))