Skip to content

Commit 049aba3

Browse files
mdtanker and santisoler authored
Add percentile parameter to maxabs() function (#524)
Allow returning a percentile of the absolute values of the provided arrays, instead of just the maximum absolute value. For example, using the 95th percentile instead of the maximum will create a colorbar that is less washed out and reduces the effects of outliers in the data. If multiple arrays are provided, they need to be concatenated before calculating the percentile, so it can be significantly slower than just determining the maximum absolute value.

Co-authored-by: Santiago Soler <santisoler@fastmail.com>
1 parent abe9297 commit 049aba3

File tree

3 files changed

+103
-7
lines changed

3 files changed

+103
-7
lines changed

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,6 @@ notice = '''
2222
#
2323
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
2424
#'''
25+
26+
[tool.pytest.ini_options]
27+
doctest_optionflags = "NUMBER"

verde/tests/test_utils.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
grid_to_table,
2424
kdtree,
2525
make_xarray_grid,
26+
maxabs,
2627
meshgrid_from_1d,
2728
meshgrid_to_1d,
2829
parse_engine,
@@ -334,3 +335,60 @@ def test_check_ndim_easting_northing():
334335
northing = np.linspace(-5, 5, 16).reshape(4, 4)
335336
with pytest.raises(ValueError):
336337
get_ndim_horizontal_coords(easting, northing)
338+
339+
340+
def test_maxabs_nans():
341+
"""
342+
Test maxabs handles nans correctly
343+
"""
344+
assert float(maxabs((0, 100, 1, 2, np.nan))) == 100
345+
assert float(maxabs((np.nan, -3.2, -1, -2, 3.1))) == 3.2
346+
assert np.isnan(maxabs((np.nan, -3, -1, 3), nan=False))
347+
348+
349+
def test_maxabs_percentile():
350+
"""
351+
Test maxabs with percentile option
352+
"""
353+
# test generic functionality
354+
data = np.arange(1, 101)
355+
assert float(maxabs(data, percentile=100)) == 100
356+
assert pytest.approx(float(maxabs(data, percentile=90)), 0.1) == 90
357+
assert pytest.approx(float(maxabs(data, percentile=50)), 0.1) == 50
358+
359+
# test with nans
360+
data_with_nans = np.append(data, np.nan)
361+
assert float(maxabs(data_with_nans, percentile=100)) == 100
362+
assert pytest.approx(float(maxabs(data_with_nans, percentile=90)), 0.1) == 90
363+
assert pytest.approx(float(maxabs(data_with_nans, percentile=50)), 0.1) == 50
364+
assert (
365+
pytest.approx(float(maxabs(data_with_nans, percentile=90, nan=True)), 0.1) == 90
366+
)
367+
assert np.isnan(float(maxabs(data_with_nans, percentile=90, nan=False)))
368+
369+
# test with varying array sizes
370+
assert (
371+
pytest.approx(
372+
float(maxabs([0, 1, 2, 3, 4], [[-2, 2], [0, 5]], percentile=80)), 0.1
373+
)
374+
== 3.4
375+
)
376+
377+
# test invalid percentile types
378+
msg = "Invalid 'percentile' of type"
379+
with pytest.raises(TypeError, match=msg):
380+
maxabs(data, percentile="90")
381+
msg = "Invalid 'percentile' of type"
382+
with pytest.raises(TypeError, match=msg):
383+
maxabs(data, percentile=[90])
384+
msg = "Invalid 'percentile' of type"
385+
with pytest.raises(TypeError, match=msg):
386+
maxabs(data, percentile=None)
387+
388+
# test invalid percentile values
389+
msg = "Invalid 'percentile' value of"
390+
with pytest.raises(ValueError, match=msg):
391+
maxabs(data, percentile=-10)
392+
msg = "Invalid 'percentile' value of"
393+
with pytest.raises(ValueError, match=msg):
394+
maxabs(data, percentile=110)

verde/utils.py

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ def variance_to_weights(variance, tol=1e-15, dtype="float64"):
176176
return tuple(weights)
177177

178178

179-
def maxabs(*args, nan=True):
179+
def maxabs(*args, nan=True, percentile=100):
180180
"""
181181
Calculate the maximum absolute value of the given array(s).
182182
@@ -187,11 +187,19 @@ def maxabs(*args, nan=True):
187187
args
188188
One or more arrays. If more than one are given, a single maximum will
189189
be calculated across all arrays.
190+
nan : bool, optional
191+
If True, will use the ``nan`` version of numpy functions to ignore
192+
NaNs.
193+
percentile : float, optional
194+
Instead of returning the maximum absolute value, return a given
195+
percentile of the absolute values. Must be between 0 and 100. A
196+
value of 100 (default) will give the maximum absolute value, while
197+
a value of 50 will give the median of the absolute values.
190198
191199
Returns
192200
-------
193201
maxabs : float
194-
The maximum absolute value across all arrays.
202+
The maximum (or percentile) absolute value across all arrays.
195203
196204
Examples
197205
--------
@@ -216,14 +224,41 @@ def maxabs(*args, nan=True):
216224
>>> float(result)
217225
nan
218226
227+
If a more robust statistic is desired, you can use ``percentile`` to get
228+
the value at a given percentile instead of the maximum.
229+
230+
>>> result = maxabs((1, -10, 25, 2, 3), percentile=95)
231+
>>> float(result)
232+
21.99
233+
>>> result = maxabs((1, -10, 25, 2, 3), percentile=100)
234+
>>> float(result)
235+
25.0
236+
219237
"""
220238
arrays = [np.atleast_1d(i) for i in args]
221-
if nan:
222-
npmin, npmax = np.nanmin, np.nanmax
239+
240+
if percentile == 100:
241+
if nan:
242+
npmin, npmax = np.nanmin, np.nanmax
243+
else:
244+
npmin, npmax = np.min, np.max
245+
absolute = [npmax(np.abs([npmin(i), npmax(i)])) for i in arrays]
246+
return npmax(absolute)
223247
else:
224-
npmin, npmax = np.min, np.max
225-
absolute = [npmax(np.abs([npmin(i), npmax(i)])) for i in arrays]
226-
return npmax(absolute)
248+
if not isinstance(percentile, (int, float)):
249+
raise TypeError(
250+
f"Invalid 'percentile' of type '{type(percentile).__name__}'. Percentile must be a float or an integer."
251+
)
252+
if percentile < 0 or percentile > 100:
253+
raise ValueError(
254+
f"Invalid 'percentile' value of '{percentile}'. It must be between 0 and 100."
255+
)
256+
if nan:
257+
nppercentile = np.nanpercentile
258+
else:
259+
nppercentile = np.percentile
260+
combined_array = np.concatenate([np.abs(a.ravel()) for a in arrays])
261+
return nppercentile(combined_array, percentile)
227262

228263

229264
def make_xarray_grid(

0 commit comments

Comments
 (0)