Skip to content

Commit b1172de

Browse files
authored
No fill_value=None; use fill value out-of-bounds (#115)
1 parent e6b030a commit b1172de

File tree

5 files changed: 33 additions and 39 deletions.

stackstac/nodata_reader.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
from typing import Optional, Tuple, Type, Union, cast
1+
from typing import Tuple, Type, Union, cast
22
import re
33

44
import numpy as np
55
from rasterio.windows import Window
66

77
from .reader_protocol import Reader
88

9-
State = Tuple[np.dtype, Optional[Union[int, float]]]
9+
State = Tuple[np.dtype, Union[int, float]]
1010

1111

1212
class NodataReader:
@@ -17,7 +17,7 @@ def __init__(
1717
self,
1818
*,
1919
dtype: np.dtype,
20-
fill_value: Optional[Union[int, float]] = None,
20+
fill_value: Union[int, float],
2121
**kwargs,
2222
) -> None:
2323
self.dtype = dtype
@@ -36,13 +36,7 @@ def __setstate__(self, state: State) -> None:
3636
self.dtype, self.fill_value = state
3737

3838

39-
def nodata_for_window(
40-
window: Window, fill_value: Optional[Union[int, float]], dtype: np.dtype
41-
):
42-
assert (
43-
fill_value is not None
44-
), "Trying to convert an exception to nodata, but `fill_value` is None"
45-
39+
def nodata_for_window(window: Window, fill_value: Union[int, float], dtype: np.dtype):
4640
height = cast(int, window.height)
4741
width = cast(int, window.width)
4842
# Argument of type "tuple[_T@attrib, _T@attrib]" cannot be assigned to parameter "shape" of type "_ShapeLike"

stackstac/reader_protocol.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,17 @@ class Reader(Pickleable, Protocol):
2626
Protocol for a thread-safe, lazily-loaded object for reading data from a single-band STAC asset.
2727
"""
2828

29+
fill_value: Union[int, float]
30+
dtype: np.dtype
31+
2932
def __init__(
3033
self,
3134
*,
3235
url: str,
3336
spec: RasterSpec,
3437
resampling: Resampling,
3538
dtype: np.dtype,
36-
fill_value: Optional[Union[int, float]],
39+
fill_value: Union[int, float],
3740
rescale: bool,
3841
gdal_env: Optional[LayeredEnv],
3942
errors_as_nodata: Tuple[Exception, ...] = (),
@@ -113,13 +116,15 @@ class FakeReader:
113116
or inherent to the dask graph.
114117
"""
115118

116-
def __init__(self, *, url: str, spec: RasterSpec, **kwargs) -> None:
119+
def __init__(
120+
self, *, dtype: np.dtype, fill_value: Union[int, float], **kwargs
121+
) -> None:
117122
pass
118-
# self.url = url
119-
# self.spec = spec
123+
self.dtype = dtype
124+
self.fill_value = fill_value
120125

121126
def read(self, window: Window, **kwargs) -> np.ndarray:
122-
return np.random.random((window.height, window.width))
127+
return np.random.random((window.height, window.width)).astype(self.dtype)
123128

124129
def close(self) -> None:
125130
pass

stackstac/rio_reader.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ class PickleState(TypedDict):
279279
spec: RasterSpec
280280
resampling: Resampling
281281
dtype: np.dtype
282-
fill_value: Optional[Union[int, float]]
282+
fill_value: Union[int, float]
283283
rescale: bool
284284
gdal_env: Optional[LayeredEnv]
285285
errors_as_nodata: Tuple[Exception, ...]
@@ -302,7 +302,7 @@ def __init__(
302302
spec: RasterSpec,
303303
resampling: Resampling,
304304
dtype: np.dtype,
305-
fill_value: Optional[Union[int, float]],
305+
fill_value: Union[int, float],
306306
rescale: bool,
307307
gdal_env: Optional[LayeredEnv] = None,
308308
errors_as_nodata: Tuple[Exception, ...] = (),
@@ -407,8 +407,6 @@ def read(self, window: Window, **kwargs) -> np.ndarray:
407407

408408
result = result.astype(self.dtype, copy=False)
409409
result = np.ma.filled(result, fill_value=self.fill_value)
410-
# ^ NOTE: if `self.fill_value` was None, rasterio set the masked array's fill value to the
411-
# nodata value of the band, which `np.ma.filled` will then use.
412410
return result
413411

414412
def close(self) -> None:

stackstac/stack.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def stack(
3030
resampling: Resampling = Resampling.nearest,
3131
chunksize: int = 1024,
3232
dtype: np.dtype = np.dtype("float64"),
33-
fill_value: Optional[Union[int, float]] = np.nan,
33+
fill_value: Union[int, float] = np.nan,
3434
rescale: bool = True,
3535
sortby_date: Literal["asc", "desc", False] = "asc",
3636
xy_coords: Literal["center", "topleft", False] = "topleft",
@@ -192,9 +192,7 @@ def stack(
192192
don't set it here---instead, call ``.chunk`` on the DataArray to re-chunk it.
193193
dtype:
194194
The NumPy data type of the output array. Default: ``float64``. Must be a data type
195-
that's compatible with ``fill_value``. Note that if ``fill_value`` is None, whatever nodata
196-
value is set in each asset's file will be used, so that value needs to be compatible
197-
with ``dtype`` as well.
195+
that's compatible with ``fill_value``.
198196
fill_value:
199197
Value to fill nodata/masked pixels with. Default: ``np.nan``.
200198
@@ -249,7 +247,7 @@ def stack(
249247
errors_as_nodata:
250248
Exception patterns to ignore when opening datasets or reading data.
251249
Exceptions matching the pattern will be logged as warnings, and just
252-
produce nodata (``fill_value``). A non-None ``fill_value`` is required when using this.
250+
produce nodata (``fill_value``).
253251
254252
The exception patterns should be instances of an Exception type to catch,
255253
where ``str(exception_pattern)`` is a regex pattern to match against
@@ -282,9 +280,9 @@ def stack(
282280
if sortby_date is not False:
283281
plain_items = sorted(
284282
plain_items,
285-
key=lambda item: item["properties"].get("datetime", ""),
283+
key=lambda item: item["properties"].get("datetime", "") or "",
286284
reverse=sortby_date == "desc",
287-
) # type: ignore
285+
)
288286

289287
asset_table, spec, asset_ids, plain_items = prepare_items(
290288
plain_items,

stackstac/to_dask.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,12 @@ def items_to_dask(
2424
chunksize: int,
2525
resampling: Resampling = Resampling.nearest,
2626
dtype: np.dtype = np.dtype("float64"),
27-
fill_value: Optional[Union[int, float]] = np.nan,
27+
fill_value: Union[int, float] = np.nan,
2828
rescale: bool = True,
2929
reader: Type[Reader] = AutoParallelRioReader,
3030
gdal_env: Optional[LayeredEnv] = None,
3131
errors_as_nodata: Tuple[Exception, ...] = (),
3232
) -> da.Array:
33-
if fill_value is None and errors_as_nodata:
34-
raise ValueError(
35-
"A non-None `fill_value` is required when using `errors_as_nodata`. "
36-
"If an exception occurs, we need to know what to use as the nodata value, "
37-
"since there may not be an open dataset to infer it from."
38-
)
3933
errors_as_nodata = errors_as_nodata or () # be sure it's not None
4034

4135
if fill_value is not None and not np.can_cast(fill_value, dtype):
@@ -114,17 +108,18 @@ def asset_entry_to_reader_and_window(
114108
spec: RasterSpec,
115109
resampling: Resampling,
116110
dtype: np.dtype,
117-
fill_value: Optional[Union[int, float]],
111+
fill_value: Union[int, float],
118112
rescale: bool,
119113
gdal_env: Optional[LayeredEnv],
120114
errors_as_nodata: Tuple[Exception, ...],
121115
reader: Type[ReaderT],
122-
) -> Optional[Tuple[ReaderT, windows.Window]]:
116+
) -> Tuple[ReaderT, windows.Window] | np.ndarray:
123117
asset_entry = asset_entry[0, 0]
124118
# ^ because dask adds extra outer dims in `from_array`
125119
url = asset_entry["url"]
126120
if url is None:
127-
return None
121+
# Signifies empty value
122+
return np.array(fill_value, dtype)
128123

129124
asset_bounds: Bbox = asset_entry["bounds"]
130125
asset_window = windows.from_bounds(*asset_bounds, transform=spec.transform)
@@ -159,11 +154,11 @@ def asset_entry_to_reader_and_window(
159154

160155

161156
def fetch_raster_window(
162-
asset_entry: Optional[Tuple[Reader, windows.Window]],
157+
asset_entry: Tuple[ReaderT, windows.Window] | np.ndarray,
163158
slices: Tuple[slice, ...],
164159
) -> np.ndarray:
165160
current_window = windows.Window.from_slices(*slices)
166-
if asset_entry is not None:
161+
if isinstance(asset_entry, tuple):
167162
reader, asset_window = asset_entry
168163

169164
# check that the window we're fetching overlaps with the asset
@@ -172,7 +167,11 @@ def fetch_raster_window(
172167
data = reader.read(current_window)
173168

174169
return data[None, None]
170+
fill_arr = np.array(reader.fill_value, reader.dtype)
171+
else:
172+
fill_arr: np.ndarray = asset_entry
175173

176174
# no dataset, or we didn't overlap it: return empty data.
177175
# use the broadcast trick for even fewer memz
178-
return np.broadcast_to(np.nan, (1, 1) + windows.shape(current_window))
176+
return np.broadcast_to(fill_arr, (1, 1) + windows.shape(current_window))
177+

0 commit comments

Comments
 (0)