Skip to content

Commit 342e43f

Browse files
authored
✨ PySTACAPIItemLister to list STAC Items matching STAC API search (#111)
* ✨ PySTACAPIItemLister to list STAC Items matching STAC API search An iterable-style DataPipe to list STAC Items matching a STAC API search query! Calls pystac_client.ItemSearch.items() to yield pystac.Item instances. Included a doctest and a unit test that produces a list of STAC Items from a STAC API search that can be iterated over. Added a new section in the API docs too. * 🚑 Fix typo on docs/api.md Should be referencing `zen3geo.datapipes.pystac_client.PySTACAPIItemListerIterDataPipe` * 📝 Use non-deprecated .items() in object-detection-boxes tutorial PySTAC Client has renamed `ItemSearch.get_items()` to `ItemSearch.items()` in stac-utils/pystac-client#206, see also https://github.com/stac-utils/pystac-client/blob/v0.7.1/CHANGELOG.md#deprecated-1. * 📝 Intersphinx link to pystac.STACObject in PySTACItemReader docs Properly linking to https://pystac.readthedocs.io/en/1.0/api/pystac.html#pystac.STACObject in the docstring of PySTACItemReaderIterDataPipe.
1 parent 0caed05 commit 342e43f

File tree

6 files changed

+196
-6
lines changed

6 files changed

+196
-6
lines changed

docs/api.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151
.. automodule:: zen3geo.datapipes.pystac_client
5252
.. autoclass:: zen3geo.datapipes.PySTACAPISearcher
5353
.. autoclass:: zen3geo.datapipes.pystac_client.PySTACAPISearcherIterDataPipe
54+
.. autoclass:: zen3geo.datapipes.PySTACAPIItemLister
55+
.. autoclass:: zen3geo.datapipes.pystac_client.PySTACAPIItemListerIterDataPipe
5456
:show-inheritance:
5557
```
5658

docs/object-detection-boxes.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,12 +137,12 @@ catalog = pystac_client.Client.open(
137137
url="https://planetarycomputer.microsoft.com/api/stac/v1",
138138
modifier=planetary_computer.sign_inplace,
139139
)
140-
items = catalog.search(
140+
search = catalog.search(
141141
collections=["ms-buildings"],
142142
query={"msbuildings:region": {"eq": "Brunei"}},
143143
intersects=shapely.geometry.box(minx=114.94, miny=4.88, maxx=114.95, maxy=4.89),
144144
)
145-
item = next(items.get_items())
145+
item = next(search.items())
146146
item
147147
```
148148

zen3geo/datapipes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from zen3geo.datapipes.pyogrio import PyogrioReaderIterDataPipe as PyogrioReader
1313
from zen3geo.datapipes.pystac import PySTACItemReaderIterDataPipe as PySTACItemReader
1414
from zen3geo.datapipes.pystac_client import (
15+
PySTACAPIItemListerIterDataPipe as PySTACAPIItemLister,
1516
PySTACAPISearcherIterDataPipe as PySTACAPISearcher,
1617
)
1718
from zen3geo.datapipes.rioxarray import RioXarrayReaderIterDataPipe as RioXarrayReader

zen3geo/datapipes/pystac.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,9 @@ class PySTACItemReaderIterDataPipe(IterDataPipe):
2929
Yields
3030
------
3131
stac_item : pystac.Item
32-
An :py:class:`pystac.Item` object containing the specific STACObject
33-
implementation class represented in a JSON format.
32+
A :py:class:`pystac.Item` object containing the specific
33+
:py:class:`pystac.STACObject` implementation class represented in a
34+
JSON format.
3435
3536
Raises
3637
------

zen3geo/datapipes/pystac_client.py

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ class PySTACAPISearcherIterDataPipe(IterDataPipe):
7878
...
7979
>>> # Perform STAC API query using DataPipe
8080
>>> query = dict(
81-
... bbox=[174.5, -41.37, 174.9, -41.19],
81+
... bbox=[174.5, -41.37, 174.9, -41.19], # xmin, ymin, xmax, ymax
8282
... datetime=["2012-02-20T00:00:00Z", "2022-12-22T00:00:00Z"],
8383
... collections=["cop-dem-glo-30"],
8484
... )
@@ -133,3 +133,101 @@ def __iter__(self) -> Iterator:
133133

134134
def __len__(self) -> int:
135135
return len(self.source_datapipe)
136+
137+
138+
@functional_datapipe("list_pystac_items_by_search")
139+
class PySTACAPIItemListerIterDataPipe(IterDataPipe):
140+
"""
141+
Lists the :py:class:`pystac.Item` objects that match the provided STAC API
142+
search parameters (functional name: ``list_pystac_items_by_search``).
143+
144+
Parameters
145+
----------
146+
source_datapipe : IterDataPipe[pystac_client.ItemSearch]
147+
A DataPipe that contains :py:class:`pystac_client.ItemSearch` object
148+
instances that represent
149+
a deferred query to a STAC search endpoint as described in the
150+
`STAC API - Item Search spec <https://github.com/radiantearth/stac-api-spec/tree/main/item-search>`_.
151+
152+
Yields
153+
------
154+
stac_item : pystac.Item
155+
A :py:class:`pystac.Item` object containing the specific
156+
:py:class:`pystac.STACObject` implementation class represented in a
157+
JSON format.
158+
159+
Raises
160+
------
161+
ModuleNotFoundError
162+
If ``pystac_client`` is not installed. See
163+
:doc:`install instructions for pystac-client <pystac_client:index>`,
164+
(e.g. via ``pip install pystac-client``) before using this class.
165+
166+
Example
167+
-------
168+
>>> import pytest
169+
>>> pystac_client = pytest.importorskip("pystac_client")
170+
...
171+
>>> from torchdata.datapipes.iter import IterableWrapper
172+
>>> from zen3geo.datapipes import PySTACAPIItemLister
173+
...
174+
>>> # List STAC Items from a STAC API query
175+
>>> catalog = pystac_client.Client.open(
176+
... url="https://explorer.digitalearth.africa/stac/"
177+
... )
178+
>>> search = catalog.search(
179+
... bbox=[57.2, -20.6, 57.9, -19.9], # xmin, ymin, xmax, ymax
180+
... datetime=["2023-01-01T00:00:00Z", "2023-01-31T00:00:00Z"],
181+
... collections=["s2_l2a"],
182+
... )
183+
>>> dp = IterableWrapper(iterable=[search])
184+
>>> dp_pystac_item_list = dp.list_pystac_items_by_search()
185+
...
186+
>>> # Loop or iterate over the DataPipe stream
187+
>>> it = iter(dp_pystac_item_list)
188+
>>> stac_item = next(it)
189+
>>> stac_item
190+
<Item id=ec16dbf6-9729-5a8f-9d72-5e83a8b9f30d>
191+
>>> stac_item.properties # doctest: +NORMALIZE_WHITESPACE
192+
{'title': 'S2B_MSIL2A_20230103T062449_N0509_R091_T40KED_20230103T075000',
193+
'gsd': 10,
194+
'proj:epsg': 32740,
195+
'platform': 'sentinel-2b',
196+
'view:off_nadir': 0,
197+
'instruments': ['msi'],
198+
'eo:cloud_cover': 0.02,
199+
'odc:file_format': 'GeoTIFF',
200+
'odc:region_code': '40KED',
201+
'constellation': 'sentinel-2',
202+
'sentinel:sequence': '0',
203+
'sentinel:utm_zone': 40,
204+
'sentinel:product_id': 'S2B_MSIL2A_20230103T062449_N0509_R091_T40KED_20230103T075000',
205+
'sentinel:grid_square': 'ED',
206+
'sentinel:data_coverage': 28.61,
207+
'sentinel:latitude_band': 'K',
208+
'created': '2023-01-03T06:24:53Z',
209+
'sentinel:valid_cloud_cover': True,
210+
'sentinel:boa_offset_applied': True,
211+
'sentinel:processing_baseline': '05.09',
212+
'proj:shape': [10980, 10980],
213+
'proj:transform': [10.0, 0.0, 499980.0, 0.0, -10.0, 7900000.0, 0.0, 0.0, 1.0],
214+
'datetime': '2023-01-03T06:24:53Z',
215+
'cubedash:region_code': '40KED'}
216+
"""
217+
218+
def __init__(self, source_datapipe):
219+
if pystac_client is None:
220+
raise ModuleNotFoundError(
221+
"Package `pystac_client` is required to be installed to use this datapipe. "
222+
"Please use `pip install pystac-client` or "
223+
"`conda install -c conda-forge pystac-client` "
224+
"to install the package"
225+
)
226+
self.source_datapipe = source_datapipe
227+
228+
def __iter__(self):
229+
for item_search in self.source_datapipe:
230+
yield from item_search.items()
231+
232+
def __len__(self):
233+
return sum(item_search.matched() for item_search in self.source_datapipe)

zen3geo/tests/test_datapipes_pystac_client.py

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44
import pytest
55
from torchdata.datapipes.iter import IterableWrapper
66

7-
from zen3geo.datapipes import PySTACAPISearcher
7+
from zen3geo.datapipes import PySTACAPIItemLister, PySTACAPISearcher
88

99
pystac_client = pytest.importorskip("pystac_client")
1010

11+
1112
# %%
1213
def test_pystac_client_item_search():
1314
"""
@@ -85,3 +86,90 @@ def test_pystac_client_item_search_open_parameters():
8586
stac_item_search = next(it)
8687
assert stac_item_search.client.title == "Radiant MLHub API"
8788
assert stac_item_search.client.description == "stac-fastapi"
89+
90+
91+
def test_pystac_client_item_lister():
92+
"""
93+
Ensure that PySTACAPIItemLister works to yield pystac.Item instances for
94+
each item matching the given search parameters in a
95+
pystac_client.ItemSearch query.
96+
"""
97+
catalog = pystac_client.Client.open(
98+
url="https://earth-search.aws.element84.com/v1/"
99+
)
100+
search = catalog.search(
101+
bbox=[134.2, 6.9, 134.8, 8.5],
102+
datetime=["2023-01-01T00:00:00Z", "2023-01-31T00:00:00Z"],
103+
collections=["sentinel-2-l1c"],
104+
)
105+
dp = IterableWrapper(iterable=[search])
106+
107+
# Using class constructors
108+
dp_pystac_item_list = PySTACAPIItemLister(source_datapipe=dp)
109+
# Using functional form (recommended)
110+
dp_pystac_item_list = dp.list_pystac_items_by_search()
111+
112+
assert len(dp_pystac_item_list) == 14
113+
it = iter(dp_pystac_item_list)
114+
stac_item = next(it)
115+
assert stac_item.bbox == [
116+
134.093840347073,
117+
6.2442879900058115,
118+
135.08840137750929,
119+
7.237809826458827,
120+
]
121+
assert stac_item.datetime.isoformat() == "2023-01-29T01:35:24.640000+00:00"
122+
assert stac_item.geometry["type"] == "Polygon"
123+
assert stac_item.properties == {
124+
"created": "2023-01-29T06:01:33.679Z",
125+
"platform": "sentinel-2b",
126+
"constellation": "sentinel-2",
127+
"instruments": ["msi"],
128+
"eo:cloud_cover": 92.7676417582305,
129+
"proj:epsg": 32653,
130+
"mgrs:utm_zone": 53,
131+
"mgrs:latitude_band": "N",
132+
"mgrs:grid_square": "MH",
133+
"grid:code": "MGRS-53NMH",
134+
"view:sun_azimuth": 135.719785438016,
135+
"view:sun_elevation": 55.1713941690268,
136+
"s2:degraded_msi_data_percentage": 0.2816,
137+
"s2:product_type": "S2MSI1C",
138+
"s2:processing_baseline": "05.09",
139+
"s2:product_uri": "S2B_MSIL1C_20230129T013449_N0509_R031_T53NMH_20230129T025811.SAFE",
140+
"s2:generation_time": "2023-01-29T02:58:11.000000Z",
141+
"s2:datatake_id": "GS2B_20230129T013449_030802_N05.09",
142+
"s2:datatake_type": "INS-NOBS",
143+
"s2:datastrip_id": "S2B_OPER_MSI_L1C_DS_2BPS_20230129T025811_S20230129T013450_N05.09",
144+
"s2:granule_id": "S2B_OPER_MSI_L1C_TL_2BPS_20230129T025811_A030802_T53NMH_N05.09",
145+
"s2:reflectance_conversion_factor": 1.03193080888673,
146+
"datetime": "2023-01-29T01:35:24.640000Z",
147+
"s2:sequence": "0",
148+
"earthsearch:s3_path": "s3://earthsearch-data/sentinel-2-l1c/53/N/MH/2023/1/S2B_53NMH_20230129_0_L1C",
149+
"earthsearch:payload_id": "roda-sentinel2/workflow-sentinel2-to-stac/15626e44fb54c2182e5ed5d3aec4a209",
150+
"processing:software": {"sentinel2-to-stac": "0.1.0"},
151+
"updated": "2023-01-29T06:01:33.679Z",
152+
}
153+
assert stac_item.assets["visual"].extra_fields["eo:bands"] == [
154+
{
155+
"name": "red",
156+
"common_name": "red",
157+
"description": "Red (band 4)",
158+
"center_wavelength": 0.665,
159+
"full_width_half_max": 0.038,
160+
},
161+
{
162+
"name": "green",
163+
"common_name": "green",
164+
"description": "Green (band 3)",
165+
"center_wavelength": 0.56,
166+
"full_width_half_max": 0.045,
167+
},
168+
{
169+
"name": "blue",
170+
"common_name": "blue",
171+
"description": "Blue (band 2)",
172+
"center_wavelength": 0.49,
173+
"full_width_half_max": 0.098,
174+
},
175+
]

0 commit comments

Comments
 (0)