Skip to content

Commit 62d40b6

Browse files
committed
Add param for filtering collections to harvest
1 parent 301d0c0 commit 62d40b6

1 file changed

Lines changed: 19 additions & 7 deletions

File tree

src/worker/stac/tasks.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,15 @@ def execute(self, job: ExternalJob, result: JobResultBuilder, config: dict) -> J
3838
log_with_context("Missing required variable: stac_catalog_source", log_context)
3939
return result.failure()
4040

41+
stac_catalog_collections = job.get_variable("stac_catalog_collections")
42+
if not stac_catalog_collections:
43+
log_with_context(
44+
f"No collections provided. All collections from {stac_catalog_source} will be harvested", log_context
45+
)
46+
else:
47+
if len(stac_catalog_collections) > 0:
48+
collections_to_harvest = stac_catalog_collections.replace(" ", "").split(",")
49+
4150
try:
4251
log_with_context(f"Loading STAC catalog from: {stac_catalog_source}", log_context)
4352

@@ -47,10 +56,13 @@ def execute(self, job: ExternalJob, result: JobResultBuilder, config: dict) -> J
4756
try:
4857
# Handle STAC APIs
4958
StacIO.set_default(FSSpecStacIO)
50-
stac_collection_source = [
51-
collection.get_self_href()
52-
for collection in extract_stac_api_collections(stac_catalog_source)
53-
]
59+
stac_collection_source = []
60+
for collection in extract_stac_api_collections(stac_catalog_source):
61+
if collection.id in collections_to_harvest:
62+
log_with_context(f"Processing collection {collection.id}", log_context)
63+
stac_collection_source.append(collection.get_self_href())
64+
else:
65+
log_with_context(f"Ignoring collection {collection.id}", log_context)
5466

5567
except pystac_client.errors.ClientTypeError as e:
5668
# Handle collections
@@ -64,7 +76,7 @@ def execute(self, job: ExternalJob, result: JobResultBuilder, config: dict) -> J
6476
)
6577
else:
6678
raise e
67-
except Exception as e:
79+
except Exception:
6880
StacIO.set_default(FSSpecStacIO)
6981
catalog = Catalog.from_file(stac_catalog_source)
7082
stac_collection_source = [
@@ -172,7 +184,7 @@ def execute(self, job: ExternalJob, result: JobResultBuilder, config: dict) -> J
172184
client = pystac_client.Client.open(catalog_url)
173185
collection = client.get_collection(collection_name)
174186

175-
except Exception as e:
187+
except Exception:
176188
StacIO.set_default(FSSpecStacIO)
177189
collection = Collection.from_file(stac_collection_source)
178190

@@ -302,7 +314,7 @@ def execute(self, job: ExternalJob, result: JobResultBuilder, config: dict) -> J
302314
client = pystac_client.Client.open(catalog_url)
303315
search = client.search(ids=[item_id], collections=[collection_name])
304316
item = [item for item in search.items()][0]
305-
except Exception as e:
317+
except Exception:
306318
StacIO.set_default(FSSpecStacIO)
307319
item = Item.from_file(stac_item_source)
308320

0 commit comments

Comments
 (0)