Skip to content

Commit 98f0613

Browse files
Rup-Narayan-Rajbanshi authored and tnagorra committed
Add partial success for ibtrace.
1 parent 20d4a9f commit 98f0613

File tree

3 files changed

+72
-38
lines changed

3 files changed

+72
-38
lines changed

pystac_monty/sources/ibtracs.py

Lines changed: 43 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
import csv
44
import io
5+
import itertools
6+
import logging
7+
import typing
58
from typing import Dict, List, Union
69

710
import pytz
@@ -11,6 +14,10 @@
1114
from pystac_monty.extension import HazardDetail, MontyEstimateType, MontyExtension
1215
from pystac_monty.hazard_profiles import MontyHazardProfiles
1316
from pystac_monty.sources.common import MontyDataSource, MontyDataTransformer
17+
from pystac_monty.validators.ibtracs import IBTracsdataValidator
18+
19+
logger = logging.getLogger(__name__)
20+
1421

1522
STAC_EVENT_ID_PREFIX = "ibtracs-event-"
1623
STAC_HAZARD_ID_PREFIX = "ibtracs-hazard-"
@@ -47,15 +54,14 @@ def _parse_csv(self) -> List[Dict[str, str]]:
4754
csv_data.append(row)
4855
return csv_data
4956

50-
def get_storm_ids(self) -> List[str]:
51-
"""Get a list of unique storm IDs from the data."""
52-
data = self.get_data()
53-
return list(set(row.get("SID", "").strip() for row in data if row.get("SID")))
57+
def parse_row_data(self, rows: list[dict]):
58+
validated_data = []
59+
for row in rows:
60+
obj = IBTracsdataValidator.validate_event(row)
61+
if obj:
62+
validated_data.append(obj)
5463

55-
def get_storm_data(self, storm_id: str) -> List[Dict[str, str]]:
56-
"""Get all data rows for a specific storm ID."""
57-
data = self.get_data()
58-
return [row for row in data if row.get("SID", "").strip() == storm_id]
64+
return validated_data
5965

6066

6167
class IBTrACSTransformer(MontyDataTransformer[IBTrACSDataSource]):
@@ -64,37 +70,41 @@ class IBTrACSTransformer(MontyDataTransformer[IBTrACSDataSource]):
6470
hazard_profiles = MontyHazardProfiles()
6571
source_name = 'ibtracs'
6672

67-
def make_items(self) -> List[Item]:
68-
"""Create STAC Items from IBTrACS data.
73+
# FIXME: This is deprecated
74+
def make_items(self):
75+
return list(self.get_stac_items())
6976

70-
Returns:
71-
List of STAC Items (events and hazards)
72-
"""
73-
items = []
77+
def get_stac_items(self) -> typing.Generator[Item, None, None]:
78+
csv_data = self.data_source._parse_csv()
79+
csv_data.sort(key=lambda x: x.get("SID"))
7480

75-
storm_ids = self.data_source.get_storm_ids()
81+
grouped_rows = {}
82+
for sid, group in itertools.groupby(csv_data, key=lambda x: x.get("SID")):
83+
grouped_rows[sid] = list(group)
7684

77-
# Create event items (one per storm)
78-
for storm_id in storm_ids:
79-
try:
80-
event_hazard_items = []
81-
event_item = self.make_source_event_items(storm_id)
82-
event_hazard_items.append(event_item)
83-
hazard_items = self.make_hazard_items(event_item)
84-
event_hazard_items.extend(hazard_items)
85+
# # TODO: Use sax xml parser for memory efficient usage
86+
failed_items_count = 0
87+
total_items_count = 0
8588

86-
yield event_hazard_items
89+
for storm_id, storm_data in grouped_rows.items():
90+
total_items_count += 1
91+
try:
92+
storm_data = self.data_source.parse_row_data(storm_data)
93+
if event_item := self.make_source_event_items(storm_id, storm_data):
94+
yield event_item
95+
yield from self.make_hazard_items(event_item, storm_data)
8796
except Exception:
88-
logger.info("Transformation failed", exc_info=True)
97+
failed_items_count += 1
98+
logger.error("Failed to process desinventar", exc_info=True)
99+
100+
print(failed_items_count)
89101

90-
def make_source_event_items(self, storm_id) -> List[Item]:
102+
def make_source_event_items(self, storm_id, storm_data) -> List[Item]:
91103
"""Create source event items from IBTrACS data.
92104
93105
Returns:
94106
List of event STAC Items
95107
"""
96-
storm_data = self.data_source.get_storm_data(storm_id)
97-
98108
if not storm_data:
99109
return
100110

@@ -287,7 +297,7 @@ def make_source_event_items(self, storm_id) -> List[Item]:
287297

288298
return item
289299

290-
def make_hazard_items(self, event_item: Item) -> Item:
300+
def make_hazard_items(self, event_item: Item, storm_data) -> list[Item] | None:
291301
"""Create hazard items from IBTrACS data.
292302
293303
Args:
@@ -299,7 +309,6 @@ def make_hazard_items(self, event_item: Item) -> Item:
299309
hazard_items = []
300310

301311
storm_id = event_item.id
302-
storm_data = self.data_source.get_storm_data(storm_id)
303312

304313
if not storm_data:
305314
return
@@ -360,6 +369,8 @@ def make_hazard_items(self, event_item: Item) -> Item:
360369
except (ValueError, TypeError):
361370
wind = 0
362371

372+
wind = 0 if wind.strip() == "" else wind
373+
363374
try:
364375
pressure = float(row.USA_PRES if row.USA_PRES else 0)
365376
except (ValueError, TypeError):
@@ -559,14 +570,13 @@ def _get_countries_from_track(self, track_geometry: Union[LineString, Point]) ->
559570
if isinstance(track_geometry, LineString):
560571
for point in track_geometry.coords:
561572
lon, lat = point
562-
# country_code = self.geocoder.get_iso3_from_geometry(Point(lon, lat))
563-
country_code = "UNK"
573+
country_code = self.geocoder.get_iso3_from_geometry(Point(lon, lat))
564574
if country_code:
565575
countries.append(country_code)
566576
# For Point, check the single point
567577
elif isinstance(track_geometry, Point):
568578
lon, lat = track_geometry.x, track_geometry.y
569-
# country_code = self.geocoder.get_iso3_from_geometry(track_geometry)
579+
country_code = self.geocoder.get_iso3_from_geometry(track_geometry)
570580
country_code = "UNK"
571581
if country_code:
572582
countries.append(country_code)

pystac_monty/validators/glide.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,33 @@ class GlideSetValidator(BaseModelWithExtra):
3838

3939
@field_validator("event")
4040
def validate_enum(cls, value):
41-
if not value in ["EQ", "TC", "FL", "DR", "WF", "VO", "TS", "CW", "EP", "EC", "ET", "FR", "FF", "HT", "IN","LS","MS","ST","SL","AV","SS","AC","TO","VW","WV"]:
41+
if value not in [
42+
"EQ",
43+
"TC",
44+
"FL",
45+
"DR",
46+
"WF",
47+
"VO",
48+
"TS",
49+
"CW",
50+
"EP",
51+
"EC",
52+
"ET",
53+
"FR",
54+
"FF",
55+
"HT",
56+
"IN",
57+
"LS",
58+
"MS",
59+
"ST",
60+
"SL",
61+
"AV",
62+
"SS",
63+
"AC",
64+
"TO",
65+
"VW",
66+
"WV",
67+
]:
4268
raise ValueError(f"Event type {value} is not valid.")
4369
return value
4470

@@ -51,4 +77,4 @@ def validate_event(cls, data: dict) -> bool:
5177
logger.error(f"Validation failed: {e}")
5278
return False
5379
# If all field validators return True, we consider it valid
54-
return True
80+
return True

tests/extensions/test_ibtracs.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ def test_event_item_properties(self) -> None:
299299
data_source = IBTrACSDataSource("test_url", SAMPLE_IBTRACS_CSV)
300300
transformer = IBTrACSTransformer(data_source, geocoder)
301301

302-
#items = list(transformer.make_items())
302+
# items = list(transformer.make_items())
303303

304304
# Find the event item
305305
event_items = [item for item in transformer.make_items() if "event" in item.properties.get("roles", [])]
@@ -329,8 +329,6 @@ def test_hazard_item_properties(self) -> None:
329329
data_source = IBTrACSDataSource("test_url", SAMPLE_IBTRACS_CSV)
330330
transformer = IBTrACSTransformer(data_source, geocoder)
331331

332-
items = transformer.make_items()
333-
334332
# Find hazard items
335333
hazard_items = [item for item in transformer.make_items() if "hazard" in item.properties.get("roles", [])]
336334
self.assertGreater(len(hazard_items), 0)

0 commit comments

Comments
 (0)