Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ repos:
- id: ruff-format
types_or: [python, pyi, jupyter, toml]

- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.6.9
hooks:
- id: uv-lock
args: ["--locked", "--offline"]

- repo: https://github.com/RobertCraigie/pyright-python
rev: v1.1.396
hooks:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ dependencies = [
"pytz>=2021.1",
"pandas>=2.2.0",
"lxml>=5.3.0",
"pydantic",
"pydantic>=2.10.6",
]
dynamic = ["version"]

Expand Down
3 changes: 2 additions & 1 deletion pystac_monty/geocoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,8 @@ def get_geometry_by_country_name(self, country_name: str) -> Optional[Dict[str,
return None

def get_iso3_from_geometry(self, geometry: Dict[str, Any]) -> Optional[str]:
    """Return the ISO3 country code for ``geometry``.

    The reverse lookup is not implemented yet, so ``geometry`` is ignored
    and the placeholder code ``"UNK"`` is returned for every input.

    Args:
        geometry: Geometry mapping to resolve (currently unused).

    Returns:
        Always the placeholder string ``"UNK"``.
    """
    # FIXME: Implement this later
    return "UNK"

def get_geometry_from_iso3(self, iso3: str) -> Optional[Dict[str, Any]]:
    """Look up a geometry for the given ISO3 code (not implemented).

    Args:
        iso3: ISO3 code to resolve.

    Raises:
        NotImplementedError: Always; the first statement raises unconditionally.
    """
    raise NotImplementedError("Method not implemented")
Expand Down
104 changes: 9 additions & 95 deletions pystac_monty/sources/desinventar.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,107 +24,19 @@
)
from pystac_monty.hazard_profiles import MontyHazardProfiles
from pystac_monty.sources.common import MontyDataTransformer

STAC_EVENT_ID_PREFIX = "desinventar-event-"
STAC_HAZARD_ID_PREFIX = "desinventar-hazard-"
STAC_IMPACT_ID_PREFIX = "desinventar-impact-"
from pystac_monty.validators.desinventar import (
STAC_EVENT_ID_PREFIX,
STAC_HAZARD_ID_PREFIX,
STAC_IMPACT_ID_PREFIX,
DataRow,
GeoDataEntry,
)

logger = logging.getLogger(__name__)

T = typing.TypeVar("T")


class GeoDataEntry(TypedDict):
    """Typed dictionary with three keys, each of which may hold ``None``."""

    # NOTE(review): presumably the admin level name this entry belongs to
    # (compare DataRow.level0/level1/level2) — confirm against the producer.
    level: Optional[str]
    # NOTE(review): presumably the shapefile attribute that carries the
    # region code — confirm against the producer.
    property_code: Optional[str]
    # GeoDataFrame for this entry, or None when none is available.
    shapefile_data: Optional[gpd.GeoDataFrame]


# Properties extracted from desinventar
class DataRow(pydantic.BaseModel):
    """One DesInventar record plus values derived from it.

    The fields mirror the properties extracted from DesInventar; the
    commented-out names are source properties that are not extracted.
    The read-only properties build the STAC event id, title, description
    and start date from the stored fields.
    """

    serial: str
    comment: str | None
    # source: str | None

    deaths: float | None
    injured: float | None
    missing: float | None
    houses_destroyed: float | None
    houses_damaged: float | None
    directly_affected: float | None
    indirectly_affected: float | None
    relocated: float | None
    evacuated: float | None
    losses_in_dollar: float | None
    losses_local_currency: float | None
    # education_centers: str | None
    # hospitals: str | None
    damages_in_crops_ha: float | None
    lost_cattle: float | None
    damages_in_roads_mts: float | None

    level0: str | None
    level1: str | None
    level2: str | None
    # name0: str | None
    # name1: str | None
    # name2: str | None
    # latitude: str | None
    # longitude: str | None

    # haz_maxvalue: str | None
    event: str | None
    # glide: str | None
    location: str | None

    # duration: str | None
    year: int
    month: int | None
    day: int | None

    # Added fields

    iso3: str
    data_source_url: str | None

    @property
    def event_stac_id(self) -> str:
        """STAC id of the event: prefix, ISO3 code and serial."""
        return f"{STAC_EVENT_ID_PREFIX}{self.iso3}-{self.serial}"

    @property
    def event_title(self) -> str:
        """Human-readable title built from event, location and start date."""
        return f"{self.event} in {self.location} on {self.event_start_date}"

    @property
    def event_description(self) -> str:
        """Description built from event, location and comment."""
        return f"{self.event} in {self.location}: {self.comment}"

    @property
    def event_start_date(self) -> datetime | None:
        """UTC-aware start date of the event, or ``None`` if it cannot be built.

        A missing (or zero) month or day falls back to 1. Any year/month/day
        combination that ``datetime`` rejects (out of range, wrong type,
        including a ``None`` year) yields ``None`` instead of raising.
        """
        try:
            naive_start = datetime(self.year, self.month or 1, self.day or 1)
        except (TypeError, ValueError, OverflowError):
            return None
        # The datetime built above is always naive, so localize cannot fail.
        return pytz.utc.localize(naive_start)

    @property
    def lowest_level(self) -> str | None:
        """Name of the most specific level field that is set, else ``None``.

        Checked in the order ``level2``, ``level1``, ``level0``.
        """
        for field_name in ("level2", "level1", "level0"):
            if getattr(self, field_name) is not None:
                return field_name
        return None


# TODO: move to common utils
def get_list_item_safe(lst: list[T], index: int, default_value: T | None = None) -> T | None:
try:
Expand Down Expand Up @@ -686,6 +598,8 @@ def get_stac_items(self) -> typing.Generator[Item, None, None]:
if event_item := self._create_event_item_from_row(row_data):
yield event_item
yield from self._create_impact_items_from_row(row_data, event_item)
else:
failed_items_count += 1
except Exception:
failed_items_count += 1
logger.error('Failed to process desinventar', exc_info=True)
Expand Down
15 changes: 14 additions & 1 deletion pystac_monty/sources/emdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pystac_monty.hazard_profiles import MontyHazardProfiles
from pystac_monty.sources.common import MontyDataSource, MontyDataTransformer
from pystac_monty.utils import rename_columns
from pystac_monty.validators.em_dat import EmdatDataValidator

STAC_EVENT_ID_PREFIX = "emdat-event-"
STAC_HAZARD_ID_PREFIX = "emdat-hazard-"
Expand All @@ -40,12 +41,24 @@ def __init__(self, source_url: str, data: Union[str, pd.DataFrame]):
self.df = data
elif isinstance(data, dict):
# If data is a dict, assume it's Json content
data = data["data"]["public_emdat"]["data"]
# data = data["data"]["public_emdat"]["data"]
data = self.source_data_validator(data["data"]["public_emdat"]["data"])
df = pd.DataFrame(data)
self.df = rename_columns(df)
else:
raise ValueError("Data must be either Excel content (str) or pandas DataFrame or Json")

def source_data_validator(self, data):
    """Return the items of ``data`` accepted by ``EmdatDataValidator``.

    Args:
        data: Iterable of raw records (the caller passes the list found
            under ``data["data"]["public_emdat"]["data"]``).

    Returns:
        A new list holding, in their original order, the items for which
        ``EmdatDataValidator.validate_event`` returned a truthy value.
        Rejected items are dropped.
    """
    return [item for item in data if EmdatDataValidator.validate_event(item)]

def get_data(self) -> pd.DataFrame:
return self.df

Expand Down
21 changes: 20 additions & 1 deletion pystac_monty/sources/gdacs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
)
from pystac_monty.hazard_profiles import MontyHazardProfiles
from pystac_monty.sources.common import MontyDataSource, MontyDataTransformer
from pystac_monty.validators.gdacs_events import GdacsDataValidatorEvents
from pystac_monty.validators.gdacs_geometry import GdacsDataValidatorGeometry

# Constants

Expand All @@ -46,7 +48,24 @@ def __init__(self, source_url: str, data: Any, type: GDACSDataSourceType):
super().__init__(source_url, data)
self.type = type
# all gdacs data are json
self.data = json.loads(data)
self.data = self.source_data_validator(json.loads(data))

def source_data_validator(self, data: dict):
    """Validate parsed GDACS JSON according to ``self.type``.

    Args:
        data: Parsed JSON payload.

    Returns:
        * ``EVENT`` sources: ``data`` itself when
          ``GdacsDataValidatorEvents.validate_event`` accepts it, else ``None``.
        * ``GEOMETRY`` sources: a shallow copy of ``data`` in which a
          list-valued ``"features"`` entry keeps only the features accepted
          by ``GdacsDataValidatorGeometry.validate_event``; every other
          key/value pair is copied unchanged.
        * Any other source type: ``None``.
    """
    if self.type == GDACSDataSourceType.EVENT:
        # NOTE(review): an invalid event leaves the caller with None as its
        # data — confirm downstream code copes with that.
        return data if GdacsDataValidatorEvents.validate_event(data) else None

    if self.type == GDACSDataSourceType.GEOMETRY:
        return {
            key: (
                [feature for feature in value if GdacsDataValidatorGeometry.validate_event(feature)]
                if key == "features" and isinstance(value, list)
                else value
            )
            for key, value in data.items()
        }

    return None

def get_type(self) -> GDACSDataSourceType:
return self.type
Expand Down
18 changes: 16 additions & 2 deletions pystac_monty/sources/gfd.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)
from pystac_monty.hazard_profiles import MontyHazardProfiles
from pystac_monty.sources.common import MontyDataSource, MontyDataTransformer
from pystac_monty.validators.gfd import GFDSourceValidator

# Constants

Expand All @@ -28,7 +29,20 @@ class GFDDataSource(MontyDataSource):

def __init__(self, source_url: str, data: Any):
    """Parse the JSON payload and keep only the validated items.

    Args:
        source_url: Passed through to the base class.
        data: JSON text; parsed exactly once with ``json.loads`` and then
            filtered by ``source_data_validator``.
    """
    super().__init__(source_url, data)
    self.data = self.source_data_validator(json.loads(data))

def source_data_validator(self, data: list[dict]):
    """Validate the source data and return only the items that pass.

    Args:
        data: Parsed items to check.

    Returns:
        A new list holding, in their original order, the items for which
        ``GFDSourceValidator.validate_event`` returned a truthy value.
    """
    # TODO: Handle the failed items (they are currently dropped).
    return [item for item in data if GFDSourceValidator.validate_event(item)]


class GFDTransformer(MontyDataTransformer[GFDDataSource]):
Expand Down Expand Up @@ -85,7 +99,7 @@ def make_source_event_item(self, data: dict) -> Item:
enddate = pytz.utc.localize(datetime.fromtimestamp(data["system:time_end"] / 1000))

item = Item(
id=f'{STAC_EVENT_ID_PREFIX}{data["id"]}',
id=f"{STAC_EVENT_ID_PREFIX}{data['id']}",
geometry=geometry,
bbox=bbox,
datetime=startdate,
Expand Down
16 changes: 16 additions & 0 deletions pystac_monty/sources/gidd.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pystac_monty.hazard_profiles import MontyHazardProfiles
from pystac_monty.sources.common import MontyDataSource, MontyDataTransformer
from pystac_monty.sources.utils import IDMCUtils
from pystac_monty.validators.gidd import GiddValidator

logger = logging.getLogger(__name__)

Expand All @@ -31,6 +32,21 @@ class GIDDDataSource(MontyDataSource):
def __init__(self, source_url: str, data: Any):
    """Parse the JSON payload and keep only the validated features.

    Args:
        source_url: Passed through to the base class.
        data: JSON text; parsed exactly once with ``json.loads`` and then
            filtered by ``source_data_validator``.
    """
    super().__init__(source_url, data)
    self.data = self.source_data_validator(json.loads(data))

def source_data_validator(self, data: dict):
    """Filter the ``"features"`` list through ``GiddValidator``; copy the rest.

    Returns a new dict with the same keys in the same order. When the
    ``"features"`` value is a list, it is replaced by a list of only those
    features for which ``GiddValidator.validate_event`` is truthy. Every
    other entry (including a non-list ``"features"``) is carried over as is.
    """

    def _filtered(name, payload):
        # Only a list stored under "features" is validated item by item.
        if name != "features" or not isinstance(payload, list):
            return payload
        return [entry for entry in payload if GiddValidator.validate_event(entry)]

    return {name: _filtered(name, payload) for name, payload in data.items()}


class GIDDTransformer(MontyDataTransformer[GIDDDataSource]):
Expand Down
15 changes: 14 additions & 1 deletion pystac_monty/sources/glide.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pystac_monty.extension import HazardDetail, MontyEstimateType, MontyExtension
from pystac_monty.hazard_profiles import MontyHazardProfiles
from pystac_monty.sources.common import MontyDataSource, MontyDataTransformer
from pystac_monty.validators.glide import GlideSetValidator

STAC_EVENT_ID_PREFIX = "glide-event-"
STAC_HAZARD_ID_PREFIX = "glide-hazard-"
Expand All @@ -17,7 +18,19 @@
class GlideDataSource(MontyDataSource):
def __init__(self, source_url: str, data: Any):
    """Parse the JSON payload and keep only the validated glideset items.

    Args:
        source_url: Passed through to the base class.
        data: JSON text; parsed exactly once with ``json.loads`` and then
            filtered by ``source_data_validator``.
    """
    super().__init__(source_url, data)
    self.data = self.source_data_validator(json.loads(data))

def source_data_validator(self, data: dict[str, list[dict]]):
    """Validate the source data and return only the items that pass.

    Args:
        data: Parsed payload; its ``"glideset"`` entry is iterated.

    Returns:
        A new dict with the single key ``"glideset"`` mapping to the items
        for which ``GlideSetValidator.validate_event`` returned a truthy
        value, in their original order. Other keys of ``data`` are not
        carried over.

    Raises:
        KeyError: If ``data`` has no ``"glideset"`` key.
    """
    accepted = [item for item in data["glideset"] if GlideSetValidator.validate_event(item)]
    return {"glideset": accepted}


class GlideTransformer(MontyDataTransformer[GlideDataSource]):
Expand Down
Loading
Loading