Skip to content

Commit f0923ab

Browse files
Merge remote-tracking branch 'origin/main' into ibtracs
2 parents b73aa81 + 5da4fb8 commit f0923ab

File tree

8 files changed

+975
-15
lines changed

8 files changed

+975
-15
lines changed

pystac_monty/geocoding.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from typing import Any, Dict, List, Optional, Set, Union
55

66
import fiona # type: ignore
7+
import requests
78
from shapely.geometry import mapping, shape # type: ignore
89
from shapely.ops import unary_union # type: ignore
910

@@ -191,7 +192,7 @@ class GAULGeocoder(MontyGeoCoder):
191192
Loads features dynamically as needed.
192193
"""
193194

194-
def __init__(self, gpkg_path: str, simplify_tolerance: float = 0.01) -> None:
195+
def __init__(self, gpkg_path: Optional[str], service_base_url: Optional[str], simplify_tolerance: float = 0.01) -> None:
195196
"""
196197
Initialize GAULGeocoder
197198
@@ -206,8 +207,16 @@ def __init__(self, gpkg_path: str, simplify_tolerance: float = 0.01) -> None:
206207
self._simplify_tolerance = simplify_tolerance
207208
self._cache: Dict[str, Union[Dict[str, Any], int, None]] = {} # Cache for frequently accessed geometries
208209
self._file_handle = None
209-
self._initialize_path()
210-
self._open_file()
210+
211+
if not gpkg_path and not service_base_url:
212+
raise ValueError("At least the gpkg_path or service_base_url should be set.")
213+
214+
if self.gpkg_path:
215+
self._initialize_path()
216+
self._open_file()
217+
else:
218+
self.service_base_url = service_base_url
219+
self.request_timeout = 30
211220

212221
def __enter__(self) -> "GAULGeocoder":
213222
"""Context manager entry point"""
@@ -380,6 +389,12 @@ def _get_name_to_adm0_mapping(self, name: str) -> Optional[int]:
380389
return adm0_code
381390
return None
382391

392+
def _service_request_handler(self, service_url: str, params: Dict[str, Any]) -> Optional[Any]:
    """Send a GET request to the geocoding service and return its decoded JSON body.

    Args:
        service_url: URL of the service endpoint to query.
        params: Query-string parameters sent with the request.

    Returns:
        The decoded JSON payload when the service answers with HTTP 200.
        None when the status is not 200, when the request itself fails
        (connection error, timeout, ...), or when the body is not valid JSON.
    """
    try:
        response = requests.get(service_url, params=params, timeout=self.request_timeout)
    except requests.RequestException:
        # Report transport failures the same way as a non-200 reply so that
        # callers only ever have to handle a None result.
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 reply whose body cannot be decoded carries no usable result.
        return None
397+
383398
def get_geometry_from_admin_units(self, admin_units: str) -> Optional[Dict[str, Any]]:
384399
"""
385400
Get geometry from admin units JSON string
@@ -453,6 +468,12 @@ def get_geometry_by_country_name(self, country_name: str) -> Optional[Dict[str,
453468
Returns:
454469
Dictionary containing geometry and bbox if found
455470
"""
471+
472+
if not self.gpkg_path:
473+
params = {"country_name": country_name}
474+
service_url = f"{self.service_base_url}/by_country_name"
475+
return self._service_request_handler(service_url=service_url, params=params)
476+
456477
if not country_name or not self._path:
457478
return None
458479

pystac_monty/sources/emdat.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from datetime import datetime
33
from typing import List, Optional, Union
44

5+
import numpy as np
56
import pandas as pd
67
import pytz
78
from pystac import Item, Link
@@ -18,6 +19,7 @@
1819
from pystac_monty.geocoding import MontyGeoCoder
1920
from pystac_monty.hazard_profiles import MontyHazardProfiles
2021
from pystac_monty.sources.common import MontyDataSource, MontyDataTransformer
22+
from pystac_monty.utils import rename_columns
2123

2224
STAC_EVENT_ID_PREFIX = "emdat-event-"
2325
STAC_HAZARD_ID_PREFIX = "emdat-hazard-"
@@ -37,8 +39,13 @@ def __init__(self, source_url: str, data: Union[str, pd.DataFrame]):
3739
self.df = pd.read_excel(data)
3840
elif isinstance(data, pd.DataFrame):
3941
self.df = data
42+
elif isinstance(data, dict):
43+
# If data is a dict, assume it's JSON content
44+
data = data["data"]["public_emdat"]["data"]
45+
df = pd.DataFrame(data)
46+
self.df = rename_columns(df)
4047
else:
41-
raise ValueError("Data must be either Excel content (str) or pandas DataFrame")
48+
raise ValueError("Data must be either Excel content (str) or pandas DataFrame or Json")
4249

4350
def get_data(self) -> pd.DataFrame:
4451
return self.df
@@ -109,7 +116,7 @@ def _create_event_item_from_row(self, row: pd.Series) -> Optional[Item]:
109116
bbox = None
110117

111118
# 1. Try admin units first if geocoder is available
112-
if self.geocoder and not pd.isna(row.get("Admin Units")):
119+
if self.geocoder and np.any(pd.notna(row.get("Admin Units"))):
113120
geom_data = self.geocoder.get_geometry_from_admin_units(row.get("Admin Units"))
114121
if geom_data:
115122
geometry = geom_data["geometry"]

pystac_monty/sources/pdc.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,13 @@ def __init__(self, pdc_data_src: PDCDataSource):
5656
self.exposure_detail = json.loads(f.read())
5757

5858
self.uuid = self.config_data.get("uuid", None)
59-
self.episode_number = int(float(self.config_data.get("exposure_timestamp", 0)))
59+
# Note: We might need to handle this differently if the exposure_timestamp
60+
# is other than numeric (e.g. alphabetic)
61+
# For now, fall back to episode_number = 0 when the value cannot be parsed
62+
try:
63+
self.episode_number = int(float(self.config_data.get("exposure_timestamp", 0)))
64+
except ValueError:
65+
self.episode_number = 0
6066
if "geojson_file_path" in self.config_data and os.path.exists(self.config_data["geojson_file_path"]):
6167
with open(self.config_data["geojson_file_path"], "r", encoding="utf-8") as f:
6268
self.geojson_data = json.loads(f.read())

pystac_monty/utils.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
def rename_columns(df):
    """Rename snake_case field names of a pandas DataFrame to display-style labels.

    Only the columns listed in the mapping below are renamed; any other
    column keeps its name. The input frame is not modified.

    Args:
        df: pandas DataFrame whose columns use the snake_case field names.

    Returns:
        A new DataFrame with the mapped columns renamed.
    """
    column_labels = {
        "disno": "DisNo.",
        "admin_units": "Admin Units",
        "latitude": "Latitude",
        "longitude": "Longitude",
        "country": "Country",
        "classif_key": "Classification Key",
        "iso": "ISO",
        "total_deaths": "Total Deaths",
        "no_injured": "No Injured",
        "no_affected": "No Affected",
        "no_homeless": "No Homeless",
        "total_affected": "Total Affected",
        "total_dam": "Total Damages ('000 US$)",
        "start_year": "Start Year",
        "start_month": "Start Month",
        "start_day": "Start Day",
        "end_year": "End Year",
        "end_month": "End Month",
        "end_day": "End Day",
        "magnitude": "Magnitude",
        # Previously misspelled as "Magnitude Sacle", which left the column
        # unreachable under its intended label.
        "magnitude_scale": "Magnitude Scale",
        "name": "Event Name",
        "type": "Disaster Type",
        "subtype": "Disaster Subtype",
        "location": "Location",
    }
    return df.rename(columns=column_labels)

0 commit comments

Comments
 (0)