Skip to content

Commit d6f8e7e

Browse files
committed
feat(corr-id): add geo block id to the correlation id
1 parent 5141afa commit d6f8e7e

File tree

3 files changed

+46
-2
lines changed

3 files changed

+46
-2
lines changed
7.23 MB
Binary file not shown.

pystac_monty/geo_blocks.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import importlib.resources
2+
from functools import lru_cache
3+
from threading import Lock
4+
5+
import pandas as pd
6+
7+
8+
class GeoBlocks:
9+
"""Load Geo blocks dataframe class"""
10+
11+
_df: pd.DataFrame | None = None
12+
_lock: Lock = Lock()
13+
_file_path: str = "geo_blocks-0.2.parquet"
14+
15+
@classmethod
16+
@lru_cache(maxsize=None)
17+
def get_geoblocks_df(cls) -> pd.DataFrame:
18+
"""Returns the Geo blocks dataframe"""
19+
with cls._lock:
20+
if not cls._df:
21+
with importlib.resources.files("pystac_monty").joinpath(cls._file_path).open("rb") as f:
22+
cls._df = pd.read_parquet(f, engine="pyarrow")
23+
24+
assert cls._df.columns.size == 5 # Total columns in the parquet file
25+
return cls._df

pystac_monty/paring.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,40 @@
22

33
from pystac_monty.hazard_profiles import HazardProfiles
44

5+
from .geo_blocks import GeoBlocks
6+
57

68
class Pairing:
    """Generation of correlation id for Events Pairing"""

    def _return_bbox_centroid_coordinates(self, bbox: list):
        """Return the centroid of ``bbox`` as ``[lat, lon]``, rounded to one decimal.

        Indices 0 and 2 of ``bbox`` are averaged into the longitude and
        indices 1 and 3 into the latitude.
        """
        return [round((bbox[1] + bbox[3]) / 2.0, 1), round((bbox[0] + bbox[2]) / 2.0, 1)]  # [lat, lon]

    def generate_correlation_id(self, item: Item, hazard_profiles: HazardProfiles) -> str:
        """Generate the correlation ID for events pairing.

        The id has the form
        ``{YYYYMMDD}-{country}-{block_id}-{hazard}-{episode}-GCDB``, where
        ``block_id`` is the geo block containing the centroid of the item's
        bbox, or ``-1`` when no block matches.

        Args:
            item: Item providing ``monty:hazard_codes``,
                ``monty:country_codes``, ``monty:episode_number``, a datetime
                and a bbox.
            hazard_profiles: Used to resolve the canonical hazard code of the item.

        Returns:
            The correlation id string.

        Raises:
            ValueError: If any of the hazard codes, country codes, datetime,
                episode number or bbox is missing or empty.
        """
        # Get the necessary properties for creating the correlation id
        hazards = item.properties.get("monty:hazard_codes", [])
        country_codes = item.properties.get("monty:country_codes", [])
        event_datetime = item.datetime
        episode_number = item.properties.get("monty:episode_number", 0)
        bbox = item.bbox

        # Validate the raw bbox, not the computed centroid: the centroid helper
        # always returns a non-empty list and fails with TypeError on a missing bbox.
        if not hazards or not country_codes or not event_datetime or not episode_number or not bbox:
            raise ValueError("Missing required properties to generate correlation id")

        geometry_lat_lon = self._return_bbox_centroid_coordinates(bbox)
        lat, lon = geometry_lat_lon

        hazard_cluster_code = hazard_profiles.get_canonical_hazard_codes(item)[0].upper()
        # This should be dynamically determined based on existing events
        eventdatestr = event_datetime.strftime("%Y%m%d")

        # Select the geo block whose half-open [min, max) ranges contain the centroid.
        geoblocks_df = GeoBlocks.get_geoblocks_df()
        geoblocks_filtered_df = geoblocks_df[
            (geoblocks_df["lat_min"] <= lat)
            & (geoblocks_df["lat_max"] > lat)
            & (geoblocks_df["lon_min"] <= lon)
            & (geoblocks_df["lon_max"] > lon)
        ]
        # -1 marks a centroid that falls outside every known block.
        block_id = int(geoblocks_filtered_df["block_id"].iloc[0]) if len(geoblocks_filtered_df) else -1

        event_id = f"{eventdatestr}-{country_codes[0]}-{block_id}-{hazard_cluster_code}-{episode_number}-GCDB"  # noqa: E501
        return event_id

0 commit comments

Comments
 (0)