Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dags/veda_data_pipeline/utils/collection_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def create_cog_collection(self, dataset: Dict[str, Any]) -> dict:
# Override the extents if they exist
if spatial_extent := dataset.get("spatial_extent"):
collection_stac["extent"]["spatial"] = {"bbox": [list(spatial_extent.values())]}

if temporal_extent := dataset.get("temporal_extent"):
collection_stac["extent"]["temporal"] = {
"interval": [
Expand Down
65 changes: 64 additions & 1 deletion dags/veda_data_pipeline/utils/schemas.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Description: Lightweight schema definitions

from datetime import datetime
from typing import List, Union
from typing import List, Union, Any, Dict
from stac_pydantic.collection import Extent, TimeInterval
from pystac.utils import datetime_to_str
from dateutil import parser as date_parser


class DatetimeInterval(TimeInterval):
Expand All @@ -13,3 +15,64 @@ class DatetimeInterval(TimeInterval):
class SpatioTemporalExtent(Extent):
    # Reimplements stac_pydantic's Extent to leverage datetime types: the
    # temporal extent is declared as a DatetimeInterval.
    temporal: DatetimeInterval


def normalize_datetime_to_iso8601(dt: Any) -> Union[str, None, Any]:
    """
    Normalize a datetime value to an ISO 8601 string with T separator and Z for UTC.

    - datetime objects become ISO 8601 strings
    - '2024-09-12 00:00:00+00' -> '2024-09-12T00:00:00Z'
    - '2024-09-12T00:00:00+00:00' -> '2024-09-12T00:00:00Z'

    Only a zero UTC offset is rewritten to ``Z``. Values carrying any other
    offset keep that offset, and naive values (no timezone information) are
    emitted without any timezone suffix.

    Args:
        dt: datetime object, datetime string, or None

    Returns:
        The ISO 8601 string for datetime and string inputs, None when ``dt``
        is None, and ``dt`` itself (unchanged) for any other type.

    Raises:
        ValueError: if ``dt`` is a string that dateutil cannot parse
            (dateutil's parser error is expected to derive from ValueError).
    """
    if dt is None:
        return None

    # Strings are parsed into datetime objects first; anything that is neither
    # a string nor a datetime is handed back untouched.
    if isinstance(dt, str):
        dt = date_parser.parse(dt)
    elif not isinstance(dt, datetime):
        return dt

    dt_str = dt.isoformat()

    # datetime.isoformat() renders a zero offset as '+00:00' (never '-00:00'),
    # so a single suffix check is enough to detect UTC.
    utc_suffix = "+00:00"
    if dt_str.endswith(utc_suffix):
        dt_str = dt_str[: -len(utc_suffix)] + "Z"

    return dt_str


def normalize_temporal_extent(collection: Dict[str, Any]) -> Dict[str, Any]:
    """
    Normalize the temporal extent of a STAC collection to ISO 8601 format.

    - '2024-09-12 00:00:00+00' -> '2024-09-12T00:00:00Z'

    Every list found in ``collection["extent"]["temporal"]["interval"]`` has
    each of its entries passed through ``normalize_datetime_to_iso8601``;
    entries of the interval list that are not lists are kept as they are.
    The collection is updated in place and is also returned. Input that does
    not have the expected nested dict/list structure is returned untouched.

    Args:
        collection: STAC collection dictionary

    Returns:
        The same ``collection`` object, with its temporal intervals normalized
    """
    if not isinstance(collection, dict):
        return collection

    # Guard every level: a null or malformed "extent" / "temporal" value is
    # passed through instead of raising TypeError on a membership test.
    extent = collection.get("extent")
    if not isinstance(extent, dict):
        return collection

    temporal = extent.get("temporal")
    if not isinstance(temporal, dict):
        return collection

    intervals = temporal.get("interval")
    if not isinstance(intervals, list):
        return collection

    # `temporal` is the dict stored inside `collection`, so this assignment
    # updates the collection in place.
    temporal["interval"] = [
        [normalize_datetime_to_iso8601(dt) for dt in interval]
        if isinstance(interval, list)
        else interval
        for interval in intervals
    ]

    return collection
Loading