Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ test-pipeline = "pytest tests/sar_pipeline/ --ignore=tests/sar_pipeline/isce3_rt
# isce3-rtc tests that require credentials - not currently setup for github but should be run locally before PRs
test-isce3-rtc = "pytest tests/sar_pipeline/isce3_rtc -o log_cli=true --capture=tee-sys --log-cli-level=INFO -v -s"
test-isce3-rtc-full-docker-run = "pytest tests/sar_pipeline/isce3_rtc/test_full_docker_build_and_run.py -o log_cli=true --capture=tee-sys --log-cli-level=INFO -v -s"
test-isce3-rtc-cli-make-rtc-opera-stac-and-upload-bursts = "pytest tests/sar_pipeline/isce3_rtc/test_cli_make_rtc_opera_stac_and_upload_bursts.py -o log_cli=true --capture=tee-sys --log-cli-level=INFO -v -s"
# test downloads from all providers. pygssearch conda environment is required for the AUS_COP_HUB test
test-isce3-rtc-downloads= "pixi run install-pygssearch-env && pytest tests/sar_pipeline/isce3_rtc/test_downloads.py -o log_cli=true --capture=tee-sys --log-cli-level=INFO -v -s"
# nci specific tests that should be run locally on the nci
Expand Down
27 changes: 4 additions & 23 deletions sar_pipeline/pipelines/isce3_rtc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,21 +723,6 @@ def make_rtc_opera_stac_and_upload_bursts(
)
burst_folders = [x for x in results_folder.iterdir() if x.is_dir()]

# check if there is a burst_geoms.json file containing the correct burst geometries from the CDSE.
# This can be created in the get_data_for_scene_and_make_run_config function and will be used
# To accurately set the STAC geometries for RTC_S1 products. Not required for RTC_S1_STATIC.
burst_geoms_file = list(results_folder.glob("*burst_geoms.json"))
if not burst_geoms_file:
logger.warning(
f"Burst geometry file not found. STAC geometries will be set using value from the"
" .h5 metadata file which may not correctly cover the burst data"
)
else:
logger.info(
f"Burst geometry file found and will be used to set STAC geometries"
)
burst_geoms_file = burst_geoms_file[0]

for i, burst_folder in enumerate(burst_folders):
logger.info(
f"Making STAC metadata for burst {i+1} of {len(burst_folders)} : {burst_folder}"
Expand Down Expand Up @@ -795,15 +780,11 @@ def make_rtc_opera_stac_and_upload_bursts(
s3_project_folder=s3_project_folder,
)

# update the geometry with the correct burst geometry from the CDSE if provided
if product == "RTC_S1" and burst_geoms_file:
logger.info("Updating STAC geometry with correct CDSE geometry for burst")
burst_geometry = load_burst_geometry_from_geojson(
burst_geoms_file, burst_folder.name
if product == "RTC_S1":
logger.info(
"Updating STAC geometry using valid data from a product GeoTiff"
)
burst_stac_manager.geometry_4326 = mapping(burst_geometry)
burst_stac_manager.bbox_4326 = burst_geometry.bounds

burst_stac_manager.update_geometry_using_valid_data()
# make the stac item from the .h5 file
burst_stac_manager.make_stac_item_from_h5()
# add properties to the stac doc
Expand Down
46 changes: 44 additions & 2 deletions sar_pipeline/pipelines/isce3_rtc/metadata/stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import datetime
import re
import numpy as np
import geopandas as gpd

import dem_handler
import sar_pipeline
Expand All @@ -25,6 +26,8 @@
from sar_pipeline.utils.spatial import (
polygon_str_to_geojson,
reproject_bbox_to_geometry,
get_valid_data_min_rect_polygon_from_tif,
transform_polygon,
)
from sar_pipeline.utils.aws import find_s3_filepaths_from_suffixes
from sar_pipeline.pipelines.isce3_rtc.metadata.filetypes import (
Expand Down Expand Up @@ -119,8 +122,9 @@ def __init__(
"data/projection"
) # code, e.g. 4326, 3031
if self.product == "RTC_S1":
# NOTE - boundingPolygon considers the burst data taken from the .SAFE file
# This geometry is only an approximation.
# NOTE - boundingPolygon considers the burst geometry taken from the .SAFE file.
# This geometry is only an approximation; it can be updated using the valid
# data within a product file via the update_geometry_using_valid_data method.
self.geometry_4326 = polygon_str_to_geojson(
self.h5.search_value("boundingPolygon")
)
Expand Down Expand Up @@ -203,6 +207,44 @@ def _get_product_timeliness_category(
else:
return "NTC"

def update_geometry_using_valid_data(self):
    """Replace the approximate .h5 geometry with one derived from valid data.

    The geometries provided in the .h5 are only approximations from the
    burst data in radar coordinates. This updates ``self.geometry_4326``
    and ``self.bbox_4326`` using the minimum rotated rectangle that
    encloses the valid (non-nodata) data of an actual product GeoTiff,
    ensuring the geometry correctly encloses the data and that spatial
    search results using the geometry are accurate. For RTC_S1 products,
    the first backscatter layer found is used. For RTC_S1_STATIC products,
    the local incidence angle layer is used. Note, RTC_S1_STATIC geometries
    are already accurate as they utilise the burst grid.

    Raises
    ------
    ValueError
        If the STAC item has already been created, if the product type is
        unrecognised, or if the selected GeoTiff contains no valid data.
    FileNotFoundError
        If no suitable GeoTiff can be found in the burst folder.
    """

    if hasattr(self, "item"):
        raise ValueError(
            "The STAC item already exists, geometry cannot be updated. Edit "
            "the geometry before creating the file with `make_stac_item_from_h5`."
        )

    geometry_tif = None
    if self.product == "RTC_S1":
        # use the valid data in a backscatter tif; take the tif for the
        # first polarisation found and stop scanning.
        # NOTE(review): assumes the geocoded footprint is identical across
        # polarisations, so any single polarisation is representative —
        # confirm this holds for dual-pol products.
        for f in self.burst_folder.iterdir():
            if any(pol in f.name for pol in self.polarisations):
                geometry_tif = f
                break
    elif self.product == "RTC_S1_STATIC":
        # use the local incidence angle layer
        for f in self.burst_folder.iterdir():
            if "local" in f.name and "incidence" in f.name:
                geometry_tif = f
                break
    else:
        raise ValueError(
            f"Cannot update geometry for unknown product type: {self.product}"
        )

    if geometry_tif is None:
        # previously an unbound-variable NameError; fail with a clear message
        raise FileNotFoundError(
            f"No suitable GeoTiff found in {self.burst_folder} to derive the "
            "valid data geometry from"
        )

    valid_data_geometry = get_valid_data_min_rect_polygon_from_tif(
        geometry_tif, n_segments=4
    )
    if valid_data_geometry is None:
        # the helper returns None when the tif has no valid pixels; fail
        # explicitly rather than crashing inside transform_polygon
        raise ValueError(f"No valid data found in {geometry_tif}")

    valid_data_geometry_4326 = transform_polygon(
        valid_data_geometry, self.projection_epsg, 4326
    )
    self.geometry_4326 = mapping(valid_data_geometry_4326)
    self.bbox_4326 = valid_data_geometry_4326.bounds

def make_stac_item_from_h5(self):
"""Make a pystac.item.Item for the given burst using key properties
taken from the .h5 file.
Expand Down
85 changes: 83 additions & 2 deletions sar_pipeline/utils/spatial.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import pyproj
from pyproj import Transformer
from shapely import wkt
from shapely.geometry import mapping, box, Polygon, shape
from shapely.geometry import mapping, box, Polygon, shape, MultiPolygon
from shapely import segmentize
from shapely.ops import unary_union, orient
import numpy as np
import rasterio
from rasterio.features import shapes
import json
from pathlib import Path
import logging


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -211,3 +214,81 @@ def load_burst_geometry_from_geojson(geojson_path: Path, burst_id: str):
return shape(feature["geometry"])

raise KeyError(f"Burst ID {burst_id} not found in {geojson_path}")


def get_valid_data_min_rect_polygon_from_tif(
    tif_path: Path,
    n_segments: int = 4,
) -> "Polygon | None":
    """
    Extract an approximate (rotated) rectangular boundary polygon
    around valid (non-nodata) data in a GeoTIFF, ignoring NoData areas.
    Additional segments are added to the polygon based on the n_segments
    setting. This ensures that re-projections are handled correctly, as these
    can be problematic with a simple rectangle, whose shape is not preserved.

    Parameters
    ----------
    tif_path : Path
        Path to the GeoTIFF file.
    n_segments : int, optional
        The number of segments/points to add along each side of the
        minimum bounding rectangle. If falsy, the plain rectangle is
        returned without densification.

    Returns
    -------
    shapely.geometry.Polygon or None
        Polygon representing the outer boundary of valid data.
        Returns None if no valid pixels are found.
    """
    # Read everything needed from the dataset inside the context manager so
    # nothing lazily touches the (closed) dataset afterwards.
    with rasterio.open(tif_path) as ds:
        data = ds.read(1)
        nodata = ds.nodata
        transform = ds.transform

    # Build mask where True = valid data. With no explicit nodata value
    # (or a NaN nodata value), NaN pixels are treated as invalid.
    if nodata is None or np.isnan(nodata):
        mask = ~np.isnan(data)
    else:
        # normal numeric nodata value
        mask = data != nodata

    if not np.any(mask):
        return None

    # Vectorize the valid-data mask into polygons (value 1 = valid)
    polys = [
        shape(geom)
        for geom, value in shapes(
            mask.astype(np.uint8), mask=mask, transform=transform
        )
        if value == 1
    ]
    if not polys:
        return None

    # Merge the (possibly disjoint) polygons. minimum_rotated_rectangle
    # accepts either a Polygon or a MultiPolygon, so no special-casing of
    # the union result is needed.
    merged = unary_union(polys)

    # Approximate with the minimum rotated rectangle, with a consistent
    # counter-clockwise orientation.
    # NOTE(review): a degenerate mask (single pixel / single line) could make
    # this a Point or LineString with no exterior — assumed not to occur for
    # real products; confirm if that case needs handling.
    rect = orient(merged.minimum_rotated_rectangle, sign=1.0)

    if not n_segments:
        return rect

    # Densify each edge so subsequent re-projections preserve the shape:
    # split the shortest side into n_segments pieces and apply that
    # maximum segment length to the whole rectangle.
    coords = np.asarray(rect.exterior.coords)
    edge_lengths = np.sqrt(np.sum(np.diff(coords, axis=0) ** 2, axis=1))
    segment_length = np.min(edge_lengths) / n_segments
    return segmentize(rect, max_segment_length=segment_length)
20 changes: 20 additions & 0 deletions tests/sar_pipeline/isce3_rtc/settings.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
from pathlib import Path

# general
CURRENT_DIR = Path(__file__).parent.resolve()
PROJECT_ROOT = CURRENT_DIR.parents[2]

# test data storage
PERSISTENT_S3_PROJECT_FOLDER = (
f"persistent/repositories/sar-pipeline/tests/sar_pipeline/isce3_rtc/results"
Expand All @@ -9,20 +15,34 @@
TEST_1_BURST = "t070_149815_iw3"
TEST_1_POLS = ["HH"]
TEST_1_BURST_ST = "2022-01-01T12:47:52.134049Z"
TEST_1_COLLECTION_NUMBER = 1
TEST_1_S3_RTC_S1_PRODUCT_SUBPATH = (
f"ga_s1_nrb_iw_hh_1/{TEST_1_BURST}/2022/01/01/20220101T124752"
)
TEST_1_S3_RTC_S1_STATIC_PRODUCT_SUBPATH = f"ga_s1_nrb_iw_static_1/{TEST_1_BURST}"
TEST_1_COMPARE_RTC_S1_STATIC_S3_PROJECT_FOLDER = (
f"{PERSISTENT_S3_PROJECT_FOLDER}/{TEST_1_S3_RTC_S1_STATIC_PRODUCT_SUBPATH}"
)
TEST_1_COMPARE_RTC_S1_S3_PROJECT_FOLDER = (
f"{PERSISTENT_S3_PROJECT_FOLDER}/{TEST_1_S3_RTC_S1_PRODUCT_SUBPATH}"
)

# dual pol test scene
TEST_2_SCENE = "S1A_IW_SLC__1SDV_20201129T192619_20201129T192647_035467_042557_D8B8"
TEST_2_BURST = "t045_095837_iw1"
TEST_2_POLS = ["VV", "VH"]
TEST_2_BURST_ST = "2020-11-29T19:26:19.993176Z"
TEST_2_COLLECTION_NUMBER = 1
TEST_2_S3_RTC_S1_PRODUCT_SUBPATH = (
f"ga_s1_nrb_iw_vv_vh_1/{TEST_2_BURST}/2020/11/29/20201129T192619/"
)
TEST_2_S3_RTC_S1_STATIC_PRODUCT_SUBPATH = f"ga_s1_nrb_iw_static_1/{TEST_2_BURST}"
TEST_2_COMPARE_RTC_S1_STATIC_S3_PROJECT_FOLDER = (
f"{PERSISTENT_S3_PROJECT_FOLDER}/{TEST_2_S3_RTC_S1_STATIC_PRODUCT_SUBPATH}"
)
TEST_2_COMPARE_RTC_S1_S3_PROJECT_FOLDER = (
f"{PERSISTENT_S3_PROJECT_FOLDER}/{TEST_2_S3_RTC_S1_PRODUCT_SUBPATH}"
)

# Set this to True to update the above products in the AWS S3 folder
# Existing products will need to be deleted, otherwise the process will exit early
Expand Down
Loading
Loading