diff --git a/pixi.lock b/pixi.lock
index 3fa9614..62c7eff 100644
--- a/pixi.lock
+++ b/pixi.lock
@@ -5973,8 +5973,8 @@ packages:
   timestamp: 1740681675666
 - pypi: .
   name: sar-pipeline
-  version: 0.4.1.dev47+gcf72671cd
-  sha256: 431ae50e64c089ab093e978c44c60ddc17ce24cff66b59f260e86b4d4a67453a
+  version: 0.4.1.dev70+ged98aa15b.d20251010
+  sha256: 1902578c726d1637275b275994aa9a3b31e11fc0cc1edf3dc4928df8de51a761
   requires_dist:
   - asf-search>=9.0.9
   - boto3>=1.37.1
diff --git a/pyproject.toml b/pyproject.toml
index 118f6b8..02667cf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -93,6 +93,7 @@ test-pipeline = "pytest tests/sar_pipeline/ --ignore=tests/sar_pipeline/isce3_rt
 # isce3-rtc tests that require credentials - not currently setup for github but should be run locally before PRs
 test-isce3-rtc = "pytest tests/sar_pipeline/isce3_rtc -o log_cli=true --capture=tee-sys --log-cli-level=INFO -v -s"
 test-isce3-rtc-full-docker-run = "pytest tests/sar_pipeline/isce3_rtc/test_full_docker_build_and_run.py -o log_cli=true --capture=tee-sys --log-cli-level=INFO -v -s"
+test-isce3-rtc-cli-make-rtc-opera-stac-and-upload-bursts = "pytest tests/sar_pipeline/isce3_rtc/test_cli_make_rtc_opera_stac_and_upload_bursts.py -o log_cli=true --capture=tee-sys --log-cli-level=INFO -v -s"
 # test downloads from all providers. pygssearch conda environment is required for the AUS_COP_HUB test
 test-isce3-rtc-downloads= "pixi run install-pygssearch-env && pytest tests/sar_pipeline/isce3_rtc/test_downloads.py -o log_cli=true --capture=tee-sys --log-cli-level=INFO -v -s"
 # nci specific tests that should be run locally on the nci
diff --git a/sar_pipeline/pipelines/isce3_rtc/cli.py b/sar_pipeline/pipelines/isce3_rtc/cli.py
index 7ed6629..b81abd7 100644
--- a/sar_pipeline/pipelines/isce3_rtc/cli.py
+++ b/sar_pipeline/pipelines/isce3_rtc/cli.py
@@ -723,21 +723,6 @@ def make_rtc_opera_stac_and_upload_bursts(
     )
     burst_folders = [x for x in results_folder.iterdir() if x.is_dir()]
 
-    # check if there is a burst_geoms.json file containing the correct burst geometries from the CDSE.
-    # This can be created in the get_data_for_scene_and_make_run_config function and will be used
-    # To accurately set the STAC geometries for RTC_S1 products. Not required for RTC_S1_STATIC.
-    burst_geoms_file = list(results_folder.glob("*burst_geoms.json"))
-    if not burst_geoms_file:
-        logger.warning(
-            f"Burst geometry file not found. STAC geometries will be set using value from the"
-            " .h5 metadata file which may not correctly cover the burst data"
-        )
-    else:
-        logger.info(
-            f"Burst geometry file found and will be used to set STAC geometries"
-        )
-        burst_geoms_file = burst_geoms_file[0]
-
     for i, burst_folder in enumerate(burst_folders):
         logger.info(
             f"Making STAC metadata for burst {i+1} of {len(burst_folders)} : {burst_folder}"
@@ -795,15 +780,11 @@ def make_rtc_opera_stac_and_upload_bursts(
             s3_project_folder=s3_project_folder,
         )
 
-        # update the geometry with the correct burst geometry from the CDSE if provided
-        if product == "RTC_S1" and burst_geoms_file:
-            logger.info("Updating STAC geometry with correct CDSE geometry for burst")
-            burst_geometry = load_burst_geometry_from_geojson(
-                burst_geoms_file, burst_folder.name
+        if product == "RTC_S1":
+            logger.info(
+                "Updating STAC geometry using valid data from a product GeoTIFF"
             )
-            burst_stac_manager.geometry_4326 = mapping(burst_geometry)
-            burst_stac_manager.bbox_4326 = burst_geometry.bounds
-
+            burst_stac_manager.update_geometry_using_valid_data()
         # make the stac item from the .h5 file
         burst_stac_manager.make_stac_item_from_h5()
        # add properties to the stac doc
diff --git a/sar_pipeline/pipelines/isce3_rtc/metadata/stac.py b/sar_pipeline/pipelines/isce3_rtc/metadata/stac.py
index 903036d..242e1c1 100644
--- a/sar_pipeline/pipelines/isce3_rtc/metadata/stac.py
+++ b/sar_pipeline/pipelines/isce3_rtc/metadata/stac.py
@@ -10,6 +10,7 @@ import datetime
 import re
 
 import numpy as np
+import geopandas as gpd
 
 import dem_handler
 import sar_pipeline
@@ -25,6 +26,8 @@ from sar_pipeline.utils.spatial import (
     polygon_str_to_geojson,
     reproject_bbox_to_geometry,
+    get_valid_data_min_rect_polygon_from_tif,
+    transform_polygon,
 )
 from sar_pipeline.utils.aws import find_s3_filepaths_from_suffixes
 
 from sar_pipeline.pipelines.isce3_rtc.metadata.filetypes import (
@@ -119,8 +122,9 @@ def __init__(
             "data/projection"
         )  # code, e.g. 4326, 3031
         if self.product == "RTC_S1":
-            # NOTE - boundingPolygon considers the burst data taken from the .SAFE file
-            # This geometry is only an approximation.
+            # NOTE - boundingPolygon considers the burst geometry taken from the .SAFE file
+            # This geometry is only an approximation. It can be updated to the extent of
+            # the valid data in a product file via the update_geometry_using_valid_data method.
             self.geometry_4326 = polygon_str_to_geojson(
                 self.h5.search_value("boundingPolygon")
             )
@@ -203,6 +207,44 @@ def _get_product_timeliness_category(
         else:
             return "NTC"
 
+    def update_geometry_using_valid_data(self):
+        """The geometries provided in the .h5 are only approximations derived from
+        the burst data in radar coordinates. Update these geometries using
+        the minimum rotated rectangle that encloses the valid data in an actual
+        GeoTIFF. This ensures the geometry correctly encloses the data and that
+        search results using the geometry are accurate. For RTC_S1 products,
+        the first backscatter layer is used. For RTC_S1_STATIC products,
+        the local incidence angle layer is used. Note that RTC_S1_STATIC
+        geometries are already accurate, as they utilise the burst grid.
+        """
+
+        if hasattr(self, "item"):
+            raise ValueError(
+                "The STAC item already exists, geometry cannot be updated. Edit "
+                "the geometry before creating the item with `make_stac_item_from_h5`."
+            )
+
+        if self.product == "RTC_S1":
+            # update using the valid data in a backscatter tif
+            # take a backscatter tif for one of the polarisations
+            for f in self.burst_folder.iterdir():
+                if any([pol in f.name for pol in self.polarisations]):
+                    geometry_tif = f
+        elif self.product == "RTC_S1_STATIC":
+            # use the local incidence angle layer
+            for f in self.burst_folder.iterdir():
+                if "local" in f.name and "incidence" in f.name:
+                    geometry_tif = f
+
+        valid_data_geometry = get_valid_data_min_rect_polygon_from_tif(
+            geometry_tif, n_segments=4
+        )
+        valid_data_geometry_4326 = transform_polygon(
+            valid_data_geometry, self.projection_epsg, 4326
+        )
+        self.geometry_4326 = mapping(valid_data_geometry_4326)
+        self.bbox_4326 = valid_data_geometry_4326.bounds
+
     def make_stac_item_from_h5(self):
         """Make a pystac.item.Item for the given burst using key properties
         taken from the .h5 file.
diff --git a/sar_pipeline/utils/spatial.py b/sar_pipeline/utils/spatial.py
index b3e4f07..bd8704f 100644
--- a/sar_pipeline/utils/spatial.py
+++ b/sar_pipeline/utils/spatial.py
@@ -1,13 +1,16 @@
 import pyproj
 from pyproj import Transformer
 from shapely import wkt
-from shapely.geometry import mapping, box, Polygon, shape
+from shapely.geometry import mapping, box, Polygon, shape, MultiPolygon
 from shapely import segmentize
+from shapely.ops import unary_union, orient
+import numpy as np
+import rasterio
+from rasterio.features import shapes
 import json
 from pathlib import Path
 import logging
 
-
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
@@ -211,3 +214,81 @@ def load_burst_geometry_from_geojson(geojson_path: Path, burst_id: str):
             return shape(feature["geometry"])
 
     raise KeyError(f"Burst ID {burst_id} not found in {geojson_path}")
+
+
+def get_valid_data_min_rect_polygon_from_tif(
+    tif_path: Path,
+    n_segments: int = 4,
+) -> Polygon | None:
+    """
+    Extract an approximate (rotated) rectangular boundary polygon around the
+    valid (non-nodata) data in a GeoTIFF, ignoring NoData areas. Additional
+    segments are added to the polygon based on the n_segments setting. This
+    ensures that re-projections are handled correctly; a simple four-corner
+    rectangle can be distorted when re-projected because its shape is not preserved.
+
+    Parameters
+    ----------
+    tif_path : Path
+        Path to the GeoTIFF file.
+    n_segments : int, optional
+        The number of segments/points to add along each side of the
+        minimum bounding rectangle.
+
+    Returns
+    -------
+    shapely.geometry.Polygon or None
+        Polygon representing the outer boundary of valid data.
+        Returns None if no valid pixels are found.
+    """
+    with rasterio.open(tif_path) as ds:
+        data = ds.read(1)
+        nodata = ds.nodata
+
+        # Build mask where True = valid data
+        if nodata is None:
+            # No explicit nodata, assume NaNs mark invalid
+            mask = ~np.isnan(data)
+        elif np.isnan(nodata):
+            # nodata is NaN
+            mask = ~np.isnan(data)
+        else:
+            # normal numeric nodata value
+            mask = data != nodata
+
+        if not np.any(mask):
+            return None
+
+        # Extract shapes of valid data
+        results = (
+            {"properties": {"val": v}, "geometry": s}
+            for s, v in shapes(
+                mask.astype(np.uint8), mask=mask, transform=ds.transform
+            )
+            if v == 1
+        )
+
+        polys = [shape(feat["geometry"]) for feat in results]
+        if not polys:
+            return None
+
+        # merge the polygons
+        merged = unary_union(polys)
+        if isinstance(merged, MultiPolygon):
+            merged = unary_union(merged)
+
+        # Approximate with minimum rotated rectangle
+        rect = merged.minimum_rotated_rectangle
+
+        # Ensure consistent orientation (CCW)
+        rect = orient(rect, sign=1.0)
+
+        if n_segments:
+            coords = np.array(rect.exterior.coords)
+            # Compute pairwise edge lengths
+            edge_lengths = np.sqrt(np.sum(np.diff(coords, axis=0) ** 2, axis=1))
+            segment_length = np.min(edge_lengths) / n_segments
+            segmentized_geometry = segmentize(rect, max_segment_length=segment_length)
+            return segmentized_geometry
+        else:
+            return rect
diff --git a/tests/sar_pipeline/isce3_rtc/settings.py b/tests/sar_pipeline/isce3_rtc/settings.py
index 5d528f3..1edfc97 100644
--- a/tests/sar_pipeline/isce3_rtc/settings.py
+++ b/tests/sar_pipeline/isce3_rtc/settings.py
@@ -1,3 +1,9 @@
+from pathlib import Path
+
+# general
+CURRENT_DIR = Path(__file__).parent.resolve()
+PROJECT_ROOT = CURRENT_DIR.parents[2]
+
 # test data storage
 PERSISTENT_S3_PROJECT_FOLDER = (
     f"persistent/repositories/sar-pipeline/tests/sar_pipeline/isce3_rtc/results"
 )
@@ -9,20 +15,34 @@
 TEST_1_BURST = "t070_149815_iw3"
 TEST_1_POLS = ["HH"]
 TEST_1_BURST_ST = "2022-01-01T12:47:52.134049Z"
+TEST_1_COLLECTION_NUMBER = 1
 TEST_1_S3_RTC_S1_PRODUCT_SUBPATH = (
     f"ga_s1_nrb_iw_hh_1/{TEST_1_BURST}/2022/01/01/20220101T124752"
 )
 TEST_1_S3_RTC_S1_STATIC_PRODUCT_SUBPATH = f"ga_s1_nrb_iw_static_1/{TEST_1_BURST}"
+TEST_1_COMPARE_RTC_S1_STATIC_S3_PROJECT_FOLDER = (
+    f"{PERSISTENT_S3_PROJECT_FOLDER}/{TEST_1_S3_RTC_S1_STATIC_PRODUCT_SUBPATH}"
+)
+TEST_1_COMPARE_RTC_S1_S3_PROJECT_FOLDER = (
+    f"{PERSISTENT_S3_PROJECT_FOLDER}/{TEST_1_S3_RTC_S1_PRODUCT_SUBPATH}"
+)
 
 # dual pol test scene
 TEST_2_SCENE = "S1A_IW_SLC__1SDV_20201129T192619_20201129T192647_035467_042557_D8B8"
 TEST_2_BURST = "t045_095837_iw1"
 TEST_2_POLS = ["VV", "VH"]
 TEST_2_BURST_ST = "2020-11-29T19:26:19.993176Z"
+TEST_2_COLLECTION_NUMBER = 1
 TEST_2_S3_RTC_S1_PRODUCT_SUBPATH = (
     f"ga_s1_nrb_iw_vv_vh_1/{TEST_2_BURST}/2020/11/29/20201129T192619/"
 )
 TEST_2_S3_RTC_S1_STATIC_PRODUCT_SUBPATH = f"ga_s1_nrb_iw_static_1/{TEST_2_BURST}"
+TEST_2_COMPARE_RTC_S1_STATIC_S3_PROJECT_FOLDER = (
+    f"{PERSISTENT_S3_PROJECT_FOLDER}/{TEST_2_S3_RTC_S1_STATIC_PRODUCT_SUBPATH}"
+)
+TEST_2_COMPARE_RTC_S1_S3_PROJECT_FOLDER = (
+    f"{PERSISTENT_S3_PROJECT_FOLDER}/{TEST_2_S3_RTC_S1_PRODUCT_SUBPATH}"
+)
 
 # Set this to True to update the above products in the AWS S3 folder
 # Existing products will need to be deleted, otherwise the process will exit early
diff --git a/tests/sar_pipeline/isce3_rtc/test_cli_make_rtc_opera_stac_and_upload_bursts.py b/tests/sar_pipeline/isce3_rtc/test_cli_make_rtc_opera_stac_and_upload_bursts.py
new file mode 100644
index 0000000..0b02397
--- /dev/null
+++ b/tests/sar_pipeline/isce3_rtc/test_cli_make_rtc_opera_stac_and_upload_bursts.py
@@ -0,0 +1,229 @@
+from click.testing import CliRunner
+from dataclasses import dataclass
+from sar_pipeline.pipelines.isce3_rtc.cli import (
+    make_rtc_opera_stac_and_upload_bursts,
+    compare_products,
+)
+from sar_pipeline.utils.aws import S3Util
+from pathlib import Path
+from dotenv import load_dotenv
+import os
+import pytest
+import shutil
+
+import logging
+
+logging.basicConfig(
+    level=logging.DEBUG,  # or INFO
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+)
+
+logger = logging.getLogger(__name__)
+
+CURRENT_DIR = Path(__file__).parent.resolve()
+PROJECT_ROOT = CURRENT_DIR.parents[2]
+TEST_WORKSPACE = CURRENT_DIR / Path("data")
+REQUIRED_ENV_VARIABLES = [
+    "AWS_ACCESS_KEY_ID",
+    "AWS_SECRET_ACCESS_KEY",
+    "AWS_DEFAULT_REGION",
+]
+
+# check if the required env variables are set
+missing = [var for var in REQUIRED_ENV_VARIABLES if not os.getenv(var)]
+
+if missing:
+    logger.warning(f"Missing required environment variables: {', '.join(missing)}")
+    logger.info(
+        f"The following environment variables must be set for the test: {REQUIRED_ENV_VARIABLES}"
+    )
+
+    # the test may be run locally, which requires a secret file to set the variables
+    logger.info(
+        f"Attempting to load the .env file from the project root : {PROJECT_ROOT / '.env'}"
+    )
+
+    try:
+        # load the environment secrets from a local file
+        # see docs/workflows/aws.md for required variables
+        # store in project root in .env file
+        load_dotenv(PROJECT_ROOT / ".env")
+        missing = [var for var in REQUIRED_ENV_VARIABLES if not os.getenv(var)]
+        if missing:
+            raise ValueError(
+                ".env was found but some variables are missing. Add the required variables."
+            )
+        else:
+            logger.info("Environment variables loaded from .env successfully.")
+
+    except Exception:
+        raise FileNotFoundError(
+            "Could not find .env file at project root containing required environment variables for run. "
+            "Create this file with the required variables or ensure the environment is configured correctly "
+            "(for example when running automated tests on GitHub)."
+        )
+
+
+@dataclass
+class RunConfig:
+    test_product_s3_bucket: Path
+    test_product_s3_folder: Path
+    original_local_product_folder: Path
+    new_local_product_folder: Path
+    compare_product_folder: Path
+    product: str
+    backscatter_convention: str
+    collection_number: int
+    s3_bucket: str
+    s3_project_folder: str
+    skip_upload_to_s3: bool
+    make_existing_products: bool
+    link_static_layers: bool
+    linked_static_layers_s3_bucket: str
+    linked_static_layers_collection_number: int
+    linked_static_layers_s3_project_folder: str
+    validate_stac: bool
+
+
+from settings import (
+    CURRENT_DIR,
+    TEST_S3_BUCKET,
+    TEST_1_COLLECTION_NUMBER,
+    TEST_1_BURST,
+    TEST_1_COMPARE_RTC_S1_S3_PROJECT_FOLDER,
+    PERSISTENT_S3_PROJECT_FOLDER,
+)
+
+ORIGINAL_PRODUCT_RESULTS_DIR = f"{CURRENT_DIR}/data/TMP/test_metadata/original"
+NEW_PRODUCT_RESULTS_DIR = f"{CURRENT_DIR}/data/TMP/test_metadata/new"
+COMPARE_PRODUCT_FOLDER = f"{CURRENT_DIR}/data/TMP/test_metadata/compare"
+PRODUCT = "RTC_S1"
+
+# custom test against another product by changing these values
+# TEST_1_COLLECTION_NUMBER = 0
+# TEST_1_BURST = "t085_182158_iw2"
+# TEST_1_COMPARE_RTC_S1_S3_PROJECT_FOLDER = (
+#     "experimental/baseline/antarctica/ga_s1_nrb_iw_hh_0/t085_182158_iw2/2021/04/25"
+# )
+# PERSISTENT_S3_PROJECT_FOLDER = "experimental/baseline/antarctica"
+
+TEST_1 = RunConfig(
+    test_product_s3_bucket=TEST_S3_BUCKET,
+    test_product_s3_folder=TEST_1_COMPARE_RTC_S1_S3_PROJECT_FOLDER,
+    original_local_product_folder=f"{ORIGINAL_PRODUCT_RESULTS_DIR}/TEST_1/RTC_S1/{TEST_1_BURST}",
+    new_local_product_folder=f"{NEW_PRODUCT_RESULTS_DIR}/TEST_1/RTC_S1/{TEST_1_BURST}",
+    compare_product_folder=f"{COMPARE_PRODUCT_FOLDER}/TEST_1/RTC_S1/{TEST_1_BURST}",
+    product=PRODUCT,
+    backscatter_convention="gamma0",
+    collection_number=TEST_1_COLLECTION_NUMBER,
+    s3_bucket=TEST_S3_BUCKET,
+    s3_project_folder=PERSISTENT_S3_PROJECT_FOLDER,
+    skip_upload_to_s3=True,
+    make_existing_products=True,
+    link_static_layers=True,
+    linked_static_layers_s3_bucket=TEST_S3_BUCKET,
+    linked_static_layers_collection_number=TEST_1_COLLECTION_NUMBER,
+    linked_static_layers_s3_project_folder=PERSISTENT_S3_PROJECT_FOLDER,
+    validate_stac=True,
+)
+
+
+@pytest.mark.parametrize("test_run", [TEST_1])
+def test_cli_make_rtc_opera_stac_and_upload_bursts(test_run):
+
+    # first download the test product to a local folder
+    logger.info(
+        f"Downloading test product to local folder : {test_run.original_local_product_folder}"
+    )
+    if Path(test_run.original_local_product_folder).exists():
+        shutil.rmtree(test_run.original_local_product_folder)
+    os.makedirs(test_run.original_local_product_folder, exist_ok=True)
+    S3Downloader = S3Util()
+    S3Downloader.download_folder(
+        test_run.s3_bucket,
+        test_run.test_product_s3_folder,
+        test_run.original_local_product_folder,
+    )
+
+    logger.info(f"Copying product to new folder : {test_run.new_local_product_folder}")
+    if Path(test_run.new_local_product_folder).exists():
+        shutil.rmtree(test_run.new_local_product_folder)
+    shutil.copytree(
+        test_run.original_local_product_folder,
+        test_run.new_local_product_folder,
+    )
+
+    # get the run-config
+    run_config = list(Path(test_run.new_local_product_folder).glob("*config.yaml"))[0]
+    # runs are at the scene level, so the results folder is the parent of the burst folder
+    results_folder = Path(test_run.new_local_product_folder).parent
+    # copy the run config to the parent folder
+    shutil.copy(run_config, results_folder / run_config.name)
+    run_config = results_folder / run_config.name
+
+    runner = CliRunner()
+    args = ["--results-folder", results_folder]
+    args += ["--run-config-path", run_config]
+    args += ["--product", test_run.product]
+    args += ["--backscatter-convention", test_run.backscatter_convention]
+    args += ["--collection-number", test_run.collection_number]
+    args += ["--s3-bucket", test_run.s3_bucket]
+    args += ["--s3-project-folder", test_run.s3_project_folder]
+    args += ["--skip-upload-to-s3"] if test_run.skip_upload_to_s3 else []
+    args += ["--make-existing-products"] if test_run.make_existing_products else []
+    args += ["--link-static-layers"] if test_run.link_static_layers else []
+    args += [
+        "--linked-static-layers-s3-bucket",
+        test_run.linked_static_layers_s3_bucket,
+    ]
+    args += [
+        "--linked-static-layers-collection-number",
+        test_run.linked_static_layers_collection_number,
+    ]
+    args += [
+        "--linked-static-layers-s3-project-folder",
+        test_run.linked_static_layers_s3_project_folder,
+    ]
+    args += ["--validate-stac"] if test_run.validate_stac else []
+
+    logger.info("Running make_rtc_opera_stac_and_upload_bursts.")
+    logger.info(
+        f"Creating new metadata for the product stored at : {test_run.new_local_product_folder}."
+    )
+    result = runner.invoke(
+        make_rtc_opera_stac_and_upload_bursts, args, catch_exceptions=True
+    )
+    if result.exception:
+        logger.exception(
+            "An error occurred during CLI invocation", exc_info=result.exception
+        )
+        logger.error(result.output)
+    assert result.exit_code == 0
+    logger.info("New metadata created.")
+
+
+@pytest.mark.parametrize("test_run", [TEST_1])
+def test_compare_new_product_with_original(test_run):
+
+    logger.info("Comparing the original and new products to see changes in metadata")
+    logger.info(
+        f"Creating comparison outputs directory : {test_run.compare_product_folder}"
+    )
+    if Path(test_run.compare_product_folder).exists():
+        shutil.rmtree(test_run.compare_product_folder)
+    os.makedirs(test_run.compare_product_folder, exist_ok=True)
+
+    runner = CliRunner()
+    args = ["--product", "RTC_S1"]
+    args += ["--local-product-folder-1", test_run.original_local_product_folder]
+    args += ["--local-product-folder-2", test_run.new_local_product_folder]
+    args += ["--out-folder", test_run.compare_product_folder]
+
+    result = runner.invoke(compare_products, args, catch_exceptions=False)
+    if result.exception:
+        logger.exception(
+            "An error occurred during CLI invocation", exc_info=result.exception
+        )
+        logger.error(result.output)
+
+    assert result.exit_code == 0