Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/run-pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ jobs:
- name: Run tests
shell: micromamba-shell {0}
run: |
pytest
pytest tests/sar_antarctica/
4 changes: 2 additions & 2 deletions sar_antarctica/nci/preparation/find_scene.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import click
from pathlib import Path

from scenes import find_scene_file_from_id
from orbits import find_latest_orbit_for_scene
from sar_antarctica.nci.preparation.scenes import find_scene_file_from_id
from sar_antarctica.nci.preparation.orbits import find_latest_orbit_for_scene

@click.command()
@click.argument("scene_id")
Expand Down
85 changes: 59 additions & 26 deletions sar_antarctica/nci/preparation/orbits.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,36 @@
from datetime import datetime
from pathlib import Path
import re
from typing import Optional

from scenes import parse_scene_file_dates
from sar_antarctica.nci.preparation.scenes import parse_scene_file_dates, parse_scene_file_sensor

# Constants for NCI
S1_DIR = Path("/g/data/fj7/Copernicus/Sentinel-1/")
POE_DIR = "POEORB"
RES_DIR = "RESORB"
ORBIT_DIRS = [POE_DIR, RES_DIR]
SENSORS = ["S1A", "S1B"]

def parse_orbit_file_dates(orbit_file_name: str) -> tuple[datetime, datetime, datetime]:
"""
Extracts published_date, start_date, and end_date from the given orbit file.
"""Extracts published_date, start_date, and end_date from the given orbit file.
Filename example: S1A_OPER_AUX_POEORB_OPOD_20141207T123431_V20141115T225944_20141117T005944.EOF
- Published: 20141207T123431
- Start: 20141115T225944
- End: 20141117T005944

Args:
file_name (str): The orbit file name as a string.
Parameters
----------
orbit_file_name : str
The orbit file name as a string.

Returns
-------
tuple[datetime, datetime, datetime]
a tuple of datetimes for published, start and end of the orbit file

Returns:
tuple(datetime): a tuple of datetimes for published, start and end of the orbit file
Raises
------
ValueError
Did not find a match to the expected date pattern of published_date followed by start_date and end_date
"""
# Regex pattern to match the dates
pattern = (r"(?P<published_date>\d{8}T\d{6})_V"
Expand All @@ -43,34 +50,60 @@ def parse_orbit_file_dates(orbit_file_name: str) -> tuple[datetime, datetime, da

return (published_date, start_date, stop_date)

def find_latest_orbit_for_scene(scene_id: str, poe_only: bool = True) -> Path:
"""
Identifies the most recent orbit file available for a given scene, based
def find_latest_orbit_for_scene(scene_id: str, orbit_type: Optional[str] = None) -> Path:
"""Identifies the most recent orbit file available for a given scene, based
on the scene's start and end date.

Parameters
----------
scene_id : str
Sentinel-1 scene ID
e.g. S1A_EW_GRDM_1SDH_20220612T120348_20220612T120452_043629_053582_0F6
orbit_type : Optional[str], optional
Any of "POE" for POE orbits, "RES" for RES orbits, or None, by default None

Returns
-------
Path
Full file path to latest orbit file on NCI

Raises
------
ValueError
orbit_type must be one of "POE", "RES" or None
ValueError
No valid orbit file was found
"""

scene_start, scene_stop = parse_scene_file_dates(scene_id)
scene_sensor = parse_scene_file_sensor(scene_id)

relevant_orbits = []

for orbit_dir in ORBIT_DIRS:
if orbit_type == "POE":
orbit_directories = [POE_DIR]
elif orbit_type == "RES":
orbit_directories = [RES_DIR]
elif orbit_type is None:
orbit_directories = [RES_DIR, POE_DIR]
else:
raise ValueError("orbit_type must be one of 'POE', 'RES', or None")

# Find all orbits for the sensor that fall within the date range of the scene
for orbit_dir in orbit_directories:
orbit_dir_path = S1_DIR / orbit_dir
for sensor in SENSORS:
orbit_files_path = orbit_dir_path / sensor
orbit_files = orbit_files_path.glob("*.EOF")
orbit_files_path = orbit_dir_path / scene_sensor
orbit_files = orbit_files_path.glob("*.EOF")

for orbit_file in orbit_files:
for orbit_file in orbit_files:

orbit_published, orbit_start, orbit_stop = parse_orbit_file_dates(orbit_file)
# Check if scene falls within orbit
if scene_start >= orbit_start and scene_stop <= orbit_stop:
orbit_metadata = (orbit_file, orbit_dir, orbit_published)
relevant_orbits.append(orbit_metadata)
orbit_published, orbit_start, orbit_stop = parse_orbit_file_dates(orbit_file)

# Check if scene falls within orbit
if scene_start >= orbit_start and scene_stop <= orbit_stop:
orbit_metadata = (orbit_file, orbit_dir, orbit_published)
relevant_orbits.append(orbit_metadata)

if poe_only:
relevant_orbits = [item for item in relevant_orbits if item[1] == POE_DIR]

# If relevant_orbits is empty, set latest_orbit to None
latest_orbit = max(relevant_orbits, key=lambda x: x[2]) if relevant_orbits else None

Expand Down
71 changes: 67 additions & 4 deletions sar_antarctica/nci/preparation/scenes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,55 @@

SCENE_DIR = Path("/g/data/fj7/Copernicus/Sentinel-1/C-SAR/GRD/")

def parse_scene_file_dates(scene_id: str) -> tuple[datetime, datetime]:
def parse_scene_file_sensor(scene_id: str) -> str:
    """Extract Sentinel-1 sensor string (S1A, S1B, S1C, S1D) from scene ID

    Parameters
    ----------
    scene_id : str
        Sentinel-1 scene ID
        e.g. S1A_EW_GRDM_1SDH_20220612T120348_20220612T120452_043629_053582_0F6

    Returns
    -------
    str
        Sensor string. One of S1A, S1B, S1C, or S1D

    Raises
    ------
    ValueError
        The scene ID does not start with any of S1A, S1B, S1C, or S1D
        followed by an underscore
    """
    # Expect IDs to be prefaced with any of S1A, S1B, S1C, or S1D, followed by
    # an underscore. Inside a character class "|" is a literal pipe rather than
    # alternation, so the class lists only the four platform letters.
    pattern = r"^(S1[ABCD])_"

    match = re.match(pattern, scene_id)

    if not match:
        raise ValueError("No valid sensor was found in the scene ID. Valid sensors are S1A, S1B, S1C, or S1D")

    return match.group(1)


def parse_scene_file_dates(scene_id: str) -> tuple[datetime, datetime]:
"""Extracts start_date and end_date from the given scene ID.

Parameters
----------
scene_id : str
Sentinel-1 scene ID
e.g. S1A_EW_GRDM_1SDH_20220612T120348_20220612T120452_043629_053582_0F6

Returns
-------
tuple[datetime, datetime]
A tuple containing the start and stop date for the scene as datetimes
e.g. (datetime(2022,6,12,12,3,48), datetime(2022,6,12,12,4,52))

Raises
------
ValueError
Did not find a match to the expected date pattern of start_date followed by end_date in the scene ID
"""
# Regex pattern to match the dates
pattern = (r"(?P<start_date>\d{8}T\d{6})_"
Expand All @@ -23,8 +69,25 @@ def parse_scene_file_dates(scene_id: str) -> tuple[datetime, datetime]:
return (start_date, stop_date)

def find_scene_file_from_id(scene_id: str) -> Path:
"""
Finds the path to the scene on GADI based on the scene ID
"""Finds the path to the scene on GADI based on the scene ID

Parameters
----------
scene_id : str
Sentinel-1 scene ID
e.g. S1A_EW_GRDM_1SDH_20220612T120348_20220612T120452_043629_053582_0F6

Returns
-------
Path
Location of scene on NCI GADI

Raises
------
RuntimeError
Found more than one file -- expects one
RuntimeError
Found no files -- expects one. Or another Error
"""

# Parse the scene dates -- only start date is needed for search
Expand Down
54 changes: 54 additions & 0 deletions tests/filesystem/test_filesystem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from sar_antarctica.nci.preparation.orbits import find_latest_orbit_for_scene
from sar_antarctica.nci.preparation.scenes import find_scene_file_from_id

import dataclasses
from datetime import datetime
from pathlib import Path
import pytest

@dataclasses.dataclass
class Scene:
    """Expected values for one Sentinel-1 scene used by the tests in this file."""

    # Sentinel-1 scene ID passed to the functions under test
    id: str
    # Expected result of find_scene_file_from_id for this scene
    file: Path
    # Sensor prefix of the scene ID (e.g. "S1A")
    sensor: str
    # Scene start and stop times encoded in the scene ID
    start_date: datetime
    stop_date: datetime
    # Expected result of find_latest_orbit_for_scene with the default orbit type
    latest_orbit: Path
    # Expected result of find_latest_orbit_for_scene with orbit_type="POE"
    latest_poe_orbit: Path
    # Expected result of find_latest_orbit_for_scene with orbit_type="RES"
    latest_res_orbit: Path


# Reference scene for sensor S1A. The expected default orbit is the same file
# as the expected POE orbit.
scene_1 = Scene(
    id="S1A_EW_GRDM_1SDH_20220612T120348_20220612T120452_043629_053582_0F66",
    file=Path("/g/data/fj7/Copernicus/Sentinel-1/C-SAR/GRD/2022/2022-06/65S115E-70S120E/S1A_EW_GRDM_1SDH_20220612T120348_20220612T120452_043629_053582_0F66.zip"),
    sensor="S1A",
    start_date=datetime(2022,6,12,12,3,48),
    stop_date=datetime(2022,6,12,12,4,52),
    latest_orbit=Path("/g/data/fj7/Copernicus/Sentinel-1/POEORB/S1A/S1A_OPER_AUX_POEORB_OPOD_20220702T081845_V20220611T225942_20220613T005942.EOF"),
    latest_poe_orbit=Path("/g/data/fj7/Copernicus/Sentinel-1/POEORB/S1A/S1A_OPER_AUX_POEORB_OPOD_20220702T081845_V20220611T225942_20220613T005942.EOF"),
    latest_res_orbit=Path("/g/data/fj7/Copernicus/Sentinel-1/RESORB/S1A/S1A_OPER_AUX_RESORB_OPOD_20220612T143829_V20220612T104432_20220612T140202.EOF"),
)

# Reference scene for sensor S1B. The expected default orbit is the same file
# as the expected POE orbit.
scene_2 = Scene(
    id="S1B_EW_GRDM_1SDH_20191130T165626_20191130T165726_019159_0242A2_2F58",
    file=Path("/g/data/fj7/Copernicus/Sentinel-1/C-SAR/GRD/2019/2019-11/65S160E-70S165E/S1B_EW_GRDM_1SDH_20191130T165626_20191130T165726_019159_0242A2_2F58.zip"),
    sensor="S1B",
    start_date=datetime(2019,11,30,16,56,26),
    stop_date=datetime(2019,11,30,16,57,26),
    latest_orbit=Path("/g/data/fj7/Copernicus/Sentinel-1/POEORB/S1B/S1B_OPER_AUX_POEORB_OPOD_20191220T110516_V20191129T225942_20191201T005942.EOF"),
    latest_poe_orbit=Path("/g/data/fj7/Copernicus/Sentinel-1/POEORB/S1B/S1B_OPER_AUX_POEORB_OPOD_20191220T110516_V20191129T225942_20191201T005942.EOF"),
    latest_res_orbit=Path("/g/data/fj7/Copernicus/Sentinel-1/RESORB/S1B/S1B_OPER_AUX_RESORB_OPOD_20191130T210136_V20191130T154804_20191130T190534.EOF"),
)

# Parametrisation list shared by the tests below
scenes = [scene_1, scene_2]

@pytest.mark.parametrize("scene", scenes)
def test_find_latest_orbit_for_scene(scene: Scene):
    """Check the orbit file returned for the default, RES and POE orbit types."""
    # Default call: no orbit_type argument is supplied
    found_default = find_latest_orbit_for_scene(scene.id)
    assert found_default == scene.latest_orbit

    found_res = find_latest_orbit_for_scene(scene.id, orbit_type="RES")
    assert found_res == scene.latest_res_orbit

    found_poe = find_latest_orbit_for_scene(scene.id, orbit_type="POE")
    assert found_poe == scene.latest_poe_orbit


@pytest.mark.parametrize("scene", scenes)
def test_find_scene_file_from_id(scene: Scene):
    """Check that the scene ID resolves to the expected scene file path."""
    located_file = find_scene_file_from_id(scene.id)
    assert located_file == scene.file
File renamed without changes.
33 changes: 33 additions & 0 deletions tests/sar_antarctica/test_orbits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from sar_antarctica.nci.preparation.orbits import parse_orbit_file_dates

from pathlib import Path
import pytest
import dataclasses
from datetime import datetime

@dataclasses.dataclass
class Orbit:
    """Orbit file name paired with the dates parse_orbit_file_dates should return."""

    # Orbit file name passed to parse_orbit_file_dates
    file: str
    # Expected first, second and third elements of the parsed tuple
    published_date: datetime
    start_date: datetime
    stop_date: datetime

# Each fixture pairs a file name with the datetimes spelled out in that name:
# the published timestamp, then the "V"-prefixed start and the stop timestamps.
orbit_1 = Orbit(
    file="S1A_OPER_AUX_POEORB_OPOD_20141207T123431_V20141115T225944_20141117T005944.EOF",
    published_date=datetime(2014, 12, 7,12,34,31),
    start_date=datetime(2014,11,15,22,59,44),
    stop_date=datetime(2014,11,17,0,59,44)
)
orbit_2 = Orbit(
    file="S1A_OPER_AUX_POEORB_OPOD_20191220T120706_V20191129T225942_20191201T005942.EOF",
    published_date=datetime(2019,12,20,12,7,6),
    start_date=datetime(2019,11,29,22,59,42),
    stop_date=datetime(2019,12,1,0,59,42)
)

# Parametrisation list for the test below
orbits = [orbit_1, orbit_2]

@pytest.mark.parametrize("orbit", orbits)
def test_parse_orbit_file_dates(orbit: Orbit):
    """Check the (published, start, stop) tuple parsed from the orbit file name."""
    expected = (orbit.published_date, orbit.start_date, orbit.stop_date)
    parsed = parse_orbit_file_dates(orbit.file)
    assert parsed == expected
48 changes: 48 additions & 0 deletions tests/sar_antarctica/test_scenes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import datetime
from sar_antarctica.nci.preparation.scenes import (
parse_scene_file_dates,
parse_scene_file_sensor,
)

import dataclasses
from datetime import datetime
from pathlib import Path
import pytest

@dataclasses.dataclass
class Scene:
    """Scene ID paired with the values the scene parsing functions should return."""

    # Sentinel-1 scene ID passed to the parsing functions
    id: str
    # Scene file path; not read by the tests in this file
    file: Path
    # Expected result of parse_scene_file_sensor
    sensor: str
    # Expected first and second elements of the parse_scene_file_dates tuple
    start_date: datetime
    stop_date: datetime

# Reference scene whose ID starts with S1A
scene_1 = Scene(
    id="S1A_EW_GRDM_1SDH_20200330T165825_20200330T165929_031907_03AF02_8570",
    file=Path("/g/data/fj7/Copernicus/Sentinel-1/C-SAR/GRD/2020/2020-03/70S050E-75S055E/S1A_EW_GRDM_1SDH_20200330T165825_20200330T165929_031907_03AF02_8570.zip"),
    sensor="S1A",
    start_date=datetime(2020,3,30,16,58,25),
    stop_date=datetime(2020,3,30,16,59,29)
)

# Reference scene whose ID starts with S1B
scene_2 = Scene(
    id="S1B_EW_GRDM_1SDH_20210914T112333_20210914T112403_028693_036C96_3EA8",
    file=Path("/g/data/fj7/Copernicus/Sentinel-1/C-SAR/GRD/2021/2021-09/60S120E-65S125E/S1B_EW_GRDM_1SDH_20210914T112333_20210914T112403_028693_036C96_3EA8.zip"),
    sensor="S1B",
    start_date=datetime(2021,9,14,11,23,33),
    stop_date=datetime(2021,9,14,11,24,3)
)

# Parametrisation list shared by the tests below
scenes = [scene_1, scene_2]

@pytest.mark.parametrize("scene", scenes)
def test_parse_scene_file_dates(scene: Scene):
    """Check the (start, stop) tuple parsed from the scene ID."""
    expected = (scene.start_date, scene.stop_date)
    parsed = parse_scene_file_dates(scene.id)
    assert parsed == expected


@pytest.mark.parametrize("scene", scenes)
def test_parse_scene_file_sensor(scene: Scene):
    """Check the sensor string parsed from the scene ID."""
    parsed_sensor = parse_scene_file_sensor(scene.id)
    assert parsed_sensor == scene.sensor


Loading