Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@ CDSE_PASSWORD=
AUS_COP_HUB_LOGIN=
AUS_COP_HUB_PASSWORD=
AUS_COP_HUB_CLIENT_ID=odata
AUS_COP_HUB_CLIENT_SECRET=
AUS_COP_HUB_CLIENT_SECRET=
PYGSSEARCH_ENV_EXECUTABLE=
PYGSSEARCH_ENV_NAME=pygsssearch-env
16 changes: 8 additions & 8 deletions pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ test-pipeline = "pytest tests/sar_pipeline/ --ignore=tests/sar_pipeline/isce3_rt
# isce3-rtc tests that require credentials - not currently setup for github but should be run locally before PRs
test-isce3-rtc = "pytest tests/sar_pipeline/isce3_rtc -o log_cli=true --capture=tee-sys --log-cli-level=INFO -v -s"
test-isce3-rtc-full-docker-run = "pytest tests/sar_pipeline/isce3_rtc/test_full_docker_build_and_run.py -o log_cli=true --capture=tee-sys --log-cli-level=INFO -v -s"
# test queries from all providers. pygssearch conda environment is required for the AUS_COP_HUB test
test-provider-queries="pixi run install-pygssearch-env && pytest tests/sar_pipeline/test_scenes.py -o log_cli=true --capture=tee-sys --log-cli-level=INFO -v -s"
# test downloads from all providers. pygssearch conda environment is required for the AUS_COP_HUB test
test-isce3-rtc-downloads= "pixi run install-pygssearch-env && pytest tests/sar_pipeline/isce3_rtc/test_downloads.py -o log_cli=true --capture=tee-sys --log-cli-level=INFO -v -s"
# nci specific tests that should be run locally on the nci
Expand Down
199 changes: 143 additions & 56 deletions sar_pipeline/preparation/downloads/scenes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from cdsetool.credentials import Credentials
from cdsetool.download import download_features
from cdsetool.monitor import StatusMonitor
from typing import Optional, Union

from sar_pipeline.utils.general import log_timing
from sar_pipeline.utils.sentinel1 import extract_metadata_from_s1_id
Expand All @@ -22,6 +23,12 @@
VALID_SCENE_DATA_SOURCES = ["AUS_COP_HUB", "CDSE", "ASF"]


class MissingEnvironmentManagerError(Exception):
    """Exception raised when no environment manager settings are supplied."""


class MissingCredentialsError(Exception):
"""Exception raised when no credentials are supplied."""

Expand Down Expand Up @@ -318,19 +325,117 @@ def download_scene_from_cdse(
return scene_zip_path, cdse_scene_metadata


def query_scene_from_aus_cop_hub(
    scene: str,
    pygssearch_env_executable: Union[str, Path],
    pygssearch_env_name: Union[str, Path],
    service: str = "https://catalogue.copernicus.gov.au/odata/v1",
) -> tuple[str, dict]:
    """Query a scene's metadata from the Copernicus Australasia Regional Data Hub.

    The query is made with pygssearch - https://pypi.org/project/pygssearch/ -
    which is installed in a separate conda/mamba environment and called in a
    subprocess due to package conflicts. No credentials are passed for the query.

    Parameters
    ----------
    scene : str
        scene name. e.g. S1A_IW_SLC__1SSH_20220101T124744_20220101T124814_041267_04E7A2_1DAD
    pygssearch_env_executable : Union[str, Path]
        Executable for running commands with the environment manager containing the
        pygssearch environment. Can be an alias for the executable (e.g. micromamba)
        or a path to the executable (e.g. /path/to/micromamba/bin/micromamba).
        The manager is detected from the executable's file name first, and from
        the full string only if the file name matches neither manager.
    pygssearch_env_name : Union[str, Path]
        Environment containing an installation of pygssearch that will be called
        in a subprocess. It is passed to ``run`` with ``-p`` when a conda
        executable is detected and with ``-n`` when a mamba/micromamba executable
        is detected.
        NOTE(review): ``-p`` conventionally takes an environment prefix (path)
        and ``-n`` an environment name - confirm the supplied value matches the
        manager in use.
    service : str, optional
        Service to query, by default "https://catalogue.copernicus.gov.au/odata/v1"

    Returns
    -------
    tuple[str, dict]
        The command string that was run for the query (including the
        ``--name``, ``--format`` and ``--attributes`` arguments and a trailing
        space) and the metadata dict of the single scene found.

    Raises
    ------
    ValueError
        Supplied environment executable contains neither "conda" nor "mamba",
        or the parsed query response is not a list.
    NonSingleSceneResultError
        0, or more than one SLC result is found
    """

    logger.info("Using pygssearch to query Aus Cop Hub for scene metadata")

    # Work out which environment manager is in use. The executable's own file
    # name is inspected before the full string so that a parent directory such
    # as "miniconda3" does not cause a mamba/micromamba executable to be
    # treated as conda.
    executable_str = str(pygssearch_env_executable)
    env_name_cli_arg = None
    for candidate in (Path(executable_str).name, executable_str):
        if "conda" in candidate:
            env_name_cli_arg = "-p"
            break
        if "mamba" in candidate:
            env_name_cli_arg = "-n"
            break
    if env_name_cli_arg is None:
        raise ValueError(
            f"Supported environment managers are conda or mamba (including micromamba). Supplied environment manager executable was {pygssearch_env_executable}"
        )

    # Command prefix that executes a command inside the pygssearch environment
    environment_cmd = (
        f"{pygssearch_env_executable} run {env_name_cli_arg} {pygssearch_env_name} "
    )

    # Query for the scene -- no credentials required when querying
    pygss_cmd = (
        f"pygssearch --service {service} --name {scene} --format _ --attributes "
    )

    # Full command handed to the shell; also returned to the caller
    run_cmd = environment_cmd + pygss_cmd

    # Run the query, merging stderr into stdout as a single text stream
    completed = subprocess.run(
        run_cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    output = completed.stdout

    try:
        # convert the pygssearch result to a python object
        # an empty list is returned if no results found, else list of json
        aus_cophub_scenes_metadata: list[dict] = ast.literal_eval(output)
    except Exception:
        # Include the raw output: it usually holds the environment manager's or
        # pygssearch's own error message. No credentials are part of this query.
        logger.error(
            "Could not convert Aus Cop Hub query response to list of JSON data. "
            "Check service and requested scene. Exit code: %s. Raw output:\n%s",
            completed.returncode,
            output,
        )
        raise

    # A non-list literal (e.g. a bare string or dict) would otherwise slip
    # through the length check below and be returned as if it were metadata.
    if not isinstance(aus_cophub_scenes_metadata, list):
        raise ValueError(
            "Expected a list of scene metadata from pygssearch, "
            f"got {type(aus_cophub_scenes_metadata).__name__}"
        )

    # ensure only one slc found
    if len(aus_cophub_scenes_metadata) != 1:
        raise NonSingleSceneResultError(
            f"Expected 1 scene, found {len(aus_cophub_scenes_metadata)} for scene id : {scene}"
        )

    aus_cophub_scene_metadata = aus_cophub_scenes_metadata[0]
    logger.info("Scene metadata found")

    return run_cmd, aus_cophub_scene_metadata


@log_timing
def download_scene_from_aus_cop_hub(
scene: str,
download_folder: Path,
make_folder: bool = True,
unzip: bool = True,
aus_cop_hub_login: str | None = None,
aus_cop_hub_pass: str | None = None,
aus_cop_hub_client_id: str | None = None,
aus_cop_hub_client_secret: str | None = None,
aus_cop_hub_login: Optional[str] = None,
aus_cop_hub_pass: Optional[str] = None,
aus_cop_hub_client_id: Optional[str] = None,
aus_cop_hub_client_secret: Optional[str] = None,
service: str = "https://catalogue.copernicus.gov.au/odata/v1",
token_url: str = "https://auth.copernicus.gov.au/realms/gss/protocol/openid-connect/token",
pygssearch_conda_env_path: str | Path = None,
pygssearch_env_executable: Optional[Union[str, Path]] = None,
pygssearch_env_name: Optional[str] = None,
) -> tuple[Path, dict]:
"""Download the scene and query associated metadata from the Copernicus
Australasia Regional Data Hub. Function makes use of pygssearch -
Expand Down Expand Up @@ -364,9 +469,12 @@ def download_scene_from_aus_cop_hub(
Service to query, by default "https://catalogue.copernicus.gov.au/odata/v1"
token_url : str, optional
URL to validate token, by default "https://auth.copernicus.gov.au/realms/gss/protocol/openid-connect/token"
pygssearch_conda_env_path: str | Path, optional
Path to the conda environment containing an installation of pygssearch that will be called in a
subprocess. If not specified, the env variable PYGSSEARCH_CONDA_ENV will be used.
pygssearch_env_executable: str | Path, optional
Path or command line alias for the environment manager executable (conda/mamba) used to run the pygssearch environment.
If not specified, env variable PYGSSEARCH_ENV_EXECUTABLE will be used.
pygssearch_env_name: str, optional
Name of the conda/mamba environment containing an installation of pygssearch that will be called in a
subprocess. If not specified, env variable PYGSSEARCH_ENV_NAME will be used.

Returns
-------
Expand All @@ -377,6 +485,8 @@ def download_scene_from_aus_cop_hub(
------
MissingCredentialsError
Required credentials are not set
MissingEnvironmentManagerError
Required environment manager (conda/mamba) variables are not set
NonSingleSceneResultError
Could not find exactly 1 scene
RuntimeError
Expand All @@ -391,9 +501,10 @@ def download_scene_from_aus_cop_hub(
aus_cop_hub_client_secret = aus_cop_hub_client_secret or os.getenv(
"AUS_COP_HUB_CLIENT_SECRET"
)
pygssearch_conda_env_path = pygssearch_conda_env_path or os.getenv(
"PYGSSEARCH_CONDA_ENV"
pygssearch_env_executable = pygssearch_env_executable or os.getenv(
"PYGSSEARCH_ENV_EXECUTABLE"
)
pygssearch_env_name = pygssearch_env_name or os.getenv("PYGSSEARCH_ENV_NAME")

# Check for any missing credentials
missing_vars = []
Expand All @@ -411,61 +522,37 @@ def download_scene_from_aus_cop_hub(
f"Missing credentials: {', '.join(missing_vars)}. Please pass them as arguments or set them as environment variables."
)

# Check for missing pygssearch environment manager parameters
if not (pygssearch_env_executable and pygssearch_env_name):
err_string = (
"Environment manager or environment name were not provided. Please provide "
"the pygssearch_env_executable and pygssearch_env_name arguments "
"or set the PYGSSEARCH_ENV_EXECUTABLE and PYGSSEARCH_ENV_NAME environment variables"
)
raise MissingEnvironmentManagerError(err_string)

# Create a folder for download if requested
if make_folder:
os.makedirs(download_folder, exist_ok=True)

# use the pygssearch command line tool to query scene metadata and then download
base_query = (
f"conda run -p {pygssearch_conda_env_path} "
f"pygssearch --service {service} "
# Run the initial query to get the base run command plus scene metadata.
# Keyword arguments are used because the callee's positional order is
# (scene, pygssearch_env_executable, pygssearch_env_name, service); passing
# `service` second would hand the service URL over as the executable.
base_cmd, aus_cophub_scene_metadata = query_scene_from_aus_cop_hub(
    scene=scene,
    pygssearch_env_executable=pygssearch_env_executable,
    pygssearch_env_name=pygssearch_env_name,
    service=service,
)

# Add additional query parameters to the base command to enable downloading
download_cli_string = (
f"--username {aus_cop_hub_login} "
f"--password {aus_cop_hub_pass} "
f"--token_url {token_url} "
f"--client_id {aus_cop_hub_client_id} "
f"--client_secret {aus_cop_hub_client_secret} "
f"--name {scene} "
f"--download "
f"--output {download_folder} "
)
run_cmd = base_cmd + download_cli_string

# structure query for metadata
metadata_cmd = base_query + f"--format _ " + f"--attributes "
logger.info(f"Using pygssearch to query Aus Cop Hub for scene metadata")

process = subprocess.Popen(
metadata_cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
bufsize=1,
)

try:
# convert the pygssearch result to a python object
# an emtpy list is returned if no results found, else list of jsons
output, _ = process.communicate()
sanitised_output = re.sub(
r"(--?(client_secret|password)[=\s]+)\S+", r"\1****", output
)
aus_cophub_scenes_metadata = ast.literal_eval(sanitised_output)
except Exception as e:
logger.error(
"Could not convert Aus Cop Hub query response to list of JSON data. Check credentials and request.",
)
raise

# ensure only one slc found
if len(aus_cophub_scenes_metadata) != 1:
raise NonSingleSceneResultError(
f"Expected 1 scene, found {len(aus_cophub_scenes_metadata)} for scene id : {scene}"
)
else:
aus_cophub_scene_metadata = aus_cophub_scenes_metadata[0]
logger.info(f"Scene metadata found")

# create query for download
download_cmd = base_query + "--download " + f"--output {download_folder}"

# make scene .SAFE and zip paths
# Check if scene has been previously downloaded
scene_zip_path = Path(download_folder) / f"{scene}.zip"
scene_safe_path = scene_zip_path.with_suffix(".SAFE")

Expand All @@ -482,7 +569,7 @@ def download_scene_from_aus_cop_hub(
try:
logger.info(f"Download in progress...")
process = subprocess.Popen(
download_cmd,
run_cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
Expand Down
10 changes: 2 additions & 8 deletions tests/sar_pipeline/isce3_rtc/test_downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
"AUS_COP_HUB_PASSWORD",
"AUS_COP_HUB_CLIENT_ID",
"AUS_COP_HUB_CLIENT_SECRET",
"PYGSSEARCH_ENV_EXECUTABLE",
"PYGSSEARCH_ENV_NAME",
]
# check if the required env variables are set
missing = [var for var in REQUIRED_ENV_VARIABLES if not os.getenv(var)]
Expand Down Expand Up @@ -166,14 +168,6 @@ class ProductDownloadTest:
@pytest.mark.parametrize("test_case", TEST_CASES)
def test_product_downloads(test_case):

# if AUS_COP_HUB is selected, set the required environment variable
# pygssearch conda env is installed via pixi task in pyproject.toml
if "AUS_COP_HUB" in test_case.scene_data_sources:
os.environ["PYGSSEARCH_CONDA_ENV"] = str(
Path(os.getenv("CONDA_EXE")).parent.parent / "envs" / "pygssearch-env"
)
logger.info(f"PYGSSEARCH_CONDA_ENV : {os.getenv("PYGSSEARCH_CONDA_ENV")}")

# test cases we expect to pass
if test_case.passes:

Expand Down
Loading
Loading