diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 0000000..c73bc9d --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,10 @@ +name: Ruff +on: [ pull_request ] +jobs: + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/ruff-action@v3 + - name: Ruff format + run: ruff format --diff diff --git a/README.md b/README.md index 77644fd..cb9b74e 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ or [botos3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) [![boto3](https://img.shields.io/badge/boto3->1.34-blue.svg)](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) [![copernicusmarine](https://img.shields.io/badge/copernicusmarine->1.06-blue.svg)](https://help.marine.copernicus.eu/en/collections/4060068-copernicus-marine-toolbox) - +[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) * [Marine Data Store ToolBox](#marine-data-store-toolbox) diff --git a/mds.py b/mds.py index f0fc480..69432f0 100755 --- a/mds.py +++ b/mds.py @@ -19,119 +19,272 @@ def main(): @main.command() -@click.option('-o', '--output-directory', required=True, type=str, help='Output directory') -@click.option('-f', '--output-filename', required=True, type=str, help='Output filename') -@click.option('-i', '--dataset-id', required=True, type=str, help='Dataset Id') -@click.option('-v', '--variables', multiple=True, type=str, help='Variables to download') -@click.option('-x', '--minimum-longitude', type=float, help='Minimum longitude for the subset.') -@click.option('-X', '--maximum-longitude', type=float, help='Maximum longitude for the subset. ') -@click.option('-y', '--minimum-latitude', type=float, - help='Minimum latitude for the subset. Requires a float within this range: [-90<=x<=90]') -@click.option('-Y', '--maximum-latitude', type=float, - help='Maximum latitude for the subset. Requires a float within this range: [-90<=x<=90]') -@click.option('-z', '--minimum-depth', type=float, - help='Minimum depth for the subset. Requires a float within this range: [x>=0]') -@click.option('-Z', '--maximum-depth', type=float, - help='Maximum depth for the subset. Requires a float within this range: [x>=0]') -@click.option('-t', '--start-datetime', type=str, default=False, - help='Start datetime as: %Y|%Y-%m-%d|%Y-%m-%dT%H:%M:%S|%Y-%m-%d %H:%M:%S|%Y-%m-%dT%H:%M:%S.%fZ') -@click.option('-T', '--end-datetime', type=str, default=False, - help='End datetime as: %Y|%Y-%m-%d|%Y-%m-%dT%H:%M:%S|%Y-%m-%d %H:%M:%S|%Y-%m-%dT%H:%M:%S.%fZ') -@click.option('-r', '--dry-run', is_flag=True, default=False, help='Dry run') -@click.option('-g', '--dataset-version', type=str, default=None, help='Dataset version or tag') -@click.option('-n', '--username', type=str, default=None, help='Username') -@click.option('-w', '--password', type=str, default=None, help='Password') +@click.option( + "-o", "--output-directory", required=True, type=str, help="Output directory" +) +@click.option( + "-f", "--output-filename", required=True, type=str, help="Output filename" +) +@click.option("-i", "--dataset-id", required=True, type=str, help="Dataset Id") +@click.option( + "-v", "--variables", multiple=True, type=str, help="Variables to download" +) +@click.option( + "-x", "--minimum-longitude", type=float, help="Minimum longitude for the subset." 
+)
+@click.option(
+    "-X", "--maximum-longitude", type=float, help="Maximum longitude for the subset."
+)
+@click.option(
+    "-y",
+    "--minimum-latitude",
+    type=float,
+    help="Minimum latitude for the subset. Requires a float within this range: [-90<=x<=90]",
+)
+@click.option(
+    "-Y",
+    "--maximum-latitude",
+    type=float,
+    help="Maximum latitude for the subset. Requires a float within this range: [-90<=x<=90]",
+)
+@click.option(
+    "-z",
+    "--minimum-depth",
+    type=float,
+    help="Minimum depth for the subset. Requires a float within this range: [x>=0]",
+)
+@click.option(
+    "-Z",
+    "--maximum-depth",
+    type=float,
+    help="Maximum depth for the subset. Requires a float within this range: [x>=0]",
+)
+@click.option(
+    "-t",
+    "--start-datetime",
+    type=str,
+    default=False,
+    help="Start datetime as: %Y|%Y-%m-%d|%Y-%m-%dT%H:%M:%S|%Y-%m-%d %H:%M:%S|%Y-%m-%dT%H:%M:%S.%fZ",
+)
+@click.option(
+    "-T",
+    "--end-datetime",
+    type=str,
+    default=False,
+    help="End datetime as: %Y|%Y-%m-%d|%Y-%m-%dT%H:%M:%S|%Y-%m-%d %H:%M:%S|%Y-%m-%dT%H:%M:%S.%fZ",
+)
+@click.option("-r", "--dry-run", is_flag=True, default=False, help="Dry run")
+@click.option(
+    "-g", "--dataset-version", type=str, default=None, help="Dataset version or tag"
+)
+@click.option("-n", "--username", type=str, default=None, help="Username")
+@click.option("-w", "--password", type=str, default=None, help="Password")
 def subset(**kwargs):
-    wrapper.mds_download('subset', **kwargs)
+    wrapper.mds_download("subset", **kwargs)
 
 
 @main.command()
-@click.option('-f', '--filter', required=False, type=str, help='Filter on the online files')
-@click.option('-o', '--output-directory', required=True, type=str, help='Output directory')
-@click.option('-i', '--dataset-id', required=True, type=str, help='Dataset Id')
-@click.option('-g', '--dataset-version', type=str, default=None, help='Dataset version or tag')
+@click.option(
+    "-f", "--filter", required=False, type=str, help="Filter on the online files"
+)
+@click.option(
+    "-o", "--output-directory", required=True, type=str, help="Output directory"
+)
+@click.option("-i", "--dataset-id", required=True, type=str, help="Dataset Id")
+@click.option(
+    "-g", "--dataset-version", type=str, default=None, help="Dataset version or tag"
+)
 # @click.option('-s', '--service', type=str, default='files',
 #               help="Force download through one of the available services using the service name among "
 #               "['original-files', 'ftp'] or its short name among ['files', 'ftp'].")
-@click.option('-d', '--dry-run', is_flag=True, default=False, help='Dry run')
-@click.option('-u', '--update', is_flag=True, default=False,
-              help='If the file not exists, download it, otherwise update it it changed on mds')
-@click.option('-v', '--dataset-version', type=str, default=None, help='Dry run')
-@click.option('-nd', '--no-directories', type=str, default=True,
-              help='Option to not recreate folder hierarchy in output directory')
-@click.option('--force-download', type=str, default=True,
-              help='Flag to skip confirmation before download')
-@click.option('--disable-progress-bar', type=str, default=True, help='Flag to hide progress bar')
-@click.option('-n', '--username', type=str, default=None, help='Username')
-@click.option('-w', '--password', type=str, default=None, help='Password')
+@click.option("-d", "--dry-run", is_flag=True, default=False, help="Dry run")
+@click.option(
+    "-u",
+    "--update",
+    is_flag=True,
+    default=False,
+    help="If the file does not exist, download it; otherwise update it if it changed on mds",
+)
+@click.option(
+    "-v", "--dataset-version", type=str, default=None, help="Dataset version or tag"
+)
+@click.option(
+    "-nd",
+    "--no-directories",
+    type=str,
+    default=True,
+    help="Option to not recreate folder hierarchy in output directory",
+)
+@click.option(
+    "--force-download",
+    type=str,
+    default=True,
+    help="Flag to skip confirmation before download",
+)
+@click.option(
+    "--disable-progress-bar", type=str, default=True, help="Flag to hide progress bar"
+)
+@click.option("-n", "--username", type=str, default=None, help="Username")
+@click.option("-w", "--password", type=str, default=None, help="Password")
 def get(**kwargs):
-    update = kwargs.pop('update')
+    update = kwargs.pop("update")
     if update:
         wrapper.mds_update_download(**kwargs)
     else:
-        wrapper.mds_download('get', **kwargs)
+        wrapper.mds_download("get", **kwargs)
 
 
 @main.command()
-@click.argument('dataset_id', type=str)
-@click.argument('mds_filter', type=str)
-@click.option('-g', '--dataset-version', type=str, default=None, help='Dataset version or tag')
+@click.argument("dataset_id", type=str)
+@click.argument("mds_filter", type=str)
+@click.option(
+    "-g", "--dataset-version", type=str, default=None, help="Dataset version or tag"
+)
 def file_list(*args, **kwargs):
     mds_file_list = wrapper.mds_list(*args, **kwargs)
     print(f"{' '.join(mds_file_list)}")
 
 
 @main.command()
-@click.option('-e', '--s3_file', type=str, default=None,
-              help='Path to a specific s3 file - if present, other parameters are ignored.')
-@click.option('-p', '--product', type=str, default=None, help='The product name')
-@click.option('-i', '--dataset_id', type=str, default=None, help='The datasetID')
-@click.option('-g', '--version', type=str, default=None,
-              help='Force the selection of a specific dataset version')
-@click.option('-s', '--subdir', type=str, default=None, help='Subdir structure on mds (i.e. {year}/{month})')
-@click.option('-f', '--mds_filter', type=str, default=None, help='Pattern to filter data (no regex)')
+@click.option(
+    "-e",
+    "--s3_file",
+    type=str,
+    default=None,
+    help="Path to a specific s3 file - if present, other parameters are ignored.",
+)
+@click.option("-p", "--product", type=str, default=None, help="The product name")
+@click.option("-i", "--dataset_id", type=str, default=None, help="The datasetID")
+@click.option(
+    "-g",
+    "--version",
+    type=str,
+    default=None,
+    help="Force the selection of a specific dataset version",
+)
+@click.option(
+    "-s",
+    "--subdir",
+    type=str,
+    default=None,
+    help="Subdir structure on mds (i.e. 
{year}/{month})", +) +@click.option( + "-f", + "--mds_filter", + type=str, + default=None, + help="Pattern to filter data (no regex)", +) def etag(**kwargs): s3_files = wrapper.mds_etag(**kwargs) for s3_file in s3_files: - print(f'{s3_file.name} {s3_file.etag}') + print(f"{s3_file.name} {s3_file.etag}") @main.command() -@click.option('-b', '--bucket', 's3_bucket', required=True, type=str, help='Bucket name') -@click.option('-f', '--filter', 'file_filter', required=True, type=str, help='Filter on the online files') -@click.option('-o', '--output-directory', required=True, type=str, help='Output directory') -@click.option('-p', '--product', required=True, type=str, help='The product name') -@click.option('-i', '--dataset-id', required=True, type=str, help='Dataset Id') -@click.option('-g', '--dataset-version', type=str, default=None, help='Dataset version or tag') -@click.option('-r', '--recursive', is_flag=True, default=False, help='List recursive all s3 files') -@click.option('--threads', 'n_threads', type=int, default=None, help='Downloading file using threads') -@click.option('-s', '--subdir', type=str, default=None, - help='Dataset directory on mds (i.e. {year}/{month}) - If present boost the connection') -@click.option('--overwrite', required=False, is_flag=True, default=False, - help='Force overwrite of the file') -@click.option('--keep-timestamps', required=False, is_flag=True, default=False, - help='After the download, set the correct timestamp to the file') -@click.option('--sync-time', required=False, is_flag=True, default=False, - help='Update the file if it changes on the server using last update information') -@click.option('--sync-etag', required=False, is_flag=True, default=False, - help='Update the file if it changes on the server using etag information') +@click.option( + "-b", "--bucket", "s3_bucket", required=True, type=str, help="Bucket name" +) +@click.option( + "-f", + "--filter", + "file_filter", + required=True, + type=str, + help="Filter on the online files", +) +@click.option( + "-o", "--output-directory", required=True, type=str, help="Output directory" +) +@click.option("-p", "--product", required=True, type=str, help="The product name") +@click.option("-i", "--dataset-id", required=True, type=str, help="Dataset Id") +@click.option( + "-g", "--dataset-version", type=str, default=None, help="Dataset version or tag" +) +@click.option( + "-r", "--recursive", is_flag=True, default=False, help="List recursive all s3 files" +) +@click.option( + "--threads", + "n_threads", + type=int, + default=None, + help="Downloading file using threads", +) +@click.option( + "-s", + "--subdir", + type=str, + default=None, + help="Dataset directory on mds (i.e. 
{year}/{month}) - If present boost the connection", +) +@click.option( + "--overwrite", + required=False, + is_flag=True, + default=False, + help="Force overwrite of the file", +) +@click.option( + "--keep-timestamps", + required=False, + is_flag=True, + default=False, + help="After the download, set the correct timestamp to the file", +) +@click.option( + "--sync-time", + required=False, + is_flag=True, + default=False, + help="Update the file if it changes on the server using last update information", +) +@click.option( + "--sync-etag", + required=False, + is_flag=True, + default=False, + help="Update the file if it changes on the server using etag information", +) def s3_get(**kwargs): mds_s3.download_files(**kwargs) @main.command() -@click.option('-b', '--bucket', 's3_bucket', required=True, type=str, help='Filter on the online files') -@click.option('-f', '--filter', 'file_filter', required=True, type=str, help='Filter on the online files') -@click.option('-p', '--product', required=True, type=str, help='The product name') -@click.option('-i', '--dataset-id', required=False, type=str, help='Dataset Id') -@click.option('-g', '--dataset-version', type=str, default=None, help='Dataset version or tag') -@click.option('-s', '--subdir', type=str, default=None, - help='Dataset directory on mds (i.e. {year}/{month}) - If present boost the connection') -@click.option('-r', '--recursive', is_flag=True, default=False, help='List recursive all s3 files') +@click.option( + "-b", + "--bucket", + "s3_bucket", + required=True, + type=str, + help="Filter on the online files", +) +@click.option( + "-f", + "--filter", + "file_filter", + required=True, + type=str, + help="Filter on the online files", +) +@click.option("-p", "--product", required=True, type=str, help="The product name") +@click.option("-i", "--dataset-id", required=False, type=str, help="Dataset Id") +@click.option( + "-g", "--dataset-version", type=str, default=None, help="Dataset version or tag" +) +@click.option( + "-s", + "--subdir", + type=str, + default=None, + help="Dataset directory on mds (i.e. 
{year}/{month}) - If present boost the connection", +) +@click.option( + "-r", "--recursive", is_flag=True, default=False, help="List recursive all s3 files" +) def s3_list(**kwargs): s3_files = mds_s3.get_file_list(**kwargs) print(f"{' '.join([f.file for f in s3_files])}") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/download/copernicus.py b/src/download/copernicus.py index 7bc080c..91fde96 100644 --- a/src/download/copernicus.py +++ b/src/download/copernicus.py @@ -6,20 +6,18 @@ from src.download import utils -GET_MANDATORY_ATTRS = ['filter', 'output_directory', 'dataset_id'] +GET_MANDATORY_ATTRS = ["filter", "output_directory", "dataset_id"] SUBSET_MANDATORY_ATTRS = [ - 'output_filename', 'output_directory', 'dataset_id', - 'start_datetime', 'end_datetime' + "output_filename", + "output_directory", + "dataset_id", + "start_datetime", + "end_datetime", ] def subset(**subset_kwargs) -> pathlib.Path: - subset_kwargs.update( - { - 'force_download': True, - 'disable_progress_bar': True - } - ) + subset_kwargs.update({"force_download": True, "disable_progress_bar": True}) utils.check_dict_validity(subset_kwargs, SUBSET_MANDATORY_ATTRS) # download @@ -35,21 +33,20 @@ def get(**get_kwargs) -> List[pathlib.Path]: # download utils.pprint_dict(get_kwargs) - result = copernicusmarine.get( - **get_kwargs, - no_metadata_cache=True - ) + result = copernicusmarine.get(**get_kwargs, no_metadata_cache=True) return result def get_s3_native(product, dataset, version) -> str: - stac_url = f'https://stac.marine.copernicus.eu/metadata/{product}/{dataset}_{version}/dataset.stac.json' + stac_url = f"https://stac.marine.copernicus.eu/metadata/{product}/{dataset}_{version}/dataset.stac.json" response = requests.get(stac_url) # Check if the request was successful (status code 200) if response.status_code != 200: - raise ValueError(f'Unable to get native from: {stac_url} - {response.status_code}: {response.text}') + raise ValueError( + f"Unable to get native from: {stac_url} - {response.status_code}: {response.text}" + ) dataset_stac = response.json() - return dataset_stac['assets']['native']['href'] + return dataset_stac["assets"]["native"]["href"] diff --git a/src/download/mds_s3.py b/src/download/mds_s3.py index 5f0fd1f..fb62444 100644 --- a/src/download/mds_s3.py +++ b/src/download/mds_s3.py @@ -8,53 +8,93 @@ from src.lib import logging_config # conf -logger = logging_config.set_up('mds_s3') +logger = logging_config.set_up("mds_s3") THREADS_TIMEOUT = 10 * 60 @utils.elapsed_time def get_file_list( - s3_bucket: str, product: str, dataset_id: str, dataset_version: str, file_filter: str, subdir: str, - recursive: bool + s3_bucket: str, + product: str, + dataset_id: str, + dataset_version: str, + file_filter: str, + subdir: str, + recursive: bool, ): my_s3 = s3_singleton.S3() - return my_s3.file_list(s3_bucket, product, dataset_id, dataset_version, file_filter, subdir, recursive) + return my_s3.file_list( + s3_bucket, product, dataset_id, dataset_version, file_filter, subdir, recursive + ) @utils.elapsed_time def download_files( - s3_bucket: str, product: str, dataset_id: str, dataset_version: str, file_filter: str, output_directory: str, - overwrite: bool = False, keep_timestamps: bool = False, subdir: str = None, - sync_etag: bool = False, sync_time: bool = False, n_threads=None, recursive: bool = False + s3_bucket: str, + product: str, + dataset_id: str, + dataset_version: str, + file_filter: str, + output_directory: str, + overwrite: bool = False, + keep_timestamps: bool = 
False, + subdir: str = None, + sync_etag: bool = False, + sync_time: bool = False, + n_threads=None, + recursive: bool = False, ): - files_list = get_file_list(s3_bucket, product, dataset_id, dataset_version, file_filter, subdir, recursive) + files_list = get_file_list( + s3_bucket, product, dataset_id, dataset_version, file_filter, subdir, recursive + ) if len(files_list) == 0: - logger.error(f'No match found for {file_filter} in {product}/{dataset_id}_{dataset_version}') - raise FileNotFoundError(f'No match found for {file_filter} in {product}/{dataset_id}_{dataset_version}') + logger.error( + f"No match found for {file_filter} in {product}/{dataset_id}_{dataset_version}" + ) + raise FileNotFoundError( + f"No match found for {file_filter} in {product}/{dataset_id}_{dataset_version}" + ) if not n_threads: - _download(files_list, keep_timestamps, output_directory, overwrite, sync_etag, sync_time) + _download( + files_list, + keep_timestamps, + output_directory, + overwrite, + sync_etag, + sync_time, + ) else: # if user select more threads than the files to download if n_threads > len(files_list): - logger.warning(f'Resize number of threads to {len(files_list)}') + logger.warning(f"Resize number of threads to {len(files_list)}") files_list_split = utils.split_list_into_parts(files_list, n_threads) if n_threads != len(files_list_split): - raise ValueError(f'Mismatch between number of threads ({n_threads}) ' - f'and threads parameters ({len(files_list_split)})') + raise ValueError( + f"Mismatch between number of threads ({n_threads}) " + f"and threads parameters ({len(files_list_split)})" + ) # build input args for each threads threads_args = [ [ - files_list_split[i], keep_timestamps, output_directory, overwrite, sync_etag, sync_time - ] for i in range(n_threads) + files_list_split[i], + keep_timestamps, + output_directory, + overwrite, + sync_etag, + sync_time, + ] + for i in range(n_threads) ] with Pool(processes=10) as pool: # start threads - results: List = [pool.apply_async(_download, thread_args) for thread_args in threads_args] + results: List = [ + pool.apply_async(_download, thread_args) for thread_args in threads_args + ] try: for r in results: r.get(timeout=THREADS_TIMEOUT) @@ -66,7 +106,9 @@ def download_files( raise -def _download(files_list, keep_timestamps, output_directory, overwrite, sync_etag, sync_time): +def _download( + files_list, keep_timestamps, output_directory, overwrite, sync_etag, sync_time +): """Download files from S3 to local filesystem""" try: my_s3 = s3_singleton.S3() @@ -75,36 +117,42 @@ def _download(files_list, keep_timestamps, output_directory, overwrite, sync_eta dest_file = str(os.path.join(output_directory, filename)) # check if file must me download - if not file_can_be_downloaded(s3_file, dest_file, overwrite, sync_time, sync_etag): - logger.warning(f'Skipping {s3_file} - local: {dest_file}') + if not file_can_be_downloaded( + s3_file, dest_file, overwrite, sync_time, sync_etag + ): + logger.warning(f"Skipping {s3_file} - local: {dest_file}") continue os.makedirs(os.path.dirname(dest_file), exist_ok=True) - logger.info(f'Downloading: {s3_file.file} as {dest_file}') + logger.info(f"Downloading: {s3_file.file} as {dest_file}") my_s3.download(s3_file.bucket, s3_file.file, dest_file) if keep_timestamps: - logger.info(f'Set original last modified: {s3_file.last_modified} to {dest_file}') + logger.info( + f"Set original last modified: {s3_file.last_modified} to {dest_file}" + ) last_modified_timestamp = s3_file.last_modified.timestamp() os.utime(dest_file, 
(last_modified_timestamp, last_modified_timestamp)) except BaseException as e: - logger.critical(f'Error: {e}') + logger.critical(f"Error: {e}") return -def file_can_be_downloaded(s3_file: S3File, dest_file: str, overwrite: bool, sync_time: bool, sync_etag: bool) -> bool: +def file_can_be_downloaded( + s3_file: S3File, dest_file: str, overwrite: bool, sync_time: bool, sync_etag: bool +) -> bool: """Check if the destination file can be downloaded/overwritten""" if not os.path.exists(dest_file) or overwrite: return True if sync_time and not utils.timestamp_match(dest_file, s3_file.last_modified): - logger.info(f'{dest_file} is outdated') + logger.info(f"{dest_file} is outdated") return True if sync_etag and not utils.etag_match(dest_file, s3_file.etag): - logger.info(f'{dest_file} is outdated') + logger.info(f"{dest_file} is outdated") return True return False diff --git a/src/download/s3_singleton.py b/src/download/s3_singleton.py index 4e3939b..b3f36fe 100644 --- a/src/download/s3_singleton.py +++ b/src/download/s3_singleton.py @@ -13,39 +13,42 @@ # conf lock = multiprocessing.Lock() S3_ENDPOINT = "https://s3.waw3-1.cloudferro.com" -logger = logging_config.set_up('s3') +logger = logging_config.set_up("s3") class Singleton(type): """Multiprocessing safe implementation of a singleton class""" + _instances = {} def __call__(cls, *args, **kwargs): if cls not in cls._instances: with lock: if cls not in cls._instances: - cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) + cls._instances[cls] = super(Singleton, cls).__call__( + *args, **kwargs + ) return cls._instances[cls] def clean_corrupted_file(dest_file: str): """Removes corrupted file if it exists""" # generally s3_client download the file as dest_file.STRING - files_to_clean = glob.glob(f'{dest_file}.*') + files_to_clean = glob.glob(f"{dest_file}.*") for file_to_clean in files_to_clean: - logger.info(f'Cleaning {file_to_clean}') + logger.info(f"Cleaning {file_to_clean}") os.remove(file_to_clean) def build_s3_path(dataset, products, subdir, version): """Build the s3 path with the provided information""" - s3_path = f'native/{products}' + s3_path = f"native/{products}" if dataset: - s3_path += f'/{dataset}' + s3_path += f"/{dataset}" if version: - s3_path += f'_{version}' + s3_path += f"_{version}" if subdir: - s3_path += f'/{subdir}' + s3_path += f"/{subdir}" return s3_path @@ -53,6 +56,7 @@ class S3(metaclass=Singleton): """ Multiprocessing safe implementation of a singleton class to provide a unique client connection to a s3 endpoint. 
""" + _instance = None def __new__(cls, *args, **kwargs): @@ -76,8 +80,14 @@ def s3(self): return self.__s3 def file_list( - self, s3_bucket: str, products: str, dataset: str, version: str, file_filter: str, - subdir: str = None, recursive: bool = False + self, + s3_bucket: str, + products: str, + dataset: str, + version: str, + file_filter: str, + subdir: str = None, + recursive: bool = False, ) -> list[S3File]: """ Listing file on s3 bucket and return the list of file that match the provided filter @@ -93,20 +103,22 @@ def file_list( """ files_found = [] paginator = self.__paginator - delimiter = '*' if recursive else '/' + delimiter = "*" if recursive else "/" s3_path = build_s3_path(dataset, products, subdir, version) logger.info(f"Listing files in {s3_bucket}/{s3_path}") - for s3_result in paginator.paginate(Bucket=s3_bucket, Prefix=f'{s3_path}/', Delimiter=delimiter): - if 'Contents' not in s3_result: + for s3_result in paginator.paginate( + Bucket=s3_bucket, Prefix=f"{s3_path}/", Delimiter=delimiter + ): + if "Contents" not in s3_result: raise FileNotFoundError(f"No result found for {s3_bucket}/{s3_path}") - for content in s3_result['Contents']: - etag = content['ETag'].replace('"', '') - s3_file = content['Key'] - last_modified = content['LastModified'] - if fnmatch.fnmatch(s3_file, f'*{file_filter}*'): + for content in s3_result["Contents"]: + etag = content["ETag"].replace('"', "") + s3_file = content["Key"] + last_modified = content["LastModified"] + if fnmatch.fnmatch(s3_file, f"*{file_filter}*"): files_found.append(S3File(s3_bucket, s3_file, etag, last_modified)) return files_found @@ -121,13 +133,11 @@ def download(self, s3_bucket: str, s3_file: str, dest_file: str) -> None: s3 = self.s3 try: clean_corrupted_file(dest_file) - s3.download_file( - s3_bucket, - s3_file, - dest_file - ) + s3.download_file(s3_bucket, s3_file, dest_file) if not os.path.isfile(dest_file): - raise RuntimeError(f'Unable to download {s3_file} as {dest_file}, unknown error') + raise RuntimeError( + f"Unable to download {s3_file} as {dest_file}, unknown error" + ) except BaseException as e: logger.critical(f"An error occurs during the download: {e}") clean_corrupted_file(dest_file) diff --git a/src/download/s3file.py b/src/download/s3file.py index c945322..d461852 100644 --- a/src/download/s3file.py +++ b/src/download/s3file.py @@ -12,4 +12,4 @@ def __repr__(self): return f" bool: def pid_is_running(pid: int) -> bool: - """ Check For the existence of a unix pid. 0 signal has no effect on the process""" + """Check For the existence of a unix pid. 
0 signal has no effect on the process""" try: # 0 signal doesn't have any effect os.kill(pid, 0) @@ -70,7 +70,7 @@ def get_temporary_directory(output_filename: str, base_directory: str) -> str: """ md5_filename = hashlib.md5(output_filename.encode()).hexdigest() - return os.path.join(base_directory, f'.{md5_filename}') + return os.path.join(base_directory, f".{md5_filename}") def mv_overwrite(file: str, output_directory: str): @@ -96,24 +96,29 @@ def factor_of_1_mb(filesize, num_parts): def calc_etag(input_file, part_size): md5_digests = [] - with open(input_file, 'rb') as f: - for chunk in iter(lambda: f.read(part_size), b''): + with open(input_file, "rb") as f: + for chunk in iter(lambda: f.read(part_size), b""): md5_digests.append(hashlib.md5(chunk).digest()) - return hashlib.md5(b''.join(md5_digests)).hexdigest() + '-' + str(len(md5_digests)) + return hashlib.md5(b"".join(md5_digests)).hexdigest() + "-" + str(len(md5_digests)) def possible_partsizes(filesize, num_parts): - return lambda part_size: part_size < filesize and (float(filesize) / float(part_size)) <= num_parts + return ( + lambda part_size: part_size < filesize + and (float(filesize) / float(part_size)) <= num_parts + ) def compute_etag(input_file, etag): filesize = os.path.getsize(input_file) - num_parts = int(etag.split('-')[1]) + num_parts = int(etag.split("-")[1]) partsizes = [ # Default Partsizes Map 8388608, # aws_cli/boto3 15728640, # s3cmd - factor_of_1_mb(filesize, num_parts) # Used by many clients to upload large files + factor_of_1_mb( + filesize, num_parts + ), # Used by many clients to upload large files ] for part_size in filter(possible_partsizes(filesize, num_parts), partsizes): @@ -124,24 +129,26 @@ def compute_etag(input_file, etag): def elapsed_time(func: Callable): - def decorator(*args, **kwargs): start_time = time.perf_counter() - func_args = f'args={args}' if args else 'args=None' - func_kwargs = f'kwargs={kwargs}' if kwargs else 'kwargs=None' + func_args = f"args={args}" if args else "args=None" + func_kwargs = f"kwargs={kwargs}" if kwargs else "kwargs=None" result = func(*args, **kwargs) elaps_time = time.perf_counter() - start_time - logger.info(f'Elapsed time: {elaps_time} (s) - {func.__name__}({func_args}, {func_kwargs})') + logger.info( + f"Elapsed time: {elaps_time} (s) - {func.__name__}({func_args}, {func_kwargs})" + ) return result + return decorator def etag_match(dest_file, digest: str): - if '-' in digest: - logger.debug('Comparing Etag') + if "-" in digest: + logger.debug("Comparing Etag") local_digest = compute_etag() else: - logger.debug('Comparing md5') + logger.debug("Comparing md5") local_digest = compute_md5(dest_file) return local_digest == digest diff --git a/src/download/wrapper.py b/src/download/wrapper.py index e3df62a..e337acc 100644 --- a/src/download/wrapper.py +++ b/src/download/wrapper.py @@ -1,13 +1,12 @@ +import contextlib +import fcntl import fnmatch import glob import json +import logging import os import shutil import tempfile -import logging -import fcntl -import contextlib - from collections import namedtuple from typing import List @@ -15,31 +14,32 @@ from botocore import UNSIGNED from botocore.config import Config +from src.download import utils, copernicus from src.download.utils import etag_match from src.lib import logging_config -from src.download import utils, copernicus - # log -logger = logging_config.set_up('mds') +logger = logging_config.set_up("mds") # conf -DOWNLOAD_MODES = ['subset', 'get'] +DOWNLOAD_MODES = ["subset", "get"] S3_ENDPOINT = 
"https://s3.waw3-1.cloudferro.com" -S3_FILE = namedtuple('S3_FILE', ['name', 'etag', 'last_modified']) -SYNC_FILE = '.sync' -DB_FILE = '.etag.json' +S3_FILE = namedtuple("S3_FILE", ["name", "etag", "last_modified"]) +SYNC_FILE = ".sync" +DB_FILE = ".etag.json" def extract_s3_info_from_path(s3_file: str): - s3_file = s3_file.removeprefix('s3://') - s3_bucket = s3_file.split('/')[0] - s3_path = s3_file.removeprefix(f'{s3_bucket}/') + s3_file = s3_file.removeprefix("s3://") + s3_bucket = s3_file.split("/")[0] + s3_path = s3_file.removeprefix(f"{s3_bucket}/") return s3_bucket, s3_path @utils.elapsed_time -def mds_etag(s3_file, product, dataset_id, version, subdir, mds_filter) -> List[S3_FILE]: +def mds_etag( + s3_file, product, dataset_id, version, subdir, mds_filter +) -> List[S3_FILE]: s3_endpoint = S3_ENDPOINT s3 = boto3.client( "s3", endpoint_url=s3_endpoint, config=Config(signature_version=UNSIGNED) @@ -49,20 +49,22 @@ def mds_etag(s3_file, product, dataset_id, version, subdir, mds_filter) -> List[ if s3_file: s3_bucket, s3_path = extract_s3_info_from_path(s3_file) else: - s3_bucket, s3_path = get_s3_info(s3_endpoint, product, dataset_id, version, subdir) + s3_bucket, s3_path = get_s3_info( + s3_endpoint, product, dataset_id, version, subdir + ) logger.debug(f"Listing files in {s3_bucket}/{s3_path}") files_found = [] for s3_result in paginator.paginate(Bucket=s3_bucket, Prefix=s3_path): - if 'Contents' not in s3_result: + if "Contents" not in s3_result: raise ValueError(f"No result found for {s3_bucket}/{s3_path}") - contents = s3_result['Contents'] + contents = s3_result["Contents"] for content in contents: - etag = content['ETag'].replace('"', '') - s3_file = content['Key'] - last_modified = content['LastModified'] + etag = content["ETag"].replace('"', "") + s3_file = content["Key"] + last_modified = content["LastModified"] if not mds_filter or fnmatch.fnmatch(s3_file, mds_filter): files_found.append(S3_FILE(s3_file, etag, last_modified)) @@ -74,29 +76,29 @@ def get_s3_info(s3_endpoint, product, dataset, version, subdir): Query copernicium stat web site to retrieve info about s3 bucket and s3 path """ href_native = copernicus.get_s3_native(product, dataset, version) - native_complete_no_endpoint = href_native.replace(f'{s3_endpoint}/', '') - s3_bucket = native_complete_no_endpoint.split('/')[0] - s3_path = native_complete_no_endpoint.removeprefix(f'{s3_bucket}/') - s3_path = f'{s3_path}/{subdir}' + native_complete_no_endpoint = href_native.replace(f"{s3_endpoint}/", "") + s3_bucket = native_complete_no_endpoint.split("/")[0] + s3_path = native_complete_no_endpoint.removeprefix(f"{s3_bucket}/") + s3_path = f"{s3_path}/{subdir}" return s3_bucket, s3_path @utils.elapsed_time def mds_list(dataset_id, mds_filter: str, quiet=True, dataset_version=None) -> List: # mds write as default this file - name cannot be chosen via python - mds_output_filename = 'files_to_download.txt' + mds_output_filename = "files_to_download.txt" tempdir = tempfile.mkdtemp() - output_file = f'{tempdir}/{mds_output_filename}' + output_file = f"{tempdir}/{mds_output_filename}" # mds_output_filename is ignored mds_get_list_attrs = { - 'dataset_id': dataset_id, - 'filter': mds_filter, - 'output_directory': tempdir, - 'create_file_list': mds_output_filename, - 'force_download': True, - 'disable_progress_bar': True, - 'dataset_version': dataset_version + "dataset_id": dataset_id, + "filter": mds_filter, + "output_directory": tempdir, + "create_file_list": mds_output_filename, + "force_download": True, + 
"disable_progress_bar": True, + "dataset_version": dataset_version, } try: @@ -109,9 +111,9 @@ def mds_list(dataset_id, mds_filter: str, quiet=True, dataset_version=None) -> L pass if not os.path.exists(output_file): - raise ValueError(f'An error occurred') + raise ValueError("An error occurred") - with open(output_file, 'r') as f: + with open(output_file, "r") as f: data = f.readlines() shutil.rmtree(tempdir, ignore_errors=True) @@ -121,10 +123,10 @@ def mds_list(dataset_id, mds_filter: str, quiet=True, dataset_version=None) -> L @utils.elapsed_time def mds_download( - download_mode: str, - dry_run: bool = False, - overwrite: bool = False, - **kwargs, + download_mode: str, + dry_run: bool = False, + overwrite: bool = False, + **kwargs, ): """ Wrapper around copernicusmarine too to add missing features: @@ -145,23 +147,25 @@ def mds_download( # check if download mode is supported if download_mode not in DOWNLOAD_MODES: raise ValueError(f"Download mode not supported: '{download_mode}'") - logger.info(f'Download mode: {download_mode}') + logger.info(f"Download mode: {download_mode}") # get mandatory args - output_filename = kwargs['output_filename'] if download_mode == "subset" else kwargs['filter'] - output_directory = kwargs['output_directory'] + output_filename = ( + kwargs["output_filename"] if download_mode == "subset" else kwargs["filter"] + ) + output_directory = kwargs["output_directory"] # set output directory if output_directory is None: output_directory = utils.cwd() - logger.info(f'Output directory: {output_directory}') + logger.info(f"Output directory: {output_directory}") # check if the file is already present - logger.info(f'Output filename: {output_filename}') + logger.info(f"Output filename: {output_filename}") destination_file = os.path.join(output_directory, output_filename) files_found = glob.glob(destination_file) if not overwrite and len(files_found) > 0: - logger.info(f'File already exists: {", ".join(files_found)}') + logger.info(f"File already exists: {', '.join(files_found)}") return # get temporary directory where to download the file @@ -169,90 +173,96 @@ def mds_download( output_filename=output_filename, base_directory=output_directory, ) - logger.debug(f'Temporary directory: {temporary_dl_directory}') + logger.debug(f"Temporary directory: {temporary_dl_directory}") # pid - pid_file = os.path.join(temporary_dl_directory, '.pid') + pid_file = os.path.join(temporary_dl_directory, ".pid") # check if another download is ongoing or a zombie directory is present if os.path.exists(temporary_dl_directory): - logger.debug(f'Found temporary directory: {temporary_dl_directory}') + logger.debug(f"Found temporary directory: {temporary_dl_directory}") if os.path.exists(pid_file) and utils.another_instance_in_execution(pid_file): - logger.info(f'Another download process already exists: {pid_file}, nothing to do...') + logger.info( + f"Another download process already exists: {pid_file}, nothing to do..." 
+            )
             return
 
         # an error must occur in the previous download, restart it
-        logger.info(f'Zombie download dir found, cleaning it')
+        logger.info("Zombie download dir found, cleaning it")
         shutil.rmtree(temporary_dl_directory)
 
     # safe mkdir and write pid
     try:
         os.makedirs(temporary_dl_directory, exist_ok=False)
-        with open(pid_file, 'w') as f:
+        with open(pid_file, "w") as f:
             f.write(f"{os.getpid()}\n")
     except OSError as e:
         # if two processes start in the same moment the previous pid check can fail
-        logger.error(f'Cannot create temporary directory: {temporary_dl_directory}, possible conflict ongoing')
+        logger.error(
+            f"Cannot create temporary directory: {temporary_dl_directory}, possible conflict ongoing"
+        )
         raise e
 
     if dry_run:
         return
 
-    kwargs['output_directory'] = temporary_dl_directory
+    kwargs["output_directory"] = temporary_dl_directory
     download_func = get_download_func(download_mode)
     try:
         download_func(**kwargs)
     except SystemExit as e:
-        logger.error(f'An error occurs during download: {e}')
+        logger.error(f"An error occurred during download: {e}")
         # check if the file is not on mds
-        dataset_id = kwargs['dataset_id']
-        mds_filter = kwargs['filter']
+        dataset_id = kwargs["dataset_id"]
+        mds_filter = kwargs["filter"]
         file_list = mds_list(dataset_id, mds_filter, quiet=False)
         if len(file_list) == 0:
             shutil.rmtree(temporary_dl_directory)
-            raise FileNotFoundError(f'No match found for {mds_filter} if {dataset_id}')
+            raise FileNotFoundError(f"No match found for {mds_filter} in {dataset_id}")
 
         shutil.rmtree(temporary_dl_directory)
         raise e
     except Exception as e:
-        logger.error(f'An error occurs during downloading {kwargs}: {e}')
+        logger.error(f"An error occurred while downloading {kwargs}: {e}")
         shutil.rmtree(temporary_dl_directory)
         raise e
 
     # move in output_directory
-    for file in glob.glob(os.path.join(temporary_dl_directory, '*')):
-        logger.info(f'mv {file} to {output_directory}')
+    for file in glob.glob(os.path.join(temporary_dl_directory, "*")):
+        logger.info(f"mv {file} to {output_directory}")
         utils.mv_overwrite(file, output_directory)
 
-    logger.info(f'Removing temporary directory: {temporary_dl_directory}')
+    logger.info(f"Removing temporary directory: {temporary_dl_directory}")
     shutil.rmtree(temporary_dl_directory)
 
 
 def get_download_func(download_mode):
-    if download_mode == 'subset':
+    if download_mode == "subset":
         return copernicus.subset
-    if download_mode == 'get':
+    if download_mode == "get":
         return copernicus.get
-    raise ValueError(f'Unknown download mode: {download_mode}')
+    raise ValueError(f"Unknown download mode: {download_mode}")
 
 
 @utils.elapsed_time
 def mds_update_download(**kwargs):
-    mds_filter = kwargs['filter']
-    output_directory = kwargs['output_directory']
+    mds_filter = kwargs["filter"]
+    output_directory = kwargs["output_directory"]
 
     # get list of files
-    dataset_id = kwargs['dataset_id']
+    dataset_id = kwargs["dataset_id"]
     s3_files_list = mds_list(dataset_id, mds_filter, quiet=False)
     if len(s3_files_list) == 0:
-        raise FileNotFoundError(f'No matching files found for {dataset_id}/{mds_filter} on mds')
+        raise FileNotFoundError(
+            f"No matching files found for {dataset_id}/{mds_filter} on mds"
+        )
 
     # for each file get etag
     s3_files = []
     for s3_file in s3_files_list:
-        logger.info(f'Try to obtain Etag for: {s3_file}')
+        logger.info(f"Trying to obtain Etag for: {s3_file}")
         s3_files.extend(mds_etag(s3_file, *[None for _ in range(5)]))
 
     # download
@@ -261,41 +271,41 @@ def mds_update_download(**kwargs):
         dest_file = str(os.path.join(output_directory, filename))
 
         if 
os.path.exists(dest_file) and etag_match(dest_file, s3_file.etag): - logger.info(f'{s3_file} already updated, nothing to do...') + logger.info(f"{s3_file} already updated, nothing to do...") continue - bk_file = f'{dest_file}.bk' + bk_file = f"{dest_file}.bk" if os.path.exists(dest_file): - logger.info(f'New version available for {s3_file.name}') - logger.info(f'Creating backup file: {bk_file}') + logger.info(f"New version available for {s3_file.name}") + logger.info(f"Creating backup file: {bk_file}") shutil.move(dest_file, bk_file) - kwargs['filter'] = filename - mds_download('get', **kwargs) + kwargs["filter"] = filename + mds_download("get", **kwargs) # update json file # update_etag(filename, output_directory, s3_file.etag) if os.path.exists(bk_file): - logger.info(f'Removing backup file: {bk_file}') + logger.info(f"Removing backup file: {bk_file}") os.remove(bk_file) @utils.elapsed_time def update_etag(filename, output_directory, etag): sync_file = str(os.path.join(output_directory, SYNC_FILE)) - with open(sync_file, 'a') as s: + with open(sync_file, "a") as s: with file_lock(s): db_file = str(os.path.join(output_directory, DB_FILE)) try: - with open(db_file, 'r') as f_read: + with open(db_file, "r") as f_read: data = json.load(f_read) except FileNotFoundError: data = {} data[filename] = etag - with open(db_file, 'w') as f_write: + with open(db_file, "w") as f_write: json.dump(data, f_write, indent=4) @@ -310,8 +320,8 @@ def file_lock(file): @utils.elapsed_time def download_file(*args, **kwargs): - print(f'Downloading') + print("Downloading") def log(): - logger.info("I'm wrappper") + logger.info("I'm wrapper") diff --git a/src/lib/logging_config.py b/src/lib/logging_config.py index e89a993..5064da2 100644 --- a/src/lib/logging_config.py +++ b/src/lib/logging_config.py @@ -10,7 +10,7 @@ def get_my_fmt(): fmt="[%(asctime)s] - %(levelname)s: %(message)s", # TODO try to enable it only if logging level is DEBUG # fmt="[%(asctime)s] - %(levelname)s - %(name)s.%(funcName)s: %(message)s", - datefmt="%Y-%m-%dT%H:%M:%SZ" + datefmt="%Y-%m-%dT%H:%M:%SZ", ) @@ -24,7 +24,9 @@ def set_level(level: int) -> None: root_logger.setLevel(logger_level) -def add_file_handler(my_logger: logging.Logger = None, log_filename: str = None, log_dir: str = None) -> None: +def add_file_handler( + my_logger: logging.Logger = None, log_filename: str = None, log_dir: str = None +) -> None: """ Add a file handler to the python logging module :param my_logger: logger to update with file handler @@ -32,7 +34,7 @@ def add_file_handler(my_logger: logging.Logger = None, log_filename: str = None, :param log_dir: Path of the dir where store the file """ if log_dir is None: - log_dir = f'{os.getcwd()}/.log' + log_dir = f"{os.getcwd()}/.log" os.makedirs(log_dir, exist_ok=True) if log_filename is None: @@ -48,7 +50,7 @@ def add_file_handler(my_logger: logging.Logger = None, log_filename: str = None, # enable file handler if not my_logger: - print('Configuring root logger') + print("Configuring root logger") my_logger = logging.getLogger() my_logger.addHandler(file_handler) @@ -72,5 +74,3 @@ def set_up(name=None) -> logging.Logger: # Enable stream handler my_logger.addHandler(stream_handler) return my_logger - - diff --git a/src/lib/str_parser.py b/src/lib/str_parser.py index f05a3f4..3f83bab 100644 --- a/src/lib/str_parser.py +++ b/src/lib/str_parser.py @@ -1,10 +1,20 @@ from collections import namedtuple -FN_PLACEHOLDERS = namedtuple('FN_PLACEHOLDERS', [ - 'base_dir', 'prodDate', 'refDate', - 'DR', 'DRyyyy', 'DRmm', 'DRdd', - 
'DPyyyy', 'DPmm', 'DPdd' -]) +FN_PLACEHOLDERS = namedtuple( + "FN_PLACEHOLDERS", + [ + "base_dir", + "prodDate", + "refDate", + "DR", + "DRyyyy", + "DRmm", + "DRdd", + "DPyyyy", + "DPmm", + "DPdd", + ], +) def build_fn_placeholders(base_dir: str, ref_date: str, prod_date: str): @@ -21,23 +31,25 @@ def build_fn_placeholders(base_dir: str, ref_date: str, prod_date: str): DRdd=ref_dd, DPyyyy=prod_yyyy, DPmm=prod_mm, - DPdd=prod_dd + DPdd=prod_dd, ) -def replace_placeholders(str_template: str, placeholders: FN_PLACEHOLDERS, other_placeholders: dict = None): +def replace_placeholders( + str_template: str, placeholders: FN_PLACEHOLDERS, other_placeholders: dict = None +): """ - Replace placeholders in a string template with values from named tuple placeholders and - optionally from a dictionary of other placeholders. The other placeholders has higher priority. + Replace placeholders in a string template with values from named tuple placeholders and + optionally from a dictionary of other placeholders. The other placeholders has higher priority. - Args: - - str_template (str): The string template where placeholders will be replaced. - - placeholders (namedtuple): Named tuple containing values for placeholders. - - other_placeholders (dict, optional): Dictionary containing additional placeholders and their values. - It overwrites value in placeholders dict if a common value is present + Args: + - str_template (str): The string template where placeholders will be replaced. + - placeholders (namedtuple): Named tuple containing values for placeholders. + - other_placeholders (dict, optional): Dictionary containing additional placeholders and their values. + It overwrites value in placeholders dict if a common value is present - Returns: - - str: The modified string with placeholders replaced. + Returns: + - str: The modified string with placeholders replaced. """ dict_placeholders = placeholders._asdict() # If other_placeholders is provided, update dict_placeholders with its values - overwrite common parameters @@ -47,6 +59,6 @@ def replace_placeholders(str_template: str, placeholders: FN_PLACEHOLDERS, other new_str = str_template # Replace each placeholder in the template with its corresponding value for placeholder, value in dict_placeholders.items(): - new_str = new_str.replace(f'{{{placeholder}}}', value) + new_str = new_str.replace(f"{{{placeholder}}}", value) return new_str diff --git a/src/lib/utils.py b/src/lib/utils.py index ba43b1d..f0d43c7 100644 --- a/src/lib/utils.py +++ b/src/lib/utils.py @@ -5,15 +5,35 @@ def kill_other_drivers(): """It searches if there are any other drivers running, it kills them to start our process if so""" - cmd = ['ps', '-U', os.environ.get('USER'), - '-f', '|', 'grep', 'python', - '|', 'grep', os.path.basename(__file__), - '|', 'grep', '-v', str(os.getpid()), - '|', 'grep', '-v', 'grep', - '|', 'grep', '-v', "mp_dwld_driver_log_", - '|', 'awk', "'{{ print $2 }}'"] + cmd = [ + "ps", + "-U", + os.environ.get("USER"), + "-f", + "|", + "grep", + "python", + "|", + "grep", + os.path.basename(__file__), + "|", + "grep", + "-v", + str(os.getpid()), + "|", + "grep", + "-v", + "grep", + "|", + "grep", + "-v", + "mp_dwld_driver_log_", + "|", + "awk", + "'{{ print $2 }}'", + ] print(cmd) - seperator = ' ' + seperator = " " parent_pid = os.popen(seperator.join(cmd)).read() print(parent_pid) if parent_pid: @@ -29,20 +49,40 @@ def suicide_if_needed(): It searches if there are any other drivers running, returns a non empty array (of other pids) if so. 
""" cmd = [] - cmd += ['ps', '-U', os.environ.get('USER'), - '-f', '|', 'grep', 'python', - '|', 'grep', os.path.basename(__file__), - '|', 'grep', '-v', str(os.getpid()), - '|', 'grep', '-v', 'grep', - '|', 'grep', '-v', 'mp_dwld_driver_log_', - '|', 'awk', "'{{ print $2 }}'"] + cmd += [ + "ps", + "-U", + os.environ.get("USER"), + "-f", + "|", + "grep", + "python", + "|", + "grep", + os.path.basename(__file__), + "|", + "grep", + "-v", + str(os.getpid()), + "|", + "grep", + "-v", + "grep", + "|", + "grep", + "-v", + "mp_dwld_driver_log_", + "|", + "awk", + "'{{ print $2 }}'", + ] print(cmd) - seperator = ' ' + seperator = " " parent_pid = os.popen(seperator.join(cmd)).read() print(parent_pid) return parent_pid def time_to_seconds(time_string): - hours, minutes = map(int, time_string.split(':')) + hours, minutes = map(int, time_string.split(":")) return hours * 3600 + minutes * 60 diff --git a/tests/test_mds.py b/tests/test_mds.py index fe74926..dc9e174 100644 --- a/tests/test_mds.py +++ b/tests/test_mds.py @@ -7,17 +7,17 @@ def test_get_temporary_directory(): - output_filename = 'test.nc' - destination_dir = '/my_dir' + output_filename = "test.nc" + destination_dir = "/my_dir" result = utils.get_temporary_directory(output_filename, destination_dir) result2 = utils.get_temporary_directory(output_filename, destination_dir) - assert result == f'{destination_dir}/.c69843c60260734a065df6f9bcaca942' + assert result == f"{destination_dir}/.c69843c60260734a065df6f9bcaca942" assert result == result2 def test_cwf(): - home = os.environ['HOME'] + home = os.environ["HOME"] os.chdir(home) os.chdir(home) @@ -28,41 +28,40 @@ def test_cwf(): def test_mds_download_wrong_mode(tmp_path): with pytest.raises(ValueError): - src.download.wrapper.mds_download('wrong') + src.download.wrapper.mds_download("wrong") def test_mds_get_not_found(tmp_path): with pytest.raises(FileNotFoundError): - dataset = 'cmems_mod_med_phy-tem_anfc_4.2km_P1D-m' - output_filename = '20231231_d-CMCC--TEMP-MFSeas8-MEDATL-b2024gsagaga109_an-sv09.00.nc' - output_path = f'{tmp_path}' + dataset = "cmems_mod_med_phy-tem_anfc_4.2km_P1D-m" + output_filename = ( + "20231231_d-CMCC--TEMP-MFSeas8-MEDATL-b2024gsagaga109_an-sv09.00.nc" + ) + output_path = f"{tmp_path}" src.download.wrapper.mds_download( - 'get', + "get", filter=output_filename, output_directory=output_path, - dataset_id=dataset + dataset_id=dataset, ) def test_mds_get_download(tmp_path): - dataset = 'cmems_mod_med_phy-tem_anfc_4.2km_P1D-m' - output_filename = '20231231_d-CMCC--TEMP-MFSeas8-MEDATL-b20240109_an-sv09.00.nc' - output_path = f'{tmp_path}' + dataset = "cmems_mod_med_phy-tem_anfc_4.2km_P1D-m" + output_filename = "20231231_d-CMCC--TEMP-MFSeas8-MEDATL-b20240109_an-sv09.00.nc" + output_path = f"{tmp_path}" src.download.wrapper.mds_download( - 'get', - filter=output_filename, - output_directory=output_path, - dataset_id=dataset + "get", filter=output_filename, output_directory=output_path, dataset_id=dataset ) assert os.path.exists(os.path.join(output_path, output_filename)) def test_mds_get_list(): - dataset = 'cmems_mod_med_phy-tem_anfc_4.2km_P1D-m' - mds_filter = '*-CMCC--TEMP-MFSeas8-MEDATL-b20240109_an-sv09.00.nc' + dataset = "cmems_mod_med_phy-tem_anfc_4.2km_P1D-m" + mds_filter = "*-CMCC--TEMP-MFSeas8-MEDATL-b20240109_an-sv09.00.nc" result = src.download.wrapper.mds_list(dataset, mds_filter) @@ -72,8 +71,8 @@ def test_mds_get_list(): def test_fnmatch(): - s3_file = '_2.5km_PT1H-m_202311/2023/12/20231201_h-CMCC--TEMP-BSeas6-BS-b20231212_an-sv12.00.nc' + s3_file = 
"_2.5km_PT1H-m_202311/2023/12/20231201_h-CMCC--TEMP-BSeas6-BS-b20231212_an-sv12.00.nc" - assert fnmatch.fnmatch(s3_file, '*20231201_h-CMCC*') - assert not fnmatch.fnmatch(s3_file, '*20231201_h-cmcc*') - assert not fnmatch.fnmatch(s3_file, '*2023201_h*') + assert fnmatch.fnmatch(s3_file, "*20231201_h-CMCC*") + assert not fnmatch.fnmatch(s3_file, "*20231201_h-cmcc*") + assert not fnmatch.fnmatch(s3_file, "*2023201_h*")