Skip to content

Commit 0a17827

Browse files
authored
Merge pull request #3517 from snbianco/subscription-prods
Download products from event subscription JSON files
2 parents 59dfea8 + 181c057 commit 0a17827

5 files changed

Lines changed: 454 additions & 335 deletions

File tree

CHANGES.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ mast
7474
- The ``select_cols`` parameter in ``MastMissions`` query functions now accepts an iterable of column names, a comma-delimited
7575
string of column names, or the special values 'all' or '*' to return all available columns. [#3492]
7676

77+
- Improved robustness of product downloads for ``MastMissions``, including support for subscription-service JSON inputs and
78+
clearer validation of MAST URIs and product metadata. [#3517]
79+
80+
- Added full support for the International Ultraviolet Explorer (IUE) mission in ``MastMissions``. [#3517]
81+
7782
jplspec
7883
^^^^^^^
7984

astroquery/mast/missions.py

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"""
88

99
import difflib
10+
import json
1011
import warnings
1112
from collections.abc import Iterable
1213
from json import JSONDecodeError
@@ -59,7 +60,8 @@ def __init__(self, *, mission='hst', mast_token=None):
5960
'jwst': 'fileSetName',
6061
'roman': 'fileSetName',
6162
'classy': 'Target',
62-
'ullyses': 'observation_id'
63+
'ullyses': 'observation_id',
64+
'iue': 'iue_data_id'
6365
}
6466

6567
# Service attributes
@@ -616,7 +618,7 @@ def download_file(self, uri, *, local_path=None, cache=True, verbose=True):
616618
Parameters
617619
----------
618620
uri : str
619-
The product dataURI
621+
The product filename or URI to be downloaded.
620622
local_path : str
621623
Directory or filename to which the file will be downloaded. Defaults to current working directory.
622624
cache : bool
@@ -635,14 +637,18 @@ def download_file(self, uri, *, local_path=None, cache=True, verbose=True):
635637
"""
636638

637639
# Construct the full data URL based on mission
638-
if self.mission in ['hst', 'jwst', 'roman']:
640+
if self.mission in ['hst', 'jwst', 'roman', 'roman_spectra', 'roman_cgi']:
639641
# HST, JWST, and RST have a dedicated endpoint for retrieving products
640642
base_url = self._service_api_connection.MISSIONS_DOWNLOAD_URL + self.mission + '/api/v0.1/retrieve_product'
641643
keyword = 'product_name'
642644
else:
643645
# HLSPs use MAST download URL
644646
base_url = self._service_api_connection.MAST_DOWNLOAD_URL
645647
keyword = 'uri'
648+
# These files require a MAST URI and not just a filename
649+
if not uri.startswith('mast:'):
650+
raise InvalidQueryError(f'For mission "{self.mission}", a full MAST URI is required for downloading. '
651+
f'Got "{uri}".')
646652
data_url = base_url + f'?{keyword}=' + uri
647653
escaped_url = base_url + f'?{keyword}=' + quote(uri, safe='')
648654

@@ -714,13 +720,25 @@ def _download_files(self, products, base_dir, *, flat=False, cache=True, verbose
714720
base_dir = Path(base_dir)
715721

716722
for data_product in products:
723+
col_names = data_product.colnames
717724
# Determine local path for each file
718-
local_path = base_dir / data_product['dataset'] if not flat else base_dir
725+
filename = data_product['filename']
726+
uri = data_product['uri'] if 'uri' in col_names else filename
727+
dataset = None
728+
if 'dataset' in col_names:
729+
dataset = data_product['dataset']
730+
elif 'fileset' in col_names:
731+
dataset = data_product['fileset']
732+
if not dataset and not flat:
733+
raise InvalidQueryError('Data product is missing "dataset" or "fileset" field required for '
734+
'constructing local download path. Specify `flat=True` to avoid this '
735+
'requirement.')
736+
local_path = base_dir / dataset if not flat else base_dir
719737
local_path.mkdir(parents=True, exist_ok=True)
720-
local_file_path = local_path / Path(data_product['filename']).name
738+
local_file_path = local_path / Path(filename).name
721739

722740
# Download files and record status
723-
status, msg, url = self.download_file(data_product['uri'],
741+
status, msg, url = self.download_file(uri,
724742
local_path=local_file_path,
725743
cache=cache,
726744
verbose=verbose)
@@ -737,9 +755,10 @@ def download_products(self, products, *, download_dir=None, flat=False,
737755
738756
Parameters
739757
----------
740-
products : str, list, `~astropy.table.Table`
758+
products : str, `~pathlib.Path`, list of str, `~astropy.table.Table`, `~astropy.table.Row`, or list of dict
741759
A single dataset ID or a list of dataset IDs (e.g., as input for `get_product_list`),
742-
or a Table of products (e.g., as output from `get_product_list`)
760+
a Table of products (e.g., as output from `get_product_list`), or a path to a JSON file (or the parsed JSON data) from
761+
the MAST subscription service containing product information.
743762
download_dir : str or Path, optional
744763
Directory for file downloads. Defaults to current directory.
745764
flat : bool, optional
@@ -764,11 +783,30 @@ def download_products(self, products, *, download_dir=None, flat=False,
764783
manifest : `~astropy.table.Table`
765784
A table manifest showing downloaded file locations and statuses.
766785
"""
786+
if not products:
787+
raise InvalidQueryError('No products specified for download.')
788+
767789
# Ensure `products` is a Table, collecting products if necessary
768-
if isinstance(products, (str, list)):
790+
if (isinstance(products, str) and products.endswith('.json')) or isinstance(products, Path):
791+
# Products coming from local JSON filepath from subscription service
792+
try:
793+
with open(products, 'r') as f:
794+
json_data = json.load(f)
795+
except JSONDecodeError as ex:
796+
raise InvalidQueryError(f'Failed to decode JSON file at {products}: {ex}')
797+
798+
if not isinstance(json_data, (list, tuple)):
799+
raise InvalidQueryError(f'Expected a list of product rows in JSON file at {products}.')
800+
products = Table(rows=json_data)
801+
elif isinstance(products, (list)) and all(isinstance(prod, dict) for prod in products):
802+
# Products coming from JSON data from subscription service
803+
products = Table(rows=products)
804+
elif isinstance(products, (str, list)):
805+
# Products given as dataset ID(s)
769806
products = [products] if isinstance(products, str) else products
770807
products = vstack([self.get_product_list(oid) for oid in products])
771808
elif isinstance(products, Row):
809+
# Single row of products
772810
products = Table(products, masked=True)
773811

774812
# Apply filters
@@ -778,7 +816,7 @@ def download_products(self, products, *, download_dir=None, flat=False,
778816
products = utils.remove_duplicate_products(products, 'filename')
779817

780818
if not len(products):
781-
warnings.warn("No products to download.", NoResultsWarning)
819+
warnings.warn("No products to download after applying filters.", NoResultsWarning)
782820
return
783821

784822
# Set up base directory for downloads

0 commit comments

Comments
 (0)