Skip to content

Commit 1a5f534

Browse files
authored
Merge pull request #90 from podaac/develop
Develop in main. closes #89 and closes #62
2 parents 519abea + 368c031 commit 1a5f534

6 files changed

+73
-64
lines changed

CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@ All notable changes to this project will be documented in this file.
33

44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
55

6+
## [1.10.2]
7+
### Fixed
8+
- Fixed an issue where using a default global bounding box prevented download of data that didn't use the horizontal spatial domain [87](https://github.com/podaac/data-subscriber/issues/87)
9+
- Fixed limit option not being respected. [86](https://github.com/podaac/data-subscriber/issues/86)
10+
611
## [1.10.1]
712
### Fixed
813
- Support for SHA-256 and SHA-512 checksums

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "podaac-data-subscriber"
3-
version = "1.10.1"
3+
version = "1.10.2"
44
description = "PO.DAAC Data Subscriber Command Line Tool"
55
authors = ["PO.DAAC <[email protected]>"]
66
readme = "README.md"

subscriber/podaac_access.py

+10-9
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import tenacity
2222
from datetime import datetime
2323

24-
__version__ = "1.10.1"
24+
__version__ = "1.10.2"
2525
extensions = [".nc", ".h5", ".zip", ".tar.gz"]
2626
edl = "urs.earthdata.nasa.gov"
2727
cmr = "cmr.earthdata.nasa.gov"
@@ -129,16 +129,17 @@ def refresh_token(old_token: str, client_id: str):
129129

130130

131131
def validate(args):
132-
bounds = args.bbox.split(',')
133-
if len(bounds) != 4:
134-
raise ValueError(
135-
"Error parsing '--bounds': " + args.bbox + ". Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces ") # noqa E501
136-
for b in bounds:
137-
try:
138-
float(b)
139-
except ValueError:
132+
if args.bbox is not None:
133+
bounds = args.bbox.split(',')
134+
if len(bounds) != 4:
140135
raise ValueError(
141136
"Error parsing '--bounds': " + args.bbox + ". Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces ") # noqa E501
137+
for b in bounds:
138+
try:
139+
float(b)
140+
except ValueError:
141+
raise ValueError(
142+
"Error parsing '--bounds': " + args.bbox + ". Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces ") # noqa E501
142143

143144
if args.startDate:
144145
try:

subscriber/podaac_data_downloader.py

+23-23
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,13 @@
1414
__version__ = pa.__version__
1515

1616
page_size = 2000
17-
1817
edl = pa.edl
1918
cmr = pa.cmr
2019
token_url = pa.token_url
2120

22-
2321
# The lines below are to get the IP address. You can make this static and
2422
# assign a fixed value to the IPAddr variable
2523

26-
2724
def parse_cycles(cycle_input):
2825
# if cycle_input is None:
2926
# return None
@@ -66,14 +63,14 @@ def create_parser():
6663
help="The ISO date time before which data should be retrieved. For Example, --start-date 2021-01-14T00:00:00Z") # noqa E501
6764
parser.add_argument("-ed", "--end-date", required=False, dest="endDate",
6865
help="The ISO date time after which data should be retrieved. For Example, --end-date 2021-01-14T00:00:00Z") # noqa E501
69-
66+
7067
# Adding optional arguments
7168
parser.add_argument("-f", "--force", dest="force", action="store_true", help = "Flag to force downloading files that are listed in CMR query, even if the file exists and checksum matches") # noqa E501
7269

7370
# spatiotemporal arguments
7471
parser.add_argument("-b", "--bounds", dest="bbox",
7572
help="The bounding rectangle to filter result in. Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces. Due to an issue with parsing arguments, to use this command, please use the -b=\"-180,-90,180,90\" syntax when calling from the command line. Default: \"-180,-90,180,90\".",
76-
default="-180,-90,180,90") # noqa E501
73+
default=None) # noqa E501
7774

7875
# Arguments for how data are stored locally - much processing is based on
7976
# the underlying directory structure (e.g. year/Day-of-year)
@@ -101,9 +98,8 @@ def create_parser():
10198
parser.add_argument("-p", "--provider", dest="provider", default='POCLOUD',
10299
help="Specify a provider for collection search. Default is POCLOUD.") # noqa E501
103100

104-
parser.add_argument("--limit", dest="limit", default='2000', type=int,
105-
help="Integer limit for number of granules to download. Useful in testing. Defaults to " + str(
106-
page_size)) # noqa E501
101+
parser.add_argument("--limit", dest="limit", default=None, type=int,
102+
help="Integer limit for number of granules to download. Useful in testing. Defaults to no limit.") # noqa E501
107103

108104
return parser
109105

@@ -138,8 +134,9 @@ def run(args=None):
138134
process_cmd = args.process_cmd
139135
data_path = args.outputDirectory
140136

141-
if args.limit is not None:
142-
page_size = args.limit
137+
download_limit = None
138+
if args.limit is not None and args.limit > 0:
139+
download_limit = args.limit
143140

144141
if args.offset:
145142
ts_shift = timedelta(hours=int(args.offset))
@@ -158,9 +155,6 @@ def run(args=None):
158155
logging.info("NOTE: Making new data directory at " + data_path + "(This is the first run.)")
159156
makedirs(data_path, exist_ok=True)
160157

161-
# Change this to whatever extent you need. Format is W Longitude,S Latitude,E Longitude,N Latitude
162-
bounding_extent = args.bbox
163-
164158
if search_cycles is not None:
165159
cmr_cycles = search_cycles
166160
params = [
@@ -169,7 +163,6 @@ def run(args=None):
169163
('provider', provider),
170164
('ShortName', short_name),
171165
('token', token),
172-
('bounding_box', bounding_extent),
173166
]
174167
for v in cmr_cycles:
175168
params.append(("cycle[]", v))
@@ -179,20 +172,20 @@ def run(args=None):
179172
else:
180173
temporal_range = pa.get_temporal_range(start_date_time, end_date_time,
181174
datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")) # noqa E501
182-
params = {
183-
'page_size': page_size,
184-
'sort_key': "-start_date",
185-
'provider': provider,
186-
'ShortName': short_name,
187-
'temporal': temporal_range,
188-
'token': token,
189-
'bounding_box': bounding_extent,
190-
}
175+
params = [
176+
('page_size', page_size),
177+
('sort_key', "-start_date"),
178+
('provider', provider),
179+
('ShortName', short_name),
180+
('temporal', temporal_range),
181+
]
191182
if args.verbose:
192183
logging.info("Temporal Range: " + temporal_range)
193184

194185
if args.verbose:
195186
logging.info("Provider: " + provider)
187+
if args.bbox is not None:
188+
params.append(('bounding_box', args.bbox))
196189

197190
# If 401 is raised, refresh token and try one more time
198191
try:
@@ -247,6 +240,8 @@ def run(args=None):
247240
# Make this a non-verbose message
248241
# if args.verbose:
249242
logging.info("Found " + str(len(downloads)) + " total files to download")
243+
if download_limit:
244+
logging.info("Limiting downloads to " + str(args.limit) + " total files")
250245
if args.verbose:
251246
logging.info("Downloading files with extensions: " + str(extensions))
252247

@@ -277,6 +272,11 @@ def run(args=None):
277272
pa.process_file(process_cmd, output_path, args)
278273
logging.info(str(datetime.now()) + " SUCCESS: " + f)
279274
success_cnt = success_cnt + 1
275+
276+
#if limit is set and we're at or over it, stop downloading
277+
if download_limit and success_cnt >= download_limit:
278+
break
279+
280280
except Exception:
281281
logging.warning(str(datetime.now()) + " FAILURE: " + f, exc_info=True)
282282
failure_cnt = failure_cnt + 1

subscriber/podaac_data_subscriber.py

+21-31
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def create_parser():
7373
default=False) # noqa E501
7474
parser.add_argument("-b", "--bounds", dest="bbox",
7575
help="The bounding rectangle to filter result in. Format is W Longitude,S Latitude,E Longitude,N Latitude without spaces. Due to an issue with parsing arguments, to use this command, please use the -b=\"-180,-90,180,90\" syntax when calling from the command line. Default: \"-180,-90,180,90\".",
76-
default="-180,-90,180,90") # noqa E501
76+
default=None) # noqa E501
7777

7878
# Arguments for how data are stored locally - much processing is based on
7979
# the underlying directory structure (e.g. year/Day-of-year)
@@ -178,46 +178,36 @@ def run(args=None):
178178
else:
179179
logging.warning("No .update__" + short_name + " in the data directory. (Is this the first run?)")
180180

181-
# Change this to whatever extent you need. Format is W Longitude,S Latitude,E Longitude,N Latitude
182-
bounding_extent = args.bbox
183-
184-
# There are several ways to query for CMR updates that occured during a given timeframe. Read on in the CMR Search documentation:
185-
# * https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#c-with-new-granules (Collections)
186-
# * https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#c-with-revised-granules (Collections)
187-
# * https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#g-production-date (Granules)
188-
# * https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#g-created-at (Granules)
189-
# The `created_at` parameter works for our purposes. It's a granule search parameter that returns the records ingested since the input timestamp.
190-
191181
if defined_time_range:
192182
# if(data_since):
193183
temporal_range = pa.get_temporal_range(start_date_time, end_date_time,
194184
datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")) # noqa E501
195185

196-
params = {
197-
'page_size': page_size,
198-
'sort_key': "-start_date",
199-
'provider': provider,
200-
'ShortName': short_name,
201-
'updated_since': data_within_last_timestamp,
202-
'token': token,
203-
'bounding_box': bounding_extent,
204-
}
186+
params = [
187+
('page_size',page_size),
188+
('sort_key', "-start_date"),
189+
('provider', provider),
190+
('ShortName', short_name),
191+
('updated_since', data_within_last_timestamp),
192+
('token', token),
193+
]
205194

206195
if defined_time_range:
207-
params = {
208-
'page_size': page_size,
209-
'sort_key': "-start_date",
210-
'provider': provider,
211-
'updated_since': data_within_last_timestamp,
212-
'ShortName': short_name,
213-
'temporal': temporal_range,
214-
'token': token,
215-
'bounding_box': bounding_extent,
216-
}
217-
196+
params = [
197+
('page_size', page_size),
198+
('sort_key', "-start_date"),
199+
('provider', provider),
200+
('updated_since', data_within_last_timestamp),
201+
('ShortName', short_name),
202+
('temporal', temporal_range),
203+
('token', token),
204+
]
218205
if args.verbose:
219206
logging.info("Temporal Range: " + temporal_range)
220207

208+
if args.bbox is not None:
209+
params.append(('bounding_box', args.bbox))
210+
221211
if args.verbose:
222212
logging.info("Provider: " + provider)
223213
logging.info("Updated Since: " + data_within_last_timestamp)

tests/test_downloader_regression.py

+13
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,19 @@ def create_downloader_args(args):
1212
args2 = parser.parse_args(args)
1313
return args2
1414

15+
16+
#Test the downloader --limit flag on MUR25 data: the query window matches
17+
# several granules, but only the single granule permitted by --limit 1
18+
# should actually be downloaded.
19+
@pytest.mark.regression
20+
def test_downloader_limit_MUR():
21+
shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2', ignore_errors=True)
22+
args2 = create_downloader_args('-c MUR25-JPL-L4-GLOB-v04.2 -d ./MUR25-JPL-L4-GLOB-v04.2 -sd 2020-01-01T00:00:00Z -ed 2020-01-30T00:00:00Z --limit 1'.split())
23+
pdd.run(args2)
24+
# count number of files downloaded...
25+
assert len([name for name in os.listdir('./MUR25-JPL-L4-GLOB-v04.2') if os.path.isfile('./MUR25-JPL-L4-GLOB-v04.2/' + name)])==1
26+
shutil.rmtree('./MUR25-JPL-L4-GLOB-v04.2')
27+
1528
#Test the downloader on MUR25 data for start/stop, yyyy/mmm/dd dir structure,
1629
# and offset. Running it a second time to ensure it downloads the files again-
1730
# the downloader doesn't care about updates.

0 commit comments

Comments
 (0)