Skip to content

Commit 0d778e1

Browse files
committed
[2/2] [a] Fix: Can't use curl to download a single manifest in one invocation (#5918)
Add a wait parameter option to the manifest endpoint
1 parent 7c8e598 commit 0d778e1

File tree

4 files changed

+110
-37
lines changed

4 files changed

+110
-37
lines changed

lambdas/service/app.py

Lines changed: 42 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,12 @@ def validate_json_param(name: str, value: str) -> MutableJSON:
739739
raise BRE(f'The {name!r} parameter is not valid JSON')
740740

741741

742+
def validate_wait(wait: str | None):
743+
valid_values = ['0', '1']
744+
if wait not in [None, *valid_values]:
745+
raise BRE(f'Invalid wait value `{wait}`. Must be one of {valid_values}')
746+
747+
742748
class Mandatory:
743749
"""
744750
Validation wrapper signifying that a parameter is mandatory.
@@ -1313,6 +1319,26 @@ def get_summary():
13131319
authentication=request.authentication)
13141320

13151321

1322+
def wait_parameter_spec(*, default: int) -> JSON:
1323+
valid_values = [0, 1]
1324+
assert default in valid_values, default
1325+
return params.query(
1326+
'wait',
1327+
schema.optional(schema.with_default(default,
1328+
type_=schema.enum(*valid_values))),
1329+
description=fd('''
1330+
If 0, the client is responsible for honoring the waiting period
1331+
specified in the `Retry-After` response header. If 1, the server
1332+
will delay the response in order to consume as much of that waiting
1333+
period as possible. This parameter should only be set to 1 by
1334+
clients who can't honor the `Retry-After` header, preventing them
1335+
from quickly exhausting the maximum number of redirects. If the
1336+
server cannot wait the full amount, any amount of wait time left
1337+
will still be returned in the `Retry-After` header of the response.
1338+
''')
1339+
)
1340+
1341+
13161342
post_manifest_example_url = (
13171343
f'{app.base_url}/manifest/files'
13181344
f'?catalog={list(config.catalogs.keys())[0]}'
@@ -1347,7 +1373,8 @@ def manifest_route(*, fetch: bool, initiate: bool, curl: bool = False):
13471373
'parameters': [
13481374
params.path('token', str, description=fd('''
13491375
An opaque string representing the manifest preparation job
1350-
'''))
1376+
''')),
1377+
*([] if fetch else [wait_parameter_spec(default=0)])
13511378
]
13521379
},
13531380
method_spec={
@@ -1465,6 +1492,7 @@ def manifest_route(*, fetch: bool, initiate: bool, curl: bool = False):
14651492
'''),
14661493
'parameters': [
14671494
catalog_param_spec,
1495+
*([wait_parameter_spec(default=1)] if curl else []),
14681496
filters_param_spec,
14691497
params.query(
14701498
'format',
@@ -1660,24 +1688,32 @@ def _file_manifest(fetch: bool, token_or_key: Optional[str] = None):
16601688
and request.headers.get('content-type') == 'application/x-www-form-urlencoded'
16611689
and request.raw_body != b''
16621690
):
1663-
raise BRE('The body must be empty for a POST request of content-type '
1664-
'`application/x-www-form-urlencoded` to this endpoint')
1691+
raise BRE('POST requests to this endpoint must have an empty body if '
1692+
'they specify a `Content-Type` header of '
1693+
'`application/x-www-form-urlencoded`')
16651694
query_params = request.query_params or {}
16661695
_hoist_parameters(query_params, request)
16671696
if token_or_key is None:
16681697
query_params.setdefault('filters', '{}')
1698+
if post:
1699+
query_params.setdefault('wait', '1')
16691700
# We list the `catalog` validator first so that the catalog is validated
16701701
# before any other potentially catalog-dependent validators are invoked
16711702
validate_params(query_params,
16721703
catalog=validate_catalog,
16731704
format=validate_manifest_format,
1674-
filters=validate_filters)
1705+
filters=validate_filters,
1706+
**({'wait': validate_wait} if post else {}))
16751707
# Now that the catalog is valid, we can provide the default format that
16761708
# depends on it
16771709
default_format = app.metadata_plugin.manifest_formats[0].value
16781710
query_params.setdefault('format', default_format)
16791711
else:
1680-
validate_params(query_params)
1712+
validate_params(query_params,
1713+
# If the initial request was a POST to the non-fetch
1714+
# endpoint, the 'wait' param will be carried over to
1715+
# each subsequent GET request to the non-fetch endpoint.
1716+
**({'wait': validate_wait} if not fetch else {}))
16811717
return app.manifest_controller.get_manifest_async(query_params=query_params,
16821718
token_or_key=token_or_key,
16831719
fetch=fetch,
@@ -1724,21 +1760,7 @@ def generate_manifest(event: AnyJSON, _context: LambdaContext):
17241760
made. If that fails, the UUID of the file will be used instead.
17251761
''')
17261762
),
1727-
params.query(
1728-
'wait',
1729-
schema.optional(schema.with_default(0)),
1730-
description=fd('''
1731-
If 0, the client is responsible for honoring the waiting period
1732-
specified in the Retry-After response header. If 1, the server
1733-
will delay the response in order to consume as much of that
1734-
waiting period as possible. This parameter should only be set to
1735-
1 by clients who can't honor the `Retry-After` header,
1736-
preventing them from quickly exhausting the maximum number of
1737-
redirects. If the server cannot wait the full amount, any amount
1738-
of wait time left will still be returned in the Retry-After
1739-
header of the response.
1740-
''')
1741-
),
1763+
wait_parameter_spec(default=0),
17421764
params.query(
17431765
'replica',
17441766
schema.optional(str),
@@ -1880,16 +1902,6 @@ def validate_replica(replica: str) -> None:
18801902
if replica not in ('aws', 'gcp'):
18811903
raise ValueError
18821904

1883-
def validate_wait(wait: Optional[str]) -> Optional[int]:
1884-
if wait is None:
1885-
return None
1886-
elif wait == '0':
1887-
return False
1888-
elif wait == '1':
1889-
return True
1890-
else:
1891-
raise ValueError
1892-
18931905
def validate_version(version: str) -> None:
18941906
# This function exists so the repository plugin can be lazily loaded
18951907
# instead of being loaded before `validate_params()` can run. This is

lambdas/service/openapi.json

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9631,6 +9631,21 @@
96319631
},
96329632
"description": "The name of the catalog to query."
96339633
},
9634+
{
9635+
"name": "wait",
9636+
"in": "query",
9637+
"required": false,
9638+
"schema": {
9639+
"type": "integer",
9640+
"format": "int64",
9641+
"enum": [
9642+
0,
9643+
1
9644+
],
9645+
"default": 1
9646+
},
9647+
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
9648+
},
96349649
{
96359650
"name": "filters",
96369651
"in": "query",
@@ -10988,6 +11003,21 @@
1098811003
"type": "string"
1098911004
},
1099011005
"description": "\nAn opaque string representing the manifest preparation job\n"
11006+
},
11007+
{
11008+
"name": "wait",
11009+
"in": "query",
11010+
"required": false,
11011+
"schema": {
11012+
"type": "integer",
11013+
"format": "int64",
11014+
"enum": [
11015+
0,
11016+
1
11017+
],
11018+
"default": 0
11019+
},
11020+
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
1099111021
}
1099211022
],
1099311023
"get": {
@@ -12541,9 +12571,13 @@
1254112571
"schema": {
1254212572
"type": "integer",
1254312573
"format": "int64",
12574+
"enum": [
12575+
0,
12576+
1
12577+
],
1254412578
"default": 0
1254512579
},
12546-
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the Retry-After response header. If 1, the server\nwill delay the response in order to consume as much of that\nwaiting period as possible. This parameter should only be set to\n1 by clients who can't honor the `Retry-After` header,\npreventing them from quickly exhausting the maximum number of\nredirects. If the server cannot wait the full amount, any amount\nof wait time left will still be returned in the Retry-After\nheader of the response.\n"
12580+
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
1254712581
},
1254812582
{
1254912583
"name": "replica",
@@ -12677,9 +12711,13 @@
1267712711
"schema": {
1267812712
"type": "integer",
1267912713
"format": "int64",
12714+
"enum": [
12715+
0,
12716+
1
12717+
],
1268012718
"default": 0
1268112719
},
12682-
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the Retry-After response header. If 1, the server\nwill delay the response in order to consume as much of that\nwaiting period as possible. This parameter should only be set to\n1 by clients who can't honor the `Retry-After` header,\npreventing them from quickly exhausting the maximum number of\nredirects. If the server cannot wait the full amount, any amount\nof wait time left will still be returned in the Retry-After\nheader of the response.\n"
12720+
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
1268312721
},
1268412722
{
1268512723
"name": "replica",

src/azul/service/manifest_controller.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from collections.abc import (
22
Mapping,
33
)
4+
from math import (
5+
ceil,
6+
)
47
from typing import (
58
Optional,
69
TypedDict,
@@ -118,6 +121,7 @@ def get_manifest_async(self,
118121
query_params: Mapping[str, str],
119122
fetch: bool,
120123
authentication: Optional[Authentication]):
124+
wait = query_params.get('wait')
121125
if token_or_key is None:
122126
token, manifest_key = None, None
123127
else:
@@ -187,7 +191,9 @@ def get_manifest_async(self,
187191
body: dict[str, int | str | FlatJSON]
188192

189193
if manifest is None:
190-
url = self.manifest_url_func(fetch=fetch, token_or_key=token.encode())
194+
url = self.manifest_url_func(fetch=fetch,
195+
token_or_key=token.encode(),
196+
**({} if wait is None else {'wait': wait}))
191197
body = {
192198
'Status': 301,
193199
'Location': str(url),
@@ -225,6 +231,17 @@ def get_manifest_async(self,
225231
'CommandLine': self.service.command_lines(manifest, url, authentication)
226232
}
227233

234+
if wait is not None:
235+
if wait == '0':
236+
pass
237+
elif wait == '1':
238+
retry_after = body.get('Retry-After')
239+
if retry_after is not None:
240+
time_slept = self.server_side_sleep(float(retry_after))
241+
body['Retry-After'] = ceil(retry_after - time_slept)
242+
else:
243+
assert False, wait
244+
228245
# Note: Response objects returned without a 'Content-Type' header will
229246
# be given one of type 'application/json' as default by Chalice.
230247
# https://aws.github.io/chalice/tutorials/basicrestapi.html#customizing-the-http-response

test/integration_test.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -602,8 +602,9 @@ def _test_other_endpoints(self):
602602
def _test_manifest(self, catalog: CatalogName):
603603
supported_formats = self.metadata_plugin(catalog).manifest_formats
604604
assert supported_formats
605-
for curl, format in chain(
606-
product([False, True], [None, *supported_formats])
605+
for curl, wait, format in chain(
606+
product([False], [None], [None, *supported_formats]),
607+
product([True], [None, 0, 1], [None, *supported_formats])
607608
):
608609
filters = self._manifest_filters(catalog)
609610
if curl:
@@ -614,9 +615,10 @@ def _test_manifest(self, catalog: CatalogName):
614615
fetch_modes = [first_fetch, not first_fetch]
615616
for fetch in fetch_modes:
616617
with self.subTest('manifest', catalog=catalog, format=format,
617-
fetch=fetch, curl=curl):
618+
fetch=fetch, curl=curl, wait=wait):
618619
args = dict(catalog=catalog,
619-
filters=json.dumps(filters))
620+
filters=json.dumps(filters),
621+
**({} if wait is None else {'wait': wait}))
620622
if format is None:
621623
format = first(supported_formats)
622624
else:
@@ -892,6 +894,10 @@ def _get_url_content(self, method: str, url: furl) -> bytes:
892894
retry_after = response.headers.get('Retry-After')
893895
if retry_after is not None:
894896
retry_after = float(retry_after)
897+
if url.args.get('wait') == 1:
898+
# The wait should have happened server-side and been
899+
# subtracted from the retry-after that was returned.
900+
self.assertEqual(0.0, retry_after)
895901
log.info('Sleeping %.3fs to honor Retry-After header', retry_after)
896902
time.sleep(retry_after)
897903
url = furl(response.headers['Location'])

0 commit comments

Comments
 (0)