Skip to content

Commit c91b691

Browse files
authored
Merge pull request #8298 from kyleknap/v2-crt-checksums
[v2] Turn on checksum validation for CRT S3 client
2 parents 90ecbeb + 4245e18 commit c91b691

File tree

3 files changed

+136
-5
lines changed

3 files changed

+136
-5
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"type": "enhancement",
3+
"category": "``s3``",
4+
"description": "Automatically configure CRC32 checksums for uploads and checksum validation for downloads through the CRT transfer client."
5+
}

awscli/s3transfer/crt.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None):
206206
extra_args = {}
207207
if subscribers is None:
208208
subscribers = {}
209+
self._validate_checksum_algorithm_supported(extra_args)
209210
callargs = CallArgs(
210211
bucket=bucket,
211212
key=key,
@@ -231,6 +232,17 @@ def delete(self, bucket, key, extra_args=None, subscribers=None):
231232
def shutdown(self, cancel=False):
232233
self._shutdown(cancel)
233234

235+
def _validate_checksum_algorithm_supported(self, extra_args):
236+
checksum_algorithm = extra_args.get('ChecksumAlgorithm')
237+
if checksum_algorithm is None:
238+
return
239+
supported_algorithms = list(awscrt.s3.S3ChecksumAlgorithm.__members__)
240+
if checksum_algorithm.upper() not in supported_algorithms:
241+
raise ValueError(
242+
f'ChecksumAlgorithm: {checksum_algorithm} not supported. '
243+
f'Supported algorithms are: {supported_algorithms}'
244+
)
245+
234246
def _cancel_transfers(self):
235247
for coordinator in self._future_coordinators:
236248
if not coordinator.done():
@@ -623,11 +635,17 @@ def _get_make_request_args_put_object(
623635
else:
624636
call_args.extra_args["Body"] = call_args.fileobj
625637

638+
checksum_algorithm = call_args.extra_args.pop(
639+
'ChecksumAlgorithm', 'CRC32'
640+
).upper()
641+
checksum_config = awscrt.s3.S3ChecksumConfig(
642+
algorithm=awscrt.s3.S3ChecksumAlgorithm[checksum_algorithm],
643+
location=awscrt.s3.S3ChecksumLocation.TRAILER,
644+
)
626645
# Suppress botocore's automatic MD5 calculation by setting an override
627646
# value that will get deleted in the BotocoreCRTRequestSerializer.
628-
# The CRT S3 client is able automatically compute checksums as part of
629-
# requests it makes, and the intention is to configure automatic
630-
# checksums in a future update.
647+
# As part of the CRT S3 request, we request the CRT S3 client to
648+
# automatically add trailing checksums to its uploads.
631649
call_args.extra_args["ContentMD5"] = "override-to-be-removed"
632650

633651
make_request_args = self._default_get_make_request_args(
@@ -639,6 +657,7 @@ def _get_make_request_args_put_object(
639657
on_done_after_calls=on_done_after_calls,
640658
)
641659
make_request_args['send_filepath'] = send_filepath
660+
make_request_args['checksum_config'] = checksum_config
642661
return make_request_args
643662

644663
def _get_make_request_args_get_object(
@@ -652,6 +671,7 @@ def _get_make_request_args_get_object(
652671
):
653672
recv_filepath = None
654673
on_body = None
674+
checksum_config = awscrt.s3.S3ChecksumConfig(validate_response=True)
655675
if isinstance(call_args.fileobj, str):
656676
final_filepath = call_args.fileobj
657677
recv_filepath = self._os_utils.get_temp_filename(final_filepath)
@@ -673,6 +693,7 @@ def _get_make_request_args_get_object(
673693
)
674694
make_request_args['recv_filepath'] = recv_filepath
675695
make_request_args['on_body'] = on_body
696+
make_request_args['checksum_config'] = checksum_config
676697
return make_request_args
677698

678699
def _default_get_make_request_args(

tests/functional/s3transfer/test_crt.py

Lines changed: 107 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,10 @@ def _assert_expected_crt_http_request(
129129
)
130130
if expected_missing_headers is not None:
131131
header_names = [
132-
header[0] for header in crt_http_request.headers
132+
header[0].lower() for header in crt_http_request.headers
133133
]
134134
for expected_missing_header in expected_missing_headers:
135-
self.assertNotIn(expected_missing_header, header_names)
135+
self.assertNotIn(expected_missing_header.lower(), header_names)
136136

137137
def _assert_subscribers_called(self, expected_future=None):
138138
self.assertTrue(self.record_subscriber.on_queued_called)
@@ -145,6 +145,21 @@ def _assert_subscribers_called(self, expected_future=None):
145145
self.record_subscriber.on_done_future, expected_future
146146
)
147147

148+
def _get_expected_upload_checksum_config(self, **overrides):
149+
checksum_config_kwargs = {
150+
'algorithm': awscrt.s3.S3ChecksumAlgorithm.CRC32,
151+
'location': awscrt.s3.S3ChecksumLocation.TRAILER,
152+
}
153+
checksum_config_kwargs.update(overrides)
154+
return awscrt.s3.S3ChecksumConfig(**checksum_config_kwargs)
155+
156+
def _get_expected_download_checksum_config(self, **overrides):
157+
checksum_config_kwargs = {
158+
'validate_response': True,
159+
}
160+
checksum_config_kwargs.update(overrides)
161+
return awscrt.s3.S3ChecksumConfig(**checksum_config_kwargs)
162+
148163
def _invoke_done_callbacks(self, **kwargs):
149164
callargs = self.s3_crt_client.make_request.call_args
150165
callargs_kwargs = callargs[1]
@@ -182,6 +197,7 @@ def test_upload(self):
182197
'send_filepath': self.filename,
183198
'on_progress': mock.ANY,
184199
'on_done': mock.ANY,
200+
'checksum_config': self._get_expected_upload_checksum_config(),
185201
},
186202
)
187203
self._assert_expected_crt_http_request(
@@ -208,6 +224,7 @@ def test_upload_from_seekable_stream(self):
208224
'send_filepath': None,
209225
'on_progress': mock.ANY,
210226
'on_done': mock.ANY,
227+
'checksum_config': self._get_expected_upload_checksum_config(),
211228
},
212229
)
213230
self._assert_expected_crt_http_request(
@@ -239,6 +256,7 @@ def test_upload_from_nonseekable_stream(self):
239256
'send_filepath': None,
240257
'on_progress': mock.ANY,
241258
'on_done': mock.ANY,
259+
'checksum_config': self._get_expected_upload_checksum_config(),
242260
},
243261
)
244262
self._assert_expected_crt_http_request(
@@ -253,6 +271,90 @@ def test_upload_from_nonseekable_stream(self):
253271
)
254272
self._assert_subscribers_called(future)
255273

274+
def test_upload_override_checksum_algorithm(self):
275+
future = self.transfer_manager.upload(
276+
self.filename,
277+
self.bucket,
278+
self.key,
279+
{'ChecksumAlgorithm': 'CRC32C'},
280+
[self.record_subscriber],
281+
)
282+
future.result()
283+
284+
callargs_kwargs = self.s3_crt_client.make_request.call_args[1]
285+
self.assertEqual(
286+
callargs_kwargs,
287+
{
288+
'request': mock.ANY,
289+
'type': awscrt.s3.S3RequestType.PUT_OBJECT,
290+
'send_filepath': self.filename,
291+
'on_progress': mock.ANY,
292+
'on_done': mock.ANY,
293+
'checksum_config': self._get_expected_upload_checksum_config(
294+
algorithm=awscrt.s3.S3ChecksumAlgorithm.CRC32C
295+
),
296+
},
297+
)
298+
self._assert_expected_crt_http_request(
299+
callargs_kwargs["request"],
300+
expected_http_method='PUT',
301+
expected_content_length=len(self.expected_content),
302+
expected_missing_headers=[
303+
'Content-MD5',
304+
'x-amz-sdk-checksum-algorithm',
305+
'X-Amz-Trailer',
306+
],
307+
)
308+
self._assert_subscribers_called(future)
309+
310+
def test_upload_override_checksum_algorithm_accepts_lowercase(self):
311+
future = self.transfer_manager.upload(
312+
self.filename,
313+
self.bucket,
314+
self.key,
315+
{'ChecksumAlgorithm': 'crc32c'},
316+
[self.record_subscriber],
317+
)
318+
future.result()
319+
320+
callargs_kwargs = self.s3_crt_client.make_request.call_args[1]
321+
self.assertEqual(
322+
callargs_kwargs,
323+
{
324+
'request': mock.ANY,
325+
'type': awscrt.s3.S3RequestType.PUT_OBJECT,
326+
'send_filepath': self.filename,
327+
'on_progress': mock.ANY,
328+
'on_done': mock.ANY,
329+
'checksum_config': self._get_expected_upload_checksum_config(
330+
algorithm=awscrt.s3.S3ChecksumAlgorithm.CRC32C
331+
),
332+
},
333+
)
334+
self._assert_expected_crt_http_request(
335+
callargs_kwargs["request"],
336+
expected_http_method='PUT',
337+
expected_content_length=len(self.expected_content),
338+
expected_missing_headers=[
339+
'Content-MD5',
340+
'x-amz-sdk-checksum-algorithm',
341+
'X-Amz-Trailer',
342+
],
343+
)
344+
self._assert_subscribers_called(future)
345+
346+
def test_upload_throws_error_for_unsupported_checksum(self):
347+
with self.assertRaisesRegex(
348+
ValueError, 'ChecksumAlgorithm: UNSUPPORTED not supported'
349+
):
350+
self.transfer_manager.upload(
351+
self.filename,
352+
self.bucket,
353+
self.key,
354+
{'ChecksumAlgorithm': 'UNSUPPORTED'},
355+
[self.record_subscriber],
356+
)
357+
256358
def test_download(self):
257359
future = self.transfer_manager.download(
258360
self.bucket, self.key, self.filename, {}, [self.record_subscriber]
@@ -269,6 +371,7 @@ def test_download(self):
269371
'on_progress': mock.ANY,
270372
'on_done': mock.ANY,
271373
'on_body': None,
374+
'checksum_config': self._get_expected_download_checksum_config(),
272375
},
273376
)
274377
# the recv_filepath will be set to a temporary file path with some
@@ -306,6 +409,7 @@ def test_download_to_seekable_stream(self):
306409
'on_progress': mock.ANY,
307410
'on_done': mock.ANY,
308411
'on_body': mock.ANY,
412+
'checksum_config': self._get_expected_download_checksum_config(),
309413
},
310414
)
311415
self._assert_expected_crt_http_request(
@@ -340,6 +444,7 @@ def test_download_to_nonseekable_stream(self):
340444
'on_progress': mock.ANY,
341445
'on_done': mock.ANY,
342446
'on_body': mock.ANY,
447+
'checksum_config': self._get_expected_download_checksum_config(),
343448
},
344449
)
345450
self._assert_expected_crt_http_request(

0 commit comments

Comments
 (0)