Skip to content

Commit 3a7e6e4

Browse files
committed
Turn on checksum validation for CRT S3 client
For uploads, the CRT S3 client will add CRC32 trailing checksums. For downloads, the CRT S3 client will validate checksums associated to the object when possible.
1 parent 90ecbeb commit 3a7e6e4

File tree

3 files changed

+100
-5
lines changed

3 files changed

+100
-5
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"type": "enhancement",
3+
"category": "``s3``",
4+
"description": "Automatically configure CRC32 checksums for uploads and checksum validation for downloads through the CRT transfer client."
5+
}

awscli/s3transfer/crt.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None):
206206
extra_args = {}
207207
if subscribers is None:
208208
subscribers = {}
209+
self._validate_checksum_algorithm_supported(extra_args)
209210
callargs = CallArgs(
210211
bucket=bucket,
211212
key=key,
@@ -231,6 +232,17 @@ def delete(self, bucket, key, extra_args=None, subscribers=None):
231232
def shutdown(self, cancel=False):
232233
self._shutdown(cancel)
233234

235+
def _validate_checksum_algorithm_supported(self, extra_args):
236+
checksum_algorithm = extra_args.get('ChecksumAlgorithm')
237+
if checksum_algorithm is None:
238+
return
239+
if checksum_algorithm not in awscrt.s3.S3ChecksumAlgorithm.__members__:
240+
raise ValueError(
241+
f'ChecksumAlgorithm: {checksum_algorithm} not supported. '
242+
f'Supported algorithms are: '
243+
f'{list(awscrt.s3.S3ChecksumAlgorithm.__members__)}'
244+
)
245+
234246
def _cancel_transfers(self):
235247
for coordinator in self._future_coordinators:
236248
if not coordinator.done():
@@ -623,11 +635,17 @@ def _get_make_request_args_put_object(
623635
else:
624636
call_args.extra_args["Body"] = call_args.fileobj
625637

638+
checksum_algorithm = call_args.extra_args.pop(
639+
'ChecksumAlgorithm', 'CRC32'
640+
)
641+
checksum_config = awscrt.s3.S3ChecksumConfig(
642+
algorithm=awscrt.s3.S3ChecksumAlgorithm[checksum_algorithm],
643+
location=awscrt.s3.S3ChecksumLocation.TRAILER,
644+
)
626645
# Suppress botocore's automatic MD5 calculation by setting an override
627646
# value that will get deleted in the BotocoreCRTRequestSerializer.
628-
# The CRT S3 client is able automatically compute checksums as part of
629-
# requests it makes, and the intention is to configure automatic
630-
# checksums in a future update.
647+
# As part of the CRT S3 request, we request the CRT S3 client to
648+
# automatically add trailing checksums to its uploads.
631649
call_args.extra_args["ContentMD5"] = "override-to-be-removed"
632650

633651
make_request_args = self._default_get_make_request_args(
@@ -639,6 +657,7 @@ def _get_make_request_args_put_object(
639657
on_done_after_calls=on_done_after_calls,
640658
)
641659
make_request_args['send_filepath'] = send_filepath
660+
make_request_args['checksum_config'] = checksum_config
642661
return make_request_args
643662

644663
def _get_make_request_args_get_object(
@@ -652,6 +671,7 @@ def _get_make_request_args_get_object(
652671
):
653672
recv_filepath = None
654673
on_body = None
674+
checksum_config = awscrt.s3.S3ChecksumConfig(validate_response=True)
655675
if isinstance(call_args.fileobj, str):
656676
final_filepath = call_args.fileobj
657677
recv_filepath = self._os_utils.get_temp_filename(final_filepath)
@@ -673,6 +693,7 @@ def _get_make_request_args_get_object(
673693
)
674694
make_request_args['recv_filepath'] = recv_filepath
675695
make_request_args['on_body'] = on_body
696+
make_request_args['checksum_config'] = checksum_config
676697
return make_request_args
677698

678699
def _default_get_make_request_args(

tests/functional/s3transfer/test_crt.py

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,10 @@ def _assert_expected_crt_http_request(
129129
)
130130
if expected_missing_headers is not None:
131131
header_names = [
132-
header[0] for header in crt_http_request.headers
132+
header[0].lower() for header in crt_http_request.headers
133133
]
134134
for expected_missing_header in expected_missing_headers:
135-
self.assertNotIn(expected_missing_header, header_names)
135+
self.assertNotIn(expected_missing_header.lower(), header_names)
136136

137137
def _assert_subscribers_called(self, expected_future=None):
138138
self.assertTrue(self.record_subscriber.on_queued_called)
@@ -145,6 +145,21 @@ def _assert_subscribers_called(self, expected_future=None):
145145
self.record_subscriber.on_done_future, expected_future
146146
)
147147

148+
def _get_expected_upload_checksum_config(self, **overrides):
149+
checksum_config_kwargs = {
150+
'algorithm': awscrt.s3.S3ChecksumAlgorithm.CRC32,
151+
'location': awscrt.s3.S3ChecksumLocation.TRAILER,
152+
}
153+
checksum_config_kwargs.update(overrides)
154+
return awscrt.s3.S3ChecksumConfig(**checksum_config_kwargs)
155+
156+
def _get_expected_download_checksum_config(self, **overrides):
157+
checksum_config_kwargs = {
158+
'validate_response': True,
159+
}
160+
checksum_config_kwargs.update(overrides)
161+
return awscrt.s3.S3ChecksumConfig(**checksum_config_kwargs)
162+
148163
def _invoke_done_callbacks(self, **kwargs):
149164
callargs = self.s3_crt_client.make_request.call_args
150165
callargs_kwargs = callargs[1]
@@ -182,6 +197,7 @@ def test_upload(self):
182197
'send_filepath': self.filename,
183198
'on_progress': mock.ANY,
184199
'on_done': mock.ANY,
200+
'checksum_config': self._get_expected_upload_checksum_config(),
185201
},
186202
)
187203
self._assert_expected_crt_http_request(
@@ -208,6 +224,7 @@ def test_upload_from_seekable_stream(self):
208224
'send_filepath': None,
209225
'on_progress': mock.ANY,
210226
'on_done': mock.ANY,
227+
'checksum_config': self._get_expected_upload_checksum_config(),
211228
},
212229
)
213230
self._assert_expected_crt_http_request(
@@ -239,6 +256,7 @@ def test_upload_from_nonseekable_stream(self):
239256
'send_filepath': None,
240257
'on_progress': mock.ANY,
241258
'on_done': mock.ANY,
259+
'checksum_config': self._get_expected_upload_checksum_config(),
242260
},
243261
)
244262
self._assert_expected_crt_http_request(
@@ -253,6 +271,54 @@ def test_upload_from_nonseekable_stream(self):
253271
)
254272
self._assert_subscribers_called(future)
255273

274+
def test_upload_override_checksum_algorithm(self):
275+
future = self.transfer_manager.upload(
276+
self.filename,
277+
self.bucket,
278+
self.key,
279+
{'ChecksumAlgorithm': 'CRC32C'},
280+
[self.record_subscriber],
281+
)
282+
future.result()
283+
284+
callargs_kwargs = self.s3_crt_client.make_request.call_args[1]
285+
self.assertEqual(
286+
callargs_kwargs,
287+
{
288+
'request': mock.ANY,
289+
'type': awscrt.s3.S3RequestType.PUT_OBJECT,
290+
'send_filepath': self.filename,
291+
'on_progress': mock.ANY,
292+
'on_done': mock.ANY,
293+
'checksum_config': self._get_expected_upload_checksum_config(
294+
algorithm=awscrt.s3.S3ChecksumAlgorithm.CRC32C
295+
),
296+
},
297+
)
298+
self._assert_expected_crt_http_request(
299+
callargs_kwargs["request"],
300+
expected_http_method='PUT',
301+
expected_content_length=len(self.expected_content),
302+
expected_missing_headers=[
303+
'Content-MD5',
304+
'x-amz-sdk-checksum-algorithm',
305+
'X-Amz-Trailer',
306+
],
307+
)
308+
self._assert_subscribers_called(future)
309+
310+
def test_upload_throws_error_for_unsupported_checksum(self):
311+
with self.assertRaisesRegex(
312+
ValueError, 'ChecksumAlgorithm: UNSUPPORTED not supported'
313+
):
314+
self.transfer_manager.upload(
315+
self.filename,
316+
self.bucket,
317+
self.key,
318+
{'ChecksumAlgorithm': 'UNSUPPORTED'},
319+
[self.record_subscriber],
320+
)
321+
256322
def test_download(self):
257323
future = self.transfer_manager.download(
258324
self.bucket, self.key, self.filename, {}, [self.record_subscriber]
@@ -269,6 +335,7 @@ def test_download(self):
269335
'on_progress': mock.ANY,
270336
'on_done': mock.ANY,
271337
'on_body': None,
338+
'checksum_config': self._get_expected_download_checksum_config(),
272339
},
273340
)
274341
# the recv_filepath will be set to a temporary file path with some
@@ -306,6 +373,7 @@ def test_download_to_seekable_stream(self):
306373
'on_progress': mock.ANY,
307374
'on_done': mock.ANY,
308375
'on_body': mock.ANY,
376+
'checksum_config': self._get_expected_download_checksum_config(),
309377
},
310378
)
311379
self._assert_expected_crt_http_request(
@@ -340,6 +408,7 @@ def test_download_to_nonseekable_stream(self):
340408
'on_progress': mock.ANY,
341409
'on_done': mock.ANY,
342410
'on_body': mock.ANY,
411+
'checksum_config': self._get_expected_download_checksum_config(),
343412
},
344413
)
345414
self._assert_expected_crt_http_request(

0 commit comments

Comments
 (0)