Skip to content

Commit 337155f

Browse files
TingDaoK and graebm authored
Support full object checksum (#468)
Co-authored-by: Michael Graeb <graebm@amazon.com>
1 parent 9c1bd19 commit 337155f

26 files changed

Lines changed: 730 additions & 472 deletions

include/aws/s3/private/s3_checksums.h

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,45 @@
1111

1212
struct aws_s3_checksum;
1313

14+
/* Checksum algorithms listed in priority order; used to decide which algorithm to use. */
15+
static const enum aws_s3_checksum_algorithm s_checksum_algo_priority_list[] = {
16+
AWS_SCA_CRC64NVME,
17+
AWS_SCA_CRC32C,
18+
AWS_SCA_CRC32,
19+
AWS_SCA_SHA1,
20+
AWS_SCA_SHA256,
21+
};
22+
AWS_STATIC_ASSERT(AWS_ARRAY_SIZE(s_checksum_algo_priority_list) == (AWS_SCA_END - AWS_SCA_INIT + 1));
23+
1424
struct aws_checksum_vtable {
1525
void (*destroy)(struct aws_s3_checksum *checksum);
1626
int (*update)(struct aws_s3_checksum *checksum, const struct aws_byte_cursor *buf);
17-
int (*finalize)(struct aws_s3_checksum *checksum, struct aws_byte_buf *out, size_t truncate_to);
27+
int (*finalize)(struct aws_s3_checksum *checksum, struct aws_byte_buf *out);
1828
};
1929

2030
struct aws_s3_checksum {
2131
struct aws_allocator *allocator;
2232
struct aws_checksum_vtable *vtable;
23-
void *impl;
2433
size_t digest_size;
2534
enum aws_s3_checksum_algorithm algorithm;
2635
bool good;
36+
union {
37+
struct aws_hash *hash;
38+
uint32_t crc_val_32bit;
39+
uint64_t crc_val_64bit;
40+
} impl;
2741
};
2842

29-
struct checksum_config {
43+
struct checksum_config_storage {
44+
struct aws_allocator *allocator;
45+
struct aws_byte_buf full_object_checksum;
46+
bool has_full_object_checksum;
47+
3048
enum aws_s3_checksum_location location;
3149
enum aws_s3_checksum_algorithm checksum_algorithm;
3250
bool validate_response_checksum;
3351
struct {
52+
bool crc64nvme;
3453
bool crc32c;
3554
bool crc32;
3655
bool sha1;
@@ -85,25 +104,26 @@ struct aws_input_stream *aws_chunk_stream_new(
85104
* Get the size of the checksum output corresponding to the aws_s3_checksum_algorithm enum value.
86105
*/
87106
AWS_S3_API
88-
size_t aws_get_digest_size_from_algorithm(enum aws_s3_checksum_algorithm algorithm);
107+
size_t aws_get_digest_size_from_checksum_algorithm(enum aws_s3_checksum_algorithm algorithm);
89108

90109
/**
91-
* Get the header name corresponding to the aws_s3_checksum_algorithm enum value.
110+
* Get header name to use for algorithm (e.g. "x-amz-checksum-crc32")
92111
*/
93112
AWS_S3_API
94-
const struct aws_byte_cursor *aws_get_http_header_name_from_algorithm(enum aws_s3_checksum_algorithm algorithm);
113+
struct aws_byte_cursor aws_get_http_header_name_from_checksum_algorithm(enum aws_s3_checksum_algorithm algorithm);
95114

96115
/**
97-
* Get the multipart upload header name corresponding to the aws_s3_checksum_algorithm enum value.
116+
* Get algorithm's name (e.g. "CRC32"), to be used as the value of headers like `x-amz-checksum-algorithm`
98117
*/
99118
AWS_S3_API
100-
const struct aws_byte_cursor *aws_get_create_mpu_header_name_from_algorithm(enum aws_s3_checksum_algorithm algorithm);
119+
struct aws_byte_cursor aws_get_checksum_algorithm_name(enum aws_s3_checksum_algorithm algorithm);
101120

102121
/**
103-
* Get the complete multipart upload name corresponding to the aws_s3_checksum_algorithm enum value.
122+
* Get the name of the checksum algorithm as used in the details of the uploaded parts. Refer to
123+
* https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompletedPart.html#AmazonS3-Type-CompletedPart
104124
*/
105125
AWS_S3_API
106-
const struct aws_byte_cursor *aws_get_complete_mpu_name_from_algorithm(enum aws_s3_checksum_algorithm algorithm);
126+
struct aws_byte_cursor aws_get_completed_part_name_from_checksum_algorithm(enum aws_s3_checksum_algorithm algorithm);
107127

108128
/**
109129
* create a new aws_checksum corresponding to the aws_s3_checksum_algorithm enum value.
@@ -121,8 +141,7 @@ int aws_checksum_compute(
121141
struct aws_allocator *allocator,
122142
enum aws_s3_checksum_algorithm algorithm,
123143
const struct aws_byte_cursor *input,
124-
struct aws_byte_buf *output,
125-
size_t truncate_to);
144+
struct aws_byte_buf *output);
126145

127146
/**
128147
* Cleans up and deallocates checksum.
@@ -141,9 +160,15 @@ int aws_checksum_update(struct aws_s3_checksum *checksum, const struct aws_byte_
141160
* Allocation of output is the caller's responsibility.
142161
*/
143162
AWS_S3_API
144-
int aws_checksum_finalize(struct aws_s3_checksum *checksum, struct aws_byte_buf *output, size_t truncate_to);
163+
int aws_checksum_finalize(struct aws_s3_checksum *checksum, struct aws_byte_buf *output);
164+
165+
AWS_S3_API
166+
void aws_checksum_config_storage_init(
167+
struct aws_allocator *allocator,
168+
struct checksum_config_storage *internal_config,
169+
const struct aws_s3_checksum_config *config);
145170

146171
AWS_S3_API
147-
void checksum_config_init(struct checksum_config *internal_config, const struct aws_s3_checksum_config *config);
172+
void aws_checksum_config_storage_cleanup(struct checksum_config_storage *internal_config);
148173

149174
#endif /* AWS_S3_CHECKSUMS_H */

include/aws/s3/private/s3_meta_request_impl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ struct aws_s3_meta_request {
272272
const bool should_compute_content_md5;
273273

274274
/* deep copy of the checksum config. */
275-
struct checksum_config checksum_config;
275+
struct checksum_config_storage checksum_config;
276276

277277
/* checksum found in either a default get request, or in the initial head request of a multipart get */
278278
struct aws_byte_buf meta_request_level_response_header_checksum;

include/aws/s3/private/s3_request_messages.h

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ struct aws_byte_buf;
1717
struct aws_byte_cursor;
1818
struct aws_string;
1919
struct aws_array_list;
20-
struct checksum_config;
20+
struct checksum_config_storage;
2121

2222
AWS_EXTERN_C_BEGIN
2323

@@ -52,13 +52,9 @@ struct aws_input_stream *aws_s3_message_util_assign_body(
5252
struct aws_allocator *allocator,
5353
struct aws_byte_buf *byte_buf,
5454
struct aws_http_message *out_message,
55-
const struct checksum_config *checksum_config,
55+
const struct checksum_config_storage *checksum_config,
5656
struct aws_byte_buf *out_checksum);
5757

58-
/* Return true if checksum headers has been set. */
59-
AWS_S3_API
60-
bool aws_s3_message_util_check_checksum_header(struct aws_http_message *message);
61-
6258
/* Create an HTTP request for an S3 Ranged Get Object Request, using the given request as a basis */
6359
AWS_S3_API
6460
struct aws_http_message *aws_s3_ranged_get_object_message_new(
@@ -80,7 +76,7 @@ AWS_S3_API
8076
struct aws_http_message *aws_s3_create_multipart_upload_message_new(
8177
struct aws_allocator *allocator,
8278
struct aws_http_message *base_message,
83-
const struct checksum_config *checksum_config);
79+
const struct checksum_config_storage *checksum_config);
8480

8581
/* Create an HTTP request for an S3 Put Object request, using the original request as a basis. Creates and assigns a
8682
* body stream using the passed in buffer. If multipart is not needed, part number and upload_id can be 0 and NULL,
@@ -93,7 +89,7 @@ struct aws_http_message *aws_s3_upload_part_message_new(
9389
uint32_t part_number,
9490
const struct aws_string *upload_id,
9591
bool should_compute_content_md5,
96-
const struct checksum_config *checksum_config,
92+
const struct checksum_config_storage *checksum_config,
9793
struct aws_byte_buf *encoded_checksum_output);
9894

9995
/* Create an HTTP request for an S3 UploadPartCopy request, using the original request as a basis.
@@ -120,7 +116,7 @@ struct aws_http_message *aws_s3_complete_multipart_message_new(
120116
struct aws_byte_buf *body_buffer,
121117
const struct aws_string *upload_id,
122118
const struct aws_array_list *parts,
123-
const struct checksum_config *checksum_config);
119+
const struct checksum_config_storage *checksum_config);
124120

125121
AWS_S3_API
126122
struct aws_http_message *aws_s3_abort_multipart_upload_message_new(

include/aws/s3/private/s3_util.h

Lines changed: 8 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -61,44 +61,17 @@ extern const struct aws_byte_cursor g_request_validation_mode;
6161
AWS_S3_API
6262
extern const struct aws_byte_cursor g_enabled;
6363

64+
/**
65+
* The checksum-algorithm header name used for CopyObject and CreateMultipartUpload
66+
*/
6467
AWS_S3_API
65-
extern const struct aws_byte_cursor g_create_mpu_checksum_header_name;
66-
67-
AWS_S3_API
68-
extern const struct aws_byte_cursor g_crc32c_header_name;
69-
70-
AWS_S3_API
71-
extern const struct aws_byte_cursor g_crc32_header_name;
72-
73-
AWS_S3_API
74-
extern const struct aws_byte_cursor g_sha1_header_name;
75-
76-
AWS_S3_API
77-
extern const struct aws_byte_cursor g_sha256_header_name;
78-
79-
AWS_S3_API
80-
extern const struct aws_byte_cursor g_crc32c_create_mpu_header_name;
81-
82-
AWS_S3_API
83-
extern const struct aws_byte_cursor g_crc32_create_mpu_header_name;
84-
85-
AWS_S3_API
86-
extern const struct aws_byte_cursor g_sha1_create_mpu_header_name;
87-
88-
AWS_S3_API
89-
extern const struct aws_byte_cursor g_sha256_create_mpu_header_name;
90-
91-
AWS_S3_API
92-
extern const struct aws_byte_cursor g_crc32c_complete_mpu_name;
93-
94-
AWS_S3_API
95-
extern const struct aws_byte_cursor g_crc32_complete_mpu_name;
96-
97-
AWS_S3_API
98-
extern const struct aws_byte_cursor g_sha1_complete_mpu_name;
68+
extern const struct aws_byte_cursor g_checksum_algorithm_header_name;
9969

70+
/**
71+
* The checksum-algorithm header name used for PutObject, UploadParts and PutObject*
72+
*/
10073
AWS_S3_API
101-
extern const struct aws_byte_cursor g_sha256_complete_mpu_name;
74+
extern const struct aws_byte_cursor g_sdk_checksum_algorithm_header_name;
10275

10376
AWS_S3_API
10477
extern const struct aws_byte_cursor g_s3_client_version;

include/aws/s3/s3_client.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,8 @@ enum aws_s3_checksum_algorithm {
241241
AWS_SCA_CRC32,
242242
AWS_SCA_SHA1,
243243
AWS_SCA_SHA256,
244-
AWS_SCA_END = AWS_SCA_SHA256,
244+
AWS_SCA_CRC64NVME,
245+
AWS_SCA_END = AWS_SCA_CRC64NVME,
245246
};
246247

247248
enum aws_s3_checksum_location {
@@ -559,7 +560,7 @@ struct aws_s3_checksum_config {
559560
/**
560561
* The location of client added checksum header.
561562
*
562-
* If AWS_SCL_NONE. No request payload checksum will be calculated or added.
563+
* If AWS_SCL_NONE, no request payload checksum will be added.
563564
*
564565
* If AWS_SCL_HEADER, the client will calculate the checksum and add it to the headers.
565566
*
@@ -592,7 +593,7 @@ struct aws_s3_checksum_config {
592593
*
593594
* The list of algorithms for the user to pick from when validating the checksum. The client will pick the algorithm from the
594595
* list with the priority based on performance, and the algorithm sent by server. The priority based on performance
595-
* is [CRC32C, CRC32, SHA1, SHA256].
596+
* is [CRC64NVME, CRC32C, CRC32, SHA1, SHA256].
596597
*
597598
* If the response checksum was validated by client, the result will indicate which algorithm was picked.
598599
*/

source/s3_auto_ranged_put.c

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,71 @@ static struct aws_s3_meta_request_vtable s_s3_auto_ranged_put_vtable = {
306306
.pause = s_s3_auto_ranged_put_pause,
307307
};
308308

309+
static int s_init_and_verify_checksum_config_from_headers(
310+
struct checksum_config_storage *checksum_config,
311+
const struct aws_http_message *message,
312+
const void *log_id) {
313+
/* Check whether a checksum header was set on the message */
314+
struct aws_http_headers *headers = aws_http_message_get_headers(message);
315+
enum aws_s3_checksum_algorithm header_algo = AWS_SCA_NONE;
316+
struct aws_byte_cursor header_value;
317+
AWS_ZERO_STRUCT(header_value);
318+
319+
for (size_t i = 0; i < AWS_ARRAY_SIZE(s_checksum_algo_priority_list); i++) {
320+
enum aws_s3_checksum_algorithm algorithm = s_checksum_algo_priority_list[i];
321+
const struct aws_byte_cursor algorithm_header_name =
322+
aws_get_http_header_name_from_checksum_algorithm(algorithm);
323+
if (aws_http_headers_get(headers, algorithm_header_name, &header_value) == AWS_OP_SUCCESS) {
324+
if (header_algo == AWS_SCA_NONE) {
325+
header_algo = algorithm;
326+
} else {
327+
/* If multiple checksum headers are set, the request is malformed */
328+
AWS_LOGF_ERROR(
329+
AWS_LS_S3_META_REQUEST,
330+
"id=%p Could not create auto-ranged-put meta request; multiple checksum headers has been set",
331+
log_id);
332+
return aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
333+
}
334+
}
335+
}
336+
if (header_algo == AWS_SCA_NONE) {
337+
/* No checksum header found, done */
338+
return AWS_OP_SUCCESS;
339+
}
340+
341+
/* Found the full object checksum from the header, check if it matches the explicit setting from config */
342+
if (checksum_config->checksum_algorithm != AWS_SCA_NONE && checksum_config->checksum_algorithm != header_algo) {
343+
AWS_LOGF_ERROR(
344+
AWS_LS_S3_META_REQUEST,
345+
"id=%p Could not create auto-ranged-put meta request; checksum config mismatch the checksum from header.",
346+
log_id);
347+
return aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
348+
}
349+
AWS_ASSERT(!checksum_config->has_full_object_checksum);
350+
351+
AWS_LOGF_DEBUG(
352+
AWS_LS_S3_META_REQUEST,
353+
"id=%p Setting the full-object checksum from header; algorithm: " PRInSTR ", value: " PRInSTR ".",
354+
log_id,
355+
AWS_BYTE_CURSOR_PRI(aws_get_checksum_algorithm_name(header_algo)),
356+
AWS_BYTE_CURSOR_PRI(header_value));
357+
/* Set algo */
358+
checksum_config->checksum_algorithm = header_algo;
359+
if (checksum_config->location == AWS_SCL_NONE) {
360+
/* Set the checksum location to trailer for the parts, complete MPU will still have the checksum in the header.
361+
* But to keep the data integrity for the parts, we need to set the checksum location to trailer to send the
362+
* parts level checksums.
363+
*/
364+
checksum_config->location = AWS_SCL_TRAILER;
365+
}
366+
367+
/* Set full object checksum from the header value. */
368+
aws_byte_buf_init_copy_from_cursor(
369+
&checksum_config->full_object_checksum, checksum_config->allocator, header_value);
370+
checksum_config->has_full_object_checksum = true;
371+
return AWS_OP_SUCCESS;
372+
}
373+
309374
/* Allocate a new auto-ranged put meta request */
310375
struct aws_s3_meta_request *aws_s3_meta_request_auto_ranged_put_new(
311376
struct aws_allocator *allocator,
@@ -363,6 +428,11 @@ struct aws_s3_meta_request *aws_s3_meta_request_auto_ranged_put_new(
363428
goto error_clean_up;
364429
}
365430

431+
if (s_init_and_verify_checksum_config_from_headers(
432+
&auto_ranged_put->base.checksum_config, options->message, (void *)&auto_ranged_put->base)) {
433+
goto error_clean_up;
434+
}
435+
366436
AWS_LOGF_DEBUG(
367437
AWS_LS_S3_META_REQUEST, "id=%p Created new Auto-Ranged Put Meta Request.", (void *)&auto_ranged_put->base);
368438

@@ -767,7 +837,7 @@ static int s_verify_part_matches_checksum(
767837
}
768838

769839
struct aws_byte_buf checksum;
770-
if (aws_byte_buf_init(&checksum, allocator, aws_get_digest_size_from_algorithm(algorithm))) {
840+
if (aws_byte_buf_init(&checksum, allocator, aws_get_digest_size_from_checksum_algorithm(algorithm))) {
771841
return AWS_OP_ERR;
772842
}
773843

@@ -776,14 +846,14 @@ static int s_verify_part_matches_checksum(
776846
int return_status = AWS_OP_SUCCESS;
777847

778848
size_t encoded_len = 0;
779-
if (aws_base64_compute_encoded_len(aws_get_digest_size_from_algorithm(algorithm), &encoded_len)) {
849+
if (aws_base64_compute_encoded_len(aws_get_digest_size_from_checksum_algorithm(algorithm), &encoded_len)) {
780850
AWS_LOGF_ERROR(
781851
AWS_LS_S3_META_REQUEST, "Failed to resume upload. Unable to determine length of encoded checksum.");
782852
return_status = aws_raise_error(AWS_ERROR_S3_RESUME_FAILED);
783853
goto on_done;
784854
}
785855

786-
if (aws_checksum_compute(allocator, algorithm, &body_cur, &checksum, 0)) {
856+
if (aws_checksum_compute(allocator, algorithm, &body_cur, &checksum)) {
787857
AWS_LOGF_ERROR(
788858
AWS_LS_S3_META_REQUEST, "Failed to resume upload. Unable to compute checksum for the skipped part.");
789859
return_status = aws_raise_error(AWS_ERROR_S3_RESUME_FAILED);

source/s3_checksum_stream.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ static int s_finalize_checksum(struct aws_checksum_stream *impl) {
2424
return AWS_OP_SUCCESS;
2525
}
2626

27-
if (aws_checksum_finalize(impl->checksum, &impl->checksum_result, 0) != AWS_OP_SUCCESS) {
27+
if (aws_checksum_finalize(impl->checksum, &impl->checksum_result) != AWS_OP_SUCCESS) {
2828
AWS_LOGF_ERROR(
2929
AWS_LS_S3_CLIENT,
3030
"Failed to calculate checksum with error code %d (%s).",

0 commit comments

Comments
 (0)