Skip to content

Commit 9d53af3

Browse files
committed
don't regenerate the checksum
1 parent 1762f83 commit 9d53af3

File tree

7 files changed

+176
-92
lines changed

7 files changed

+176
-92
lines changed

include/aws/s3/private/s3_checksums.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,18 @@ struct aws_input_stream *aws_checksum_stream_new(
9393
* @param existing_stream The data to be chunkified prepended by information on the stream length followed by a final
9494
* chunk and a trailing chunk containing a checksum of the existing stream. Destroying the
9595
* chunk stream will destroy the existing stream.
96-
* @param checksum_output Optional argument, if provided the buffer will be initialized to the appropriate size and
96+
* @param checksum_buffer Required.
97+
* - Empty buffer, the buffer will be initialized to the appropriate size and
9798
* filled with the checksum result when calculated. Callers responsibility to cleanup.
99+
* - Otherwise, the buffer will be used directly.
100+
* Caller takes the ownership of the buffer, error or not.
98101
*/
99102
AWS_S3_API
100103
struct aws_input_stream *aws_chunk_stream_new(
101104
struct aws_allocator *allocator,
102105
struct aws_input_stream *existing_stream,
103106
enum aws_s3_checksum_algorithm algorithm,
104-
struct aws_byte_buf *checksum_output);
107+
struct aws_byte_buf *checksum_buffer);
105108

106109
/**
107110
* Get the size of the checksum output corresponding to the aws_s3_checksum_algorithm enum value.

include/aws/s3/private/s3_client_impl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ struct aws_s3_client_vtable {
174174
struct aws_http_stream *(*http_connection_make_request)(
175175
struct aws_http_connection *client_connection,
176176
const struct aws_http_make_request_options *options);
177+
178+
void (*after_prepare_upload_part_finish)(struct aws_s3_request *request);
177179
};
178180

179181
struct aws_s3_upload_part_timeout_stats {

include/aws/s3/private/s3_request_messages.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ struct aws_input_stream *aws_s3_message_util_assign_body(
5353
struct aws_byte_buf *byte_buf,
5454
struct aws_http_message *out_message,
5555
const struct checksum_config_storage *checksum_config,
56-
struct aws_byte_buf *out_checksum);
56+
struct aws_byte_buf *checksum_buf);
5757

5858
/* Create an HTTP request for an S3 Ranged Get Object Request, using the given request as a basis */
5959
AWS_S3_API
@@ -90,7 +90,7 @@ struct aws_http_message *aws_s3_upload_part_message_new(
9090
const struct aws_string *upload_id,
9191
bool should_compute_content_md5,
9292
const struct checksum_config_storage *checksum_config,
93-
struct aws_byte_buf *encoded_checksum_output);
93+
struct aws_byte_buf *encoded_checksum);
9494

9595
/* Create an HTTP request for an S3 UploadPartCopy request, using the original request as a basis.
9696
* If multipart is not needed, part number and upload_id can be 0 and NULL,

source/s3_auto_ranged_put.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,6 +1115,7 @@ static void s_s3_prepare_upload_part_on_read_done(void *user_data) {
11151115
static void s_s3_prepare_upload_part_finish(struct aws_s3_prepare_upload_part_job *part_prep, int error_code) {
11161116
struct aws_s3_request *request = part_prep->request;
11171117
struct aws_s3_meta_request *meta_request = request->meta_request;
1118+
struct aws_s3_client *client = meta_request->client;
11181119
struct aws_s3_auto_ranged_put *auto_ranged_put = meta_request->impl;
11191120

11201121
if (error_code != AWS_ERROR_SUCCESS) {
@@ -1141,8 +1142,10 @@ static void s_s3_prepare_upload_part_finish(struct aws_s3_prepare_upload_part_jo
11411142
aws_array_list_get_at(&auto_ranged_put->synced_data.part_list, &part, request->part_number - 1);
11421143
AWS_ASSERT(part != NULL);
11431144
checksum_buf = &part->checksum_base64;
1144-
/* Clean up the buffer in case of it's initialized before and retry happens. */
1145-
aws_byte_buf_clean_up(checksum_buf);
1145+
/* If checksum buf is not empty, it means either the part being retried or the part resumed from list parts.
1146+
* Keep reusing the old checksum in case of the request body in memory mangled */
1147+
AWS_ASSERT(
1148+
checksum_buf->len == 0 || request->num_times_prepared > 0 || auto_ranged_put->resume_token != NULL);
11461149
aws_s3_meta_request_unlock_synced_data(meta_request);
11471150
}
11481151
/* END CRITICAL SECTION */
@@ -1173,6 +1176,10 @@ static void s_s3_prepare_upload_part_finish(struct aws_s3_prepare_upload_part_jo
11731176
aws_future_http_message_set_result_by_move(part_prep->on_complete, &message);
11741177

11751178
on_done:
1179+
if (client->vtable->after_prepare_upload_part_finish) {
1180+
/* TEST ONLY, allow test to stub here. */
1181+
client->vtable->after_prepare_upload_part_finish(request);
1182+
}
11761183
AWS_FATAL_ASSERT(aws_future_http_message_is_done(part_prep->on_complete));
11771184
aws_future_bool_release(part_prep->asyncstep_read_part);
11781185
aws_future_http_message_release(part_prep->on_complete);

source/s3_chunk_stream.c

Lines changed: 43 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,15 @@ typedef int(set_stream_fn)(struct aws_chunk_stream *parent_stream);
2222
struct aws_chunk_stream {
2323
struct aws_input_stream base;
2424
struct aws_allocator *allocator;
25-
2625
/* aws_input_stream_byte_cursor provides our actual functionality */
2726
/* Pointing to the stream we read from */
2827
struct aws_input_stream *current_stream;
2928

30-
struct aws_input_stream *checksum_stream;
31-
struct aws_byte_buf checksum_result;
32-
struct aws_byte_buf *checksum_result_output;
29+
/* The passed-in buffer, owned by the caller. If passed-in buffer is empty
30+
* it will be created by the chunk stream, but still caller owns its lifetime. Error or not. */
31+
struct aws_byte_buf *checksum_buffer;
32+
33+
struct aws_input_stream *chunk_body_stream;
3334
struct aws_byte_buf pre_chunk_buffer;
3435
struct aws_byte_buf post_chunk_buffer;
3536
struct aws_byte_cursor checksum_header_name;
@@ -63,16 +64,12 @@ static int s_set_post_chunk_stream(struct aws_chunk_stream *parent_stream) {
6364
struct aws_byte_cursor post_trailer_cursor = aws_byte_cursor_from_string(s_post_trailer);
6465
struct aws_byte_cursor colon_cursor = aws_byte_cursor_from_string(s_colon);
6566

66-
if (parent_stream->checksum_result.len == 0) {
67+
if (parent_stream->checksum_buffer->len == 0) {
6768
AWS_LOGF_ERROR(AWS_LS_S3_META_REQUEST, "Failed to extract base64 encoded checksum of stream");
6869
return aws_raise_error(AWS_ERROR_S3_CHECKSUM_CALCULATION_FAILED);
6970
}
70-
struct aws_byte_cursor checksum_result_cursor = aws_byte_cursor_from_buf(&parent_stream->checksum_result);
71-
if (parent_stream->checksum_result_output &&
72-
aws_byte_buf_init_copy_from_cursor(
73-
parent_stream->checksum_result_output, parent_stream->allocator, checksum_result_cursor)) {
74-
return AWS_OP_ERR;
75-
}
71+
struct aws_byte_cursor checksum_result_cursor = aws_byte_cursor_from_buf(parent_stream->checksum_buffer);
72+
7673
if (aws_byte_buf_init(
7774
&parent_stream->post_chunk_buffer,
7875
parent_stream->allocator,
@@ -92,16 +89,15 @@ static int s_set_post_chunk_stream(struct aws_chunk_stream *parent_stream) {
9289
parent_stream->set_current_stream_fn = s_set_null_stream;
9390
return AWS_OP_SUCCESS;
9491
error:
95-
aws_byte_buf_clean_up(parent_stream->checksum_result_output);
9692
aws_byte_buf_clean_up(&parent_stream->post_chunk_buffer);
9793
return AWS_OP_ERR;
9894
}
9995

10096
static int s_set_chunk_stream(struct aws_chunk_stream *parent_stream) {
10197
aws_input_stream_release(parent_stream->current_stream);
102-
parent_stream->current_stream = parent_stream->checksum_stream;
98+
parent_stream->current_stream = parent_stream->chunk_body_stream;
10399
aws_byte_buf_clean_up(&parent_stream->pre_chunk_buffer);
104-
parent_stream->checksum_stream = NULL;
100+
parent_stream->chunk_body_stream = NULL;
105101
parent_stream->set_current_stream_fn = s_set_post_chunk_stream;
106102
return AWS_OP_SUCCESS;
107103
}
@@ -169,11 +165,10 @@ static void s_aws_input_chunk_stream_destroy(struct aws_chunk_stream *impl) {
169165
if (impl->current_stream) {
170166
aws_input_stream_release(impl->current_stream);
171167
}
172-
if (impl->checksum_stream) {
173-
aws_input_stream_release(impl->checksum_stream);
168+
if (impl->chunk_body_stream) {
169+
aws_input_stream_release(impl->chunk_body_stream);
174170
}
175171
aws_byte_buf_clean_up(&impl->pre_chunk_buffer);
176-
aws_byte_buf_clean_up(&impl->checksum_result);
177172
aws_byte_buf_clean_up(&impl->post_chunk_buffer);
178173
aws_mem_release(impl->allocator, impl);
179174
}
@@ -190,13 +185,17 @@ struct aws_input_stream *aws_chunk_stream_new(
190185
struct aws_allocator *allocator,
191186
struct aws_input_stream *existing_stream,
192187
enum aws_s3_checksum_algorithm algorithm,
193-
struct aws_byte_buf *checksum_output) {
188+
struct aws_byte_buf *checksum_buffer) {
189+
AWS_PRECONDITION(allocator);
190+
AWS_PRECONDITION(existing_stream);
191+
AWS_PRECONDITION(checksum_buffer);
194192

195193
struct aws_chunk_stream *impl = aws_mem_calloc(allocator, 1, sizeof(struct aws_chunk_stream));
196194

197195
impl->allocator = allocator;
198196
impl->base.vtable = &s_aws_input_chunk_stream_vtable;
199-
impl->checksum_result_output = checksum_output;
197+
impl->checksum_buffer = checksum_buffer;
198+
200199
int64_t stream_length = 0;
201200
int64_t final_chunk_len = 0;
202201
if (aws_input_stream_get_length(existing_stream, &stream_length)) {
@@ -225,15 +224,29 @@ struct aws_input_stream *aws_chunk_stream_new(
225224
if (aws_base64_compute_encoded_len(checksum_len, &encoded_checksum_len)) {
226225
goto error;
227226
}
228-
if (aws_byte_buf_init(&impl->checksum_result, allocator, encoded_checksum_len)) {
229-
goto error;
230-
}
231-
232-
impl->checksum_stream = aws_checksum_stream_new(allocator, existing_stream, algorithm, &impl->checksum_result);
233-
if (impl->checksum_stream == NULL) {
234-
goto error;
227+
if (checksum_buffer->len == 0) {
228+
/* Empty passed-in buffer, calculate the checksum during reading from the stream. */
229+
if (aws_byte_buf_init(impl->checksum_buffer, allocator, encoded_checksum_len)) {
230+
goto error;
231+
}
232+
/* Wrap the existing stream with checksum stream to calculate the checksum when reading from it. */
233+
impl->chunk_body_stream = aws_checksum_stream_new(allocator, existing_stream, algorithm, impl->checksum_buffer);
234+
if (impl->chunk_body_stream == NULL) {
235+
goto error;
236+
}
237+
} else {
238+
if (checksum_buffer->len != encoded_checksum_len) {
239+
AWS_LOGF_ERROR(
240+
AWS_LS_S3_GENERAL,
241+
"Mismatched checksum buffer and algorithm. checksum_buf->len is %zu, but encoded_checksum_len is %zu",
242+
checksum_buffer->len,
243+
encoded_checksum_len);
244+
aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
245+
goto error;
246+
}
247+
/* No need to calculate the checksum during read, use the existing stream directly. */
248+
impl->chunk_body_stream = aws_input_stream_acquire(existing_stream);
235249
}
236-
237250
int64_t prechunk_stream_len = 0;
238251
int64_t colon_len = s_colon->len;
239252
int64_t post_trailer_len = s_post_trailer->len;
@@ -248,9 +261,9 @@ struct aws_input_stream *aws_chunk_stream_new(
248261
}
249262
impl->set_current_stream_fn = s_set_chunk_stream;
250263
} else {
251-
impl->current_stream = impl->checksum_stream;
264+
impl->current_stream = impl->chunk_body_stream;
252265
final_chunk_len = s_empty_chunk->len;
253-
impl->checksum_stream = NULL;
266+
impl->chunk_body_stream = NULL;
254267
impl->set_current_stream_fn = s_set_post_chunk_stream;
255268
}
256269

@@ -268,10 +281,9 @@ struct aws_input_stream *aws_chunk_stream_new(
268281
return &impl->base;
269282

270283
error:
271-
aws_input_stream_release(impl->checksum_stream);
284+
aws_input_stream_release(impl->chunk_body_stream);
272285
aws_input_stream_release(impl->current_stream);
273286
aws_byte_buf_clean_up(&impl->pre_chunk_buffer);
274-
aws_byte_buf_clean_up(&impl->checksum_result);
275287
aws_mem_release(impl->allocator, impl);
276288
return NULL;
277289
}

source/s3_request_messages.c

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,10 @@ struct aws_http_message *aws_s3_create_multipart_upload_message_new(
352352
return NULL;
353353
}
354354

355-
/* Create a new put object request from an existing put object request. Currently just optionally adds part information
356-
* for a multipart upload. */
355+
/**
356+
* Create a new put object request from an existing put object request. Currently just optionally adds part information
357+
* for a multipart upload.
358+
**/
357359
struct aws_http_message *aws_s3_upload_part_message_new(
358360
struct aws_allocator *allocator,
359361
struct aws_http_message *base_message,
@@ -362,7 +364,7 @@ struct aws_http_message *aws_s3_upload_part_message_new(
362364
const struct aws_string *upload_id,
363365
bool should_compute_content_md5,
364366
const struct checksum_config_storage *checksum_config,
365-
struct aws_byte_buf *encoded_checksum_output) {
367+
struct aws_byte_buf *encoded_checksum) {
366368
AWS_PRECONDITION(allocator);
367369
AWS_PRECONDITION(base_message);
368370
AWS_PRECONDITION(part_number > 0);
@@ -383,7 +385,7 @@ struct aws_http_message *aws_s3_upload_part_message_new(
383385
goto error_clean_up;
384386
}
385387

386-
if (aws_s3_message_util_assign_body(allocator, buffer, message, checksum_config, encoded_checksum_output) == NULL) {
388+
if (aws_s3_message_util_assign_body(allocator, buffer, message, checksum_config, encoded_checksum) == NULL) {
387389
goto error_clean_up;
388390
}
389391

@@ -836,19 +838,30 @@ static int s_calculate_in_memory_checksum_helper(
836838
struct aws_allocator *allocator,
837839
struct aws_byte_cursor data,
838840
const struct checksum_config_storage *checksum_config,
839-
struct aws_byte_buf *out_checksum) {
841+
struct aws_byte_buf *checksum_buf) {
840842
AWS_ASSERT(checksum_config->checksum_algorithm != AWS_SCA_NONE);
841-
AWS_ASSERT(out_checksum != NULL);
842-
AWS_ZERO_STRUCT(*out_checksum);
843+
AWS_ASSERT(checksum_buf != NULL);
843844

844845
int ret_code = AWS_OP_ERR;
845846
size_t digest_size = aws_get_digest_size_from_checksum_algorithm(checksum_config->checksum_algorithm);
846847
size_t encoded_checksum_len = 0;
847848
if (aws_base64_compute_encoded_len(digest_size, &encoded_checksum_len)) {
848849
return AWS_OP_ERR;
849850
}
850-
851-
aws_byte_buf_init(out_checksum, allocator, encoded_checksum_len);
851+
if (checksum_buf->len > 0) {
852+
if (checksum_buf->len == encoded_checksum_len) {
853+
return AWS_OP_SUCCESS;
854+
} else {
855+
AWS_LOGF_ERROR(
856+
AWS_LS_S3_GENERAL,
857+
"Mismatched checksum buffer and algorithm. checksum_buf->len is %zu, but encoded_checksum_len is %zu",
858+
checksum_buf->len,
859+
encoded_checksum_len);
860+
aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
861+
return AWS_OP_ERR;
862+
}
863+
}
864+
aws_byte_buf_init(checksum_buf, allocator, encoded_checksum_len);
852865

853866
struct aws_byte_buf raw_checksum;
854867
aws_byte_buf_init(&raw_checksum, allocator, digest_size);
@@ -857,14 +870,14 @@ static int s_calculate_in_memory_checksum_helper(
857870
goto done;
858871
}
859872
struct aws_byte_cursor raw_checksum_cursor = aws_byte_cursor_from_buf(&raw_checksum);
860-
if (aws_base64_encode(&raw_checksum_cursor, out_checksum)) {
873+
if (aws_base64_encode(&raw_checksum_cursor, checksum_buf)) {
861874
goto done;
862875
}
863876

864877
ret_code = AWS_OP_SUCCESS;
865878
done:
866879
if (ret_code) {
867-
aws_byte_buf_clean_up(out_checksum);
880+
aws_byte_buf_clean_up(checksum_buf);
868881
}
869882
aws_byte_buf_clean_up(&raw_checksum);
870883
return ret_code;
@@ -880,19 +893,19 @@ static int s_calculate_and_add_checksum_to_header_helper(
880893
struct aws_byte_cursor data,
881894
const struct checksum_config_storage *checksum_config,
882895
struct aws_http_message *out_message,
883-
struct aws_byte_buf *out_checksum) {
896+
struct aws_byte_buf *checksum_buf) {
884897
AWS_ASSERT(checksum_config->checksum_algorithm != AWS_SCA_NONE);
885898
AWS_ASSERT(out_message != NULL);
886899
int ret_code = AWS_OP_ERR;
887900

888901
struct aws_byte_buf local_encoded_checksum_buf;
902+
AWS_ZERO_STRUCT(local_encoded_checksum_buf);
889903
struct aws_byte_buf *local_encoded_checksum;
890-
if (out_checksum == NULL) {
904+
if (checksum_buf == NULL) {
891905
local_encoded_checksum = &local_encoded_checksum_buf;
892906
} else {
893-
local_encoded_checksum = out_checksum;
907+
local_encoded_checksum = checksum_buf;
894908
}
895-
AWS_ZERO_STRUCT(*local_encoded_checksum);
896909
if (s_calculate_in_memory_checksum_helper(allocator, data, checksum_config, local_encoded_checksum)) {
897910
goto done;
898911
}
@@ -908,8 +921,8 @@ static int s_calculate_and_add_checksum_to_header_helper(
908921

909922
ret_code = AWS_OP_SUCCESS;
910923
done:
911-
if (ret_code || out_checksum == NULL) {
912-
/* In case of error happen or out_checksum is not set, clean up the encoded checksum. Otherwise, the caller will
924+
if (ret_code || checksum_buf == NULL) {
925+
/* In case of error happen or checksum_buf is not set, clean up the encoded checksum. Otherwise, the caller will
913926
* own the encoded checksum. */
914927
aws_byte_buf_clean_up(local_encoded_checksum);
915928
}
@@ -922,7 +935,7 @@ struct aws_input_stream *aws_s3_message_util_assign_body(
922935
struct aws_byte_buf *byte_buf,
923936
struct aws_http_message *out_message,
924937
const struct checksum_config_storage *checksum_config,
925-
struct aws_byte_buf *out_checksum) {
938+
struct aws_byte_buf *checksum_buf) {
926939
AWS_PRECONDITION(allocator);
927940
AWS_PRECONDITION(out_message);
928941
AWS_PRECONDITION(byte_buf);
@@ -992,7 +1005,7 @@ struct aws_input_stream *aws_s3_message_util_assign_body(
9921005
}
9931006
/* set input stream to chunk stream */
9941007
struct aws_input_stream *chunk_stream =
995-
aws_chunk_stream_new(allocator, input_stream, checksum_config->checksum_algorithm, out_checksum);
1008+
aws_chunk_stream_new(allocator, input_stream, checksum_config->checksum_algorithm, checksum_buf);
9961009
if (!chunk_stream) {
9971010
goto error_clean_up;
9981011
}
@@ -1001,14 +1014,14 @@ struct aws_input_stream *aws_s3_message_util_assign_body(
10011014
} else if (checksum_config->location == AWS_SCL_HEADER) {
10021015
/* Calculate the checksum directly from memory and add it to the header. */
10031016
if (s_calculate_and_add_checksum_to_header_helper(
1004-
allocator, buffer_byte_cursor, checksum_config, out_message, out_checksum)) {
1017+
allocator, buffer_byte_cursor, checksum_config, out_message, checksum_buf)) {
10051018
goto error_clean_up;
10061019
}
10071020

1008-
} else if (checksum_config->checksum_algorithm != AWS_SCA_NONE && out_checksum != NULL) {
1009-
/* In case checksums still wanted, and we can calculate it directly from the buffer in memory to
1010-
* out_checksum */
1011-
if (s_calculate_in_memory_checksum_helper(allocator, buffer_byte_cursor, checksum_config, out_checksum)) {
1021+
} else if (checksum_config->checksum_algorithm != AWS_SCA_NONE && checksum_buf != NULL) {
1022+
/* In case checksums still wanted, but not sent to the server, and we can calculate it directly from the
1023+
* buffer in memory to out_checksum */
1024+
if (s_calculate_in_memory_checksum_helper(allocator, buffer_byte_cursor, checksum_config, checksum_buf)) {
10121025
goto error_clean_up;
10131026
}
10141027
}

0 commit comments

Comments
 (0)