Skip to content

Commit 36e2c37

Browse files
Revamp checksum - retry will reuse the checksum (#532)
Co-authored-by: Dmitriy Musatkin <63878209+DmitriyMusatkin@users.noreply.github.com>
1 parent 081eee8 commit 36e2c37

21 files changed

+763
-215
lines changed
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#ifndef AWS_S3_CHECKSUM_CONTEXT_H
2+
#define AWS_S3_CHECKSUM_CONTEXT_H
3+
4+
/**
5+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
6+
* SPDX-License-Identifier: Apache-2.0
7+
*/
8+
9+
#include "aws/s3/s3_client.h"
10+
#include <aws/common/byte_buf.h>
11+
#include <aws/common/ref_count.h>
12+
13+
struct aws_s3_meta_request_checksum_config_storage;
14+
15+
AWS_EXTERN_C_BEGIN
16+
17+
/**
18+
* Upload request checksum context that encapsulates all checksum-related state and behavior
19+
* for individual upload part requests. This replaces the complex tri-state buffer logic
20+
* with a cleaner approach. Uses reference counting for lifetime management since context
21+
* is transferred between functions.
22+
*/
23+
struct aws_s3_upload_request_checksum_context {
24+
struct aws_allocator *allocator;
25+
struct aws_ref_count ref_count;
26+
27+
/* Configuration: which checksum algorithm to use and where the checksum is placed (header or trailer). */
28+
enum aws_s3_checksum_algorithm algorithm;
29+
enum aws_s3_checksum_location location;
30+
31+
struct aws_byte_buf base64_checksum; /* Buffer holding the base64-encoded checksum result. */
32+
/* Whether the checksum has already been calculated. */
33+
bool checksum_calculated;
34+
35+
/* Validation: expected size of the base64-encoded checksum. */
36+
size_t encoded_checksum_size;
37+
};
38+
39+
/**
40+
* Create a new upload request checksum context from the meta request level checksum configuration.
41+
* This function encapsulates all the complex logic for determining buffer state.
42+
* Returns with reference count of 1.
43+
*
44+
* @param allocator Memory allocator
45+
* @param checksum_config Meta request level checksum configuration (can be NULL)
46+
* @return New checksum context or NULL on error
47+
*/
48+
AWS_S3_API
49+
struct aws_s3_upload_request_checksum_context *aws_s3_upload_request_checksum_context_new(
50+
struct aws_allocator *allocator,
51+
const struct aws_s3_meta_request_checksum_config_storage *checksum_config);
52+
53+
/**
54+
* Create a new upload request checksum context with an existing base64 encoded checksum value.
55+
* This is useful when resuming uploads or when the checksum is pre-calculated.
56+
* Returns with reference count of 1.
57+
*
58+
* @param allocator Memory allocator
59+
* @param checksum_config Meta request level checksum configuration (can be NULL)
60+
* @param existing_base64_checksum Pre-calculated checksum value as a byte cursor
61+
* @return New checksum context or NULL on error (e.g., if checksum size doesn't match algorithm)
62+
*/
63+
AWS_S3_API
64+
struct aws_s3_upload_request_checksum_context *aws_s3_upload_request_checksum_context_new_with_existing_base64_checksum(
65+
struct aws_allocator *allocator,
66+
const struct aws_s3_meta_request_checksum_config_storage *checksum_config,
67+
struct aws_byte_cursor existing_base64_checksum);
68+
69+
/**
70+
* Acquire a reference to the upload request checksum context.
71+
* Use this when transferring ownership to another function/structure.
72+
*
73+
* @param context The checksum context to acquire
74+
* @return The same context pointer (for convenience)
75+
*/
76+
AWS_S3_API
77+
struct aws_s3_upload_request_checksum_context *aws_s3_upload_request_checksum_context_acquire(
78+
struct aws_s3_upload_request_checksum_context *context);
79+
80+
/**
81+
* Release a reference to the upload request checksum context.
82+
* When the reference count reaches zero, the context will be destroyed.
83+
* Always returns NULL.
84+
*
85+
* @param context The checksum context to release
86+
*/
87+
AWS_S3_API
88+
struct aws_s3_upload_request_checksum_context *aws_s3_upload_request_checksum_context_release(
89+
struct aws_s3_upload_request_checksum_context *context);
90+
91+
/**
92+
* Check if checksum calculation is needed based on context state.
93+
* Returns true if the context has a valid algorithm and the checksum has not been calculated yet.
94+
*
95+
* @param context The checksum context to check
96+
* @return true if checksum calculation is needed, false otherwise
97+
*/
98+
AWS_S3_API
99+
bool aws_s3_upload_request_checksum_context_should_calculate(
100+
const struct aws_s3_upload_request_checksum_context *context);
101+
102+
/**
103+
* Check if checksum should be added to HTTP headers.
104+
* Returns true if the context has a valid algorithm and the location is set to header.
105+
*
106+
* @param context The checksum context to check
107+
* @return true if checksum should be added to headers, false otherwise
108+
*/
109+
AWS_S3_API
110+
bool aws_s3_upload_request_checksum_context_should_add_header(
111+
const struct aws_s3_upload_request_checksum_context *context);
112+
113+
/**
114+
* Check if checksum should be added as trailer (aws-chunked encoding).
115+
* Returns true if the context has a valid algorithm and the location is set to trailer.
116+
*
117+
* @param context The checksum context to check
118+
* @return true if checksum should be added as trailer, false otherwise
119+
*/
120+
AWS_S3_API
121+
bool aws_s3_upload_request_checksum_context_should_add_trailer(
122+
const struct aws_s3_upload_request_checksum_context *context);
123+
124+
/**
125+
* Get the checksum buffer to use for output.
126+
* Returns the internal buffer for storing the calculated checksum.
127+
*
128+
* @param context The checksum context
129+
* @return Pointer to the checksum buffer, or NULL if context is invalid
130+
*/
131+
AWS_S3_API
132+
struct aws_byte_buf *aws_s3_upload_request_checksum_context_get_output_buffer(
133+
struct aws_s3_upload_request_checksum_context *context);
134+
135+
/**
136+
* Get a cursor to the current base64 encoded checksum value (for use in headers/trailers).
137+
* Returns an empty cursor if the checksum has not been calculated yet.
138+
*
139+
* @param context The checksum context
140+
* @return Byte cursor to the calculated checksum, or empty cursor if not available
141+
*/
142+
AWS_S3_API
143+
struct aws_byte_cursor aws_s3_upload_request_checksum_context_get_checksum_cursor(
144+
const struct aws_s3_upload_request_checksum_context *context);
145+
146+
AWS_EXTERN_C_END
147+
148+
#endif /* AWS_S3_CHECKSUM_CONTEXT_H */

include/aws/s3/private/s3_checksums.h

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* aws-c-sdkutil. */
1111

1212
struct aws_s3_checksum;
13+
struct aws_s3_upload_request_checksum_context;
1314

1415
/* List to check the checksum algorithm to use based on the priority. */
1516
static const enum aws_s3_checksum_algorithm s_checksum_algo_priority_list[] = {
@@ -40,7 +41,7 @@ struct aws_s3_checksum {
4041
} impl;
4142
};
4243

43-
struct checksum_config_storage {
44+
struct aws_s3_meta_request_checksum_config_storage {
4445
struct aws_allocator *allocator;
4546
struct aws_byte_buf full_object_checksum;
4647
bool has_full_object_checksum;
@@ -83,25 +84,46 @@ struct aws_input_stream *aws_checksum_stream_new(
8384

8485
/**
8586
* TODO: properly support chunked encoding.
87+
* Creates a chunked encoding stream that wraps an existing stream and adds checksum trailers.
8688
*
87-
* A stream that takes in a stream, encodes it to aws_chunked. Computes a running checksum as it is read and add the
88-
* checksum as trailer at the end of the stream. All of the added bytes will be counted to the length of the stream.
89-
* Note: seek this stream will immediately fail, as it would prevent an accurate calculation of the
90-
* checksum.
89+
* This function creates a stream that:
90+
* 1. Wraps the existing_stream and encodes its content with aws-chunked encoding
91+
* 2. Calculates a checksum of the stream content (if not already calculated)
92+
* 3. Appends the checksum as a trailer at the end of the aws-chunked stream
9193
*
92-
* @param allocator
93-
* @param existing_stream The data to be chunkified prepended by information on the stream length followed by a final
94-
* chunk and a trailing chunk containing a checksum of the existing stream. Destroying the
95-
* chunk stream will destroy the existing stream.
96-
* @param checksum_output Optional argument, if provided the buffer will be initialized to the appropriate size and
97-
* filled with the checksum result when calculated. Callers responsibility to cleanup.
94+
* Note: This stream does not support seeking operations, as seeking would prevent
95+
* accurate checksum calculation and corrupt the chunked encoding format.
96+
*
97+
* @param allocator Memory allocator to use for stream creation and internal buffers
98+
* @param existing_stream The input stream to be chunked and checksummed. This stream
99+
* will be acquired by the chunk stream and released when the
100+
* chunk stream is destroyed. Must not be NULL.
101+
* @param context Context containing checksum configuration and state. Must not be NULL.
102+
* The context contains:
103+
* - algorithm: The checksum algorithm to use (CRC32, CRC32C, etc.)
104+
* - base64_checksum: Buffer for the calculated checksum result
105+
* - checksum_calculated: Whether checksum is pre-calculated or needs calculation
106+
* - encoded_checksum_size: Expected size of the base64-encoded checksum
107+
*
108+
* If checksum_calculated is false, the stream will wrap existing_stream
109+
* with a checksum stream to calculate the checksum during reading.
110+
* If checksum_calculated is true, the existing checksum value will be used.
111+
*
112+
* @return A new input stream that provides chunked encoding with checksum trailers,
113+
* or NULL if creation fails. The returned stream must be released with
114+
* aws_input_stream_release() when no longer needed.
115+
*
116+
* @note The total length of the returned stream includes:
117+
* - Chunk size header (hex representation + \r\n)
118+
* - Original stream content
119+
* - Final chunk marker (0\r\n or \r\n0\r\n)
120+
* - Checksum trailer (header name + : + base64 checksum + \r\n\r\n)
98121
*/
99122
AWS_S3_API
100123
struct aws_input_stream *aws_chunk_stream_new(
101124
struct aws_allocator *allocator,
102125
struct aws_input_stream *existing_stream,
103-
enum aws_s3_checksum_algorithm algorithm,
104-
struct aws_byte_buf *checksum_output);
126+
struct aws_s3_upload_request_checksum_context *context);
105127

106128
/**
107129
* Get the size of the checksum output corresponding to the aws_s3_checksum_algorithm enum value.
@@ -166,14 +188,15 @@ AWS_S3_API
166188
int aws_checksum_finalize(struct aws_s3_checksum *checksum, struct aws_byte_buf *output);
167189

168190
AWS_S3_API
169-
int aws_checksum_config_storage_init(
191+
int aws_s3_meta_request_checksum_config_storage_init(
170192
struct aws_allocator *allocator,
171-
struct checksum_config_storage *internal_config,
193+
struct aws_s3_meta_request_checksum_config_storage *internal_config,
172194
const struct aws_s3_checksum_config *config,
173195
const struct aws_http_message *message,
174196
const void *log_id);
175197

176198
AWS_S3_API
177-
void aws_checksum_config_storage_cleanup(struct checksum_config_storage *internal_config);
199+
void aws_s3_meta_request_checksum_config_storage_cleanup(
200+
struct aws_s3_meta_request_checksum_config_storage *internal_config);
178201

179202
#endif /* AWS_S3_CHECKSUMS_H */

include/aws/s3/private/s3_client_impl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ struct aws_s3_client_vtable {
174174
struct aws_http_stream *(*http_connection_make_request)(
175175
struct aws_http_connection *client_connection,
176176
const struct aws_http_make_request_options *options);
177+
178+
void (*after_prepare_upload_part_finish)(struct aws_s3_request *request);
177179
};
178180

179181
struct aws_s3_upload_part_timeout_stats {

include/aws/s3/private/s3_meta_request_impl.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ struct aws_s3_request;
2525
struct aws_http_headers;
2626
struct aws_http_make_request_options;
2727
struct aws_retry_strategy;
28-
28+
struct aws_s3_upload_request_checksum_context;
2929
enum aws_s3_meta_request_state {
3030
AWS_S3_META_REQUEST_STATE_ACTIVE,
3131
AWS_S3_META_REQUEST_STATE_FINISHED,
@@ -276,7 +276,7 @@ struct aws_s3_meta_request {
276276
const bool should_compute_content_md5;
277277

278278
/* deep copy of the checksum config. */
279-
struct checksum_config_storage checksum_config;
279+
struct aws_s3_meta_request_checksum_config_storage checksum_config;
280280

281281
/* checksum found in either a default get request, or in the initial head request of a multipart get */
282282
struct aws_byte_buf meta_request_level_response_header_checksum;
@@ -294,7 +294,7 @@ struct aws_s3_meta_request {
294294
struct aws_s3_mpu_part_info {
295295
uint64_t size;
296296
struct aws_string *etag;
297-
struct aws_byte_buf checksum_base64;
297+
struct aws_s3_upload_request_checksum_context *checksum_context;
298298
bool was_previously_uploaded;
299299
};
300300

include/aws/s3/private/s3_request_messages.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ struct aws_byte_buf;
1717
struct aws_byte_cursor;
1818
struct aws_string;
1919
struct aws_array_list;
20-
struct checksum_config_storage;
20+
struct aws_s3_meta_request_checksum_config_storage;
21+
struct aws_s3_upload_request_checksum_context;
2122

2223
AWS_EXTERN_C_BEGIN
2324

@@ -52,8 +53,7 @@ struct aws_input_stream *aws_s3_message_util_assign_body(
5253
struct aws_allocator *allocator,
5354
struct aws_byte_buf *byte_buf,
5455
struct aws_http_message *out_message,
55-
const struct checksum_config_storage *checksum_config,
56-
struct aws_byte_buf *out_checksum);
56+
struct aws_s3_upload_request_checksum_context *checksum_context);
5757

5858
/* Create an HTTP request for an S3 Ranged Get Object Request, using the given request as a basis */
5959
AWS_S3_API
@@ -76,7 +76,7 @@ AWS_S3_API
7676
struct aws_http_message *aws_s3_create_multipart_upload_message_new(
7777
struct aws_allocator *allocator,
7878
struct aws_http_message *base_message,
79-
const struct checksum_config_storage *checksum_config);
79+
const struct aws_s3_meta_request_checksum_config_storage *checksum_config);
8080

8181
/* Create an HTTP request for an S3 Put Object request, using the original request as a basis. Creates and assigns a
8282
* body stream using the passed in buffer. If multipart is not needed, part number and upload_id can be 0 and NULL,
@@ -89,8 +89,7 @@ struct aws_http_message *aws_s3_upload_part_message_new(
8989
uint32_t part_number,
9090
const struct aws_string *upload_id,
9191
bool should_compute_content_md5,
92-
const struct checksum_config_storage *checksum_config,
93-
struct aws_byte_buf *encoded_checksum_output);
92+
struct aws_s3_upload_request_checksum_context *checksum_context);
9493

9594
/* Create an HTTP request for an S3 UploadPartCopy request, using the original request as a basis.
9695
* If multipart is not needed, part number and upload_id can be 0 and NULL,
@@ -116,7 +115,7 @@ struct aws_http_message *aws_s3_complete_multipart_message_new(
116115
struct aws_byte_buf *body_buffer,
117116
const struct aws_string *upload_id,
118117
const struct aws_array_list *parts,
119-
const struct checksum_config_storage *checksum_config);
118+
const struct aws_s3_meta_request_checksum_config_storage *checksum_config);
120119

121120
AWS_S3_API
122121
struct aws_http_message *aws_s3_abort_multipart_upload_message_new(

0 commit comments

Comments
 (0)