Skip to content

Commit d8274ef

Browse files
Dynamic default part size (#575)
Co-authored-by: Dmitriy Musatkin <63878209+DmitriyMusatkin@users.noreply.github.com>
1 parent 52e5170 commit d8274ef

35 files changed

+4002
-339
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,8 @@ jobs:
223223
uses: actions/checkout@v4
224224
- name: Build ${{ env.PACKAGE_NAME }} + consumers
225225
run: |
226+
python3 -m venv .venv
227+
source .venv/bin/activate
226228
python3 -c "from urllib.request import urlretrieve; urlretrieve('${{ env.BUILDER_HOST }}/${{ env.BUILDER_SOURCE }}/${{ env.BUILDER_VERSION }}/builder.pyz?run=${{ env.RUN }}', 'builder')"
227229
chmod a+x builder
228230
./builder build -p ${{ env.PACKAGE_NAME }} --cmake-extra=-DASSERT_LOCK_HELD=ON

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
108108

109109
include(CTest)
110110
if (BUILD_TESTING)
111+
add_definitions(-DAWS_C_S3_ENABLE_TEST_STUBS)
111112
add_subdirectory(tests)
112113
if (NOT BYO_CRYPTO AND NOT CMAKE_CROSSCOMPILING)
113114
add_subdirectory(samples)

include/aws/s3/private/s3_auto_ranged_get.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,18 @@ struct aws_s3_auto_ranged_get {
2121

2222
struct aws_string *etag;
2323

24+
/* Estimated object stored part size based on ETag analysis */
25+
uint64_t estimated_object_stored_part_size;
26+
/* Number of parts stored in S3. We derive this from the ETag; if the ETag is not formatted as expected, this will
27+
* default to 1.
28+
* Note: For S3Express Append, the object will be treated as a single part, even though it can be multiple parts
29+
* stored in S3.
30+
*/
31+
uint64_t num_stored_parts;
32+
/* Whether the part size was set by the user for this meta request. */
33+
bool part_size_set;
34+
bool force_dynamic_part_size;
35+
2436
bool initial_message_has_start_range;
2537
bool initial_message_has_end_range;
2638
uint64_t initial_range_start;
@@ -74,6 +86,7 @@ AWS_S3_API struct aws_s3_meta_request *aws_s3_meta_request_auto_ranged_get_new(
7486
struct aws_allocator *allocator,
7587
struct aws_s3_client *client,
7688
size_t part_size,
89+
bool part_size_set,
7790
const struct aws_s3_meta_request_options *options);
7891

7992
AWS_EXTERN_C_END

include/aws/s3/private/s3_client_impl.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,10 @@ struct aws_s3_client_vtable {
178178
struct aws_http_connection *client_connection,
179179
const struct aws_http_make_request_options *options);
180180

181-
void (*after_prepare_upload_part_finish)(struct aws_s3_request *request, struct aws_http_message *message);
181+
#ifdef AWS_C_S3_ENABLE_TEST_STUBS
182+
/********************* TEST ONLY STUB **************************/
183+
void (*after_prepare_upload_part_finish_stub)(struct aws_s3_request *request, struct aws_http_message *message);
184+
#endif
182185
};
183186

184187
struct aws_s3_upload_part_timeout_stats {
@@ -234,10 +237,17 @@ struct aws_s3_client {
234237
* to meta requests for use. */
235238
const size_t part_size;
236239

240+
bool part_size_set;
241+
237242
/* Size of parts for files when doing gets or puts. This exists on the client as configurable option that is passed
238243
* to meta requests for use. */
239244
const uint64_t max_part_size;
240245

246+
/* Calculated optimal range size for GET operations based on client configuration (memory limits, throughput
247+
* targets). This is used when part_size is not explicitly configured, replacing the default with a reasonable
248+
* calculation. Value is calculated during client initialization and remains constant for the client's lifetime. */
249+
const uint64_t optimal_range_size;
250+
241251
/* File I/O options. */
242252
bool fio_options_set;
243253
struct aws_s3_file_io_options fio_opts;

include/aws/s3/private/s3_default_buffer_pool.h

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
* SPDX-License-Identifier: Apache-2.0.
77
*/
88

9+
#include <aws/common/hash_table.h>
10+
#include <aws/common/mutex.h>
911
#include <aws/s3/s3.h>
1012
#include <aws/s3/s3_buffer_pool.h>
1113

@@ -59,11 +61,79 @@ struct aws_s3_default_buffer_pool_usage_stats {
5961
/* Secondary memory reserved, but not yet used. Accurate, maps directly to base allocator. */
6062
size_t secondary_reserved;
6163

64+
/* Overall memory allocated for special-sized blocks. */
65+
size_t special_blocks_allocated;
66+
/* Number of special block sizes created. */
67+
size_t special_blocks_num;
68+
/* Memory reserved in special-sized blocks. */
69+
size_t special_blocks_reserved;
70+
/* Memory used in special-sized blocks. */
71+
size_t special_blocks_used;
72+
6273
/* Bytes used in "forced" buffers (created even if they exceed memory limits).
6374
* This is always <= primary_used + secondary_used */
6475
size_t forced_used;
6576
};
6677

78+
/* Structure to track special-sized blocks */
79+
struct s3_special_block_list {
80+
struct aws_allocator *allocator;
81+
size_t buffer_size; /* Size of buffers in this list */
82+
struct aws_array_list blocks; /* Array of uint8_t* pointers to allocated blocks */
83+
};
84+
85+
struct aws_s3_default_buffer_pool {
86+
struct aws_allocator *base_allocator;
87+
struct aws_mutex mutex;
88+
89+
size_t block_size;
90+
size_t chunk_size;
91+
/* size at which allocations should go to secondary */
92+
size_t primary_size_cutoff;
93+
94+
/* NOTE: See aws_s3_buffer_pool_usage_stats for descriptions of most fields */
95+
96+
size_t mem_limit;
97+
98+
size_t primary_allocated;
99+
size_t primary_reserved;
100+
size_t primary_used;
101+
102+
size_t special_blocks_allocated;
103+
size_t special_blocks_reserved;
104+
size_t special_blocks_used;
105+
106+
size_t secondary_reserved;
107+
size_t secondary_used;
108+
109+
size_t forced_used;
110+
111+
struct aws_array_list blocks;
112+
113+
struct aws_linked_list pending_reserves;
114+
115+
/* Special-sized blocks: hash table mapping size -> struct s3_special_block_list * */
116+
/* TODO: let's discuss the special list lifetime. Should we just keep it with the memory pool? The concern is that
117+
* the pool will live with the client, and may result in all sorts of special lists to be around. */
118+
struct aws_hash_table special_blocks;
119+
120+
/* TEST ONLY: to force the special blocks alive during trim. */
121+
bool force_keeping_special_blocks;
122+
};
123+
124+
struct s3_pending_reserve {
125+
struct aws_linked_list_node node;
126+
struct aws_future_s3_buffer_ticket *ticket_future;
127+
struct aws_s3_default_buffer_ticket *ticket;
128+
struct aws_s3_buffer_pool_reserve_meta meta;
129+
};
130+
131+
struct s3_buffer_pool_block {
132+
size_t block_size;
133+
uint8_t *block_ptr;
134+
uint16_t alloc_bit_mask;
135+
};
136+
67137
/*
68138
* Create new buffer pool.
69139
* chunk_size - specifies the size of memory that will most commonly be acquired

include/aws/s3/private/s3_meta_request_impl.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,12 @@ struct aws_s3_meta_request_vtable {
122122

123123
/* Pause the given request */
124124
int (*pause)(struct aws_s3_meta_request *meta_request, struct aws_s3_meta_request_resume_token **resume_token);
125+
126+
#ifdef AWS_C_S3_ENABLE_TEST_STUBS
127+
/********************* TEST ONLY STUB **************************/
128+
/* A stub to the update implementation from meta request with the lock held. Only for tests. */
129+
bool (*synced_update_stub)(struct aws_s3_meta_request *meta_request);
130+
#endif
125131
};
126132

127133
/**
@@ -151,6 +157,8 @@ struct aws_s3_meta_request {
151157

152158
/* Part size to use for uploads and downloads. Passed down by the creating client. */
153159
const size_t part_size;
160+
/* Hard limit on max connections set through the meta request option. */
161+
const uint32_t max_active_connections_override;
154162

155163
struct aws_cached_signing_config_aws *cached_signing_config;
156164

@@ -160,6 +168,9 @@ struct aws_s3_meta_request {
160168

161169
struct aws_s3_endpoint *endpoint;
162170

171+
/* Number of requests being sent/received over network for the meta request. */
172+
struct aws_atomic_var num_requests_network;
173+
163174
/* Event loop on which to schedule IO-related work, i.e., reading from streams, streaming parts back to the caller, etc...
164175
* After the meta request is finished, this will be reset along with the client reference.*/
165176
struct aws_event_loop *io_event_loop;
@@ -179,6 +190,10 @@ struct aws_s3_meta_request {
179190

180191
enum aws_s3_meta_request_type type;
181192
struct aws_string *s3express_session_host;
193+
/* Whether the meta request is made to an s3express bucket. */
194+
bool is_express;
195+
/* Whether the buffer pool is optimized for the specific size. */
196+
bool buffer_pool_optimized;
182197

183198
struct {
184199
struct aws_mutex lock;
@@ -263,6 +278,9 @@ struct aws_s3_meta_request {
263278
/* True if this meta request is currently in the client's list. */
264279
bool scheduled;
265280

281+
/* Track the number of requests being prepared for this meta request. */
282+
size_t num_request_being_prepared;
283+
266284
} client_process_work_threaded_data;
267285

268286
/* Anything in this structure should only ever be accessed by the meta-request from its io_event_loop thread. */
@@ -271,6 +289,9 @@ struct aws_s3_meta_request {
271289
* This is an optimization, we could have just copied the array when the task runs,
272290
* but swapping two array-lists back and forth avoids an allocation. */
273291
struct aws_array_list event_delivery_array;
292+
293+
/* The range start for the next response body delivery */
294+
uint64_t next_deliver_range_start;
274295
} io_threaded_data;
275296

276297
const bool should_compute_content_md5;
@@ -407,9 +428,11 @@ void aws_s3_meta_request_add_event_for_delivery_synced(
407428
bool aws_s3_meta_request_are_events_out_for_delivery_synced(struct aws_s3_meta_request *meta_request);
408429

409430
/* Cancel the requests with cancellable HTTP stream for the meta request */
431+
AWS_S3_API
410432
void aws_s3_meta_request_cancel_cancellable_requests_synced(struct aws_s3_meta_request *meta_request, int error_code);
411433

412434
/* Cancel the pending buffer futures for the meta request */
435+
AWS_S3_API
413436
void aws_s3_meta_request_cancel_pending_buffer_futures_synced(struct aws_s3_meta_request *meta_request, int error_code);
414437

415438
/* Asynchronously read from the meta request's input stream. Should always be done outside of any mutex,

include/aws/s3/private/s3_request.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,20 @@ struct aws_s3_request_metrics {
168168
int error_code;
169169
/* Retry attempt. */
170170
uint32_t retry_attempt;
171+
/* Whether the memory for the request is allocated from the buffer pool. */
172+
bool memory_allocated_from_pool;
171173
} crt_info_metrics;
172174

175+
/* TODO: align the part info metrics with the others, eg: `aws_s3_mpu_part_info`. */
176+
struct {
177+
/* Beginning range of this part. */
178+
uint64_t part_range_start;
179+
/* Last byte of this part. */
180+
uint64_t part_range_end;
181+
/* Part number that this request refers to. */
182+
uint32_t part_number;
183+
} part_info_metrics;
184+
173185
struct aws_ref_count ref_count;
174186
};
175187

@@ -229,11 +241,9 @@ struct aws_s3_request {
229241
struct aws_s3_buffer_ticket *ticket;
230242

231243
/* Beginning range of this part. */
232-
/* TODO currently only used by auto_range_get, could be hooked up to auto_range_put as well. */
233244
uint64_t part_range_start;
234245

235246
/* Last byte of this part.*/
236-
/* TODO currently only used by auto_range_get, could be hooked up to auto_range_put as well. */
237247
uint64_t part_range_end;
238248

239249
/* Part number that this request refers to. If this is not a part, this can be 0. (S3 Part Numbers start at 1.)

include/aws/s3/private/s3_util.h

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,23 @@ extern const double g_default_throughput_target_gbps;
160160

161161
AWS_S3_API
162162
extern const uint64_t g_streaming_object_size_threshold;
163+
164+
AWS_S3_API
165+
extern const uint64_t g_default_part_size_fallback;
166+
167+
AWS_S3_API
168+
extern const uint64_t g_default_max_part_size;
169+
170+
AWS_S3_API
171+
extern const uint64_t g_s3_optimal_range_size_alignment;
172+
173+
AWS_S3_API
174+
extern const uint32_t g_s3express_connection_limitation;
175+
AWS_S3_API
176+
extern const uint64_t g_s3express_connection_limitation_part_size_threshold;
177+
AWS_S3_API
178+
extern const uint64_t g_s3express_connection_limitation_object_size_threshold;
179+
163180
/**
164181
* Returns AWS_S3_REQUEST_TYPE_UNKNOWN if name doesn't map to an enum value.
165182
*/
@@ -240,12 +257,20 @@ void aws_s3_add_user_agent_header(struct aws_allocator *allocator, struct aws_ht
240257
* object-size. All output arguments are optional.*/
241258
AWS_S3_API
242259
int aws_s3_parse_content_range_response_header(
243-
struct aws_allocator *allocator,
244260
struct aws_http_headers *response_headers,
245261
uint64_t *out_range_start,
246262
uint64_t *out_range_end,
247263
uint64_t *out_object_size);
248264

265+
/* Given a Content-Range header value as a byte cursor, parses the range-start, range-end and
266+
* object-size. All output arguments are optional. */
267+
AWS_S3_API
268+
int aws_s3_parse_content_range_cursor(
269+
struct aws_byte_cursor content_range_cursor,
270+
uint64_t *out_range_start,
271+
uint64_t *out_range_end,
272+
uint64_t *out_object_size);
273+
249274
/* Given response headers, parses the content-length from a content-length response header.*/
250275
AWS_S3_API
251276
int aws_s3_parse_content_length_response_header(
@@ -318,6 +343,53 @@ int aws_s3_check_headers_for_checksum(
318343
struct aws_byte_buf *out_checksum_buffer,
319344
bool meta_request_level);
320345

346+
/**
347+
* Calculate client-level optimal range size based on memory and connection constraints.
348+
* This function is called during client initialization to determine the base range size
349+
* using the formula: MemoryLimit / concurrency / divisor.
350+
* The result is rounded up to ensure proper alignment and applies minimum size constraints.
351+
*
352+
* @param memory_limit_in_bytes Total memory limit available for buffering
353+
* @param max_connections Maximum number of concurrent connections
354+
* @param out_client_optimal_range_size Output parameter for calculated client-level optimal range size
355+
* @return AWS_OP_SUCCESS on success, AWS_OP_ERR on failure (caller should fall back to default)
356+
*/
357+
AWS_S3_API
358+
int aws_s3_calculate_client_optimal_range_size(
359+
uint64_t memory_limit_in_bytes,
360+
uint32_t max_connections,
361+
uint64_t *out_client_optimal_range_size);
362+
363+
/**
364+
* Calculate request-level optimal range size by considering object-specific information.
365+
* This function is called per request to adjust the client-level range size based on
366+
* estimated object stored part size using: min(client_optimal_range_size, estimated_object_stored_part_size).
367+
*
368+
* @param client_optimal_range_size The client-level optimal range size from initialization
369+
* @param estimated_object_stored_part_size Estimated size of object stored parts in S3
370+
* @param is_express Whether the request is an s3express request.
371+
* @param out_request_optimal_range_size Output parameter for calculated request-level optimal range size
372+
* @return AWS_OP_SUCCESS on success, AWS_OP_ERR on failure (caller should fall back to client size)
373+
*/
374+
AWS_S3_API
375+
int aws_s3_calculate_request_optimal_range_size(
376+
uint64_t client_optimal_range_size,
377+
uint64_t estimated_object_stored_part_size,
378+
bool is_express,
379+
uint64_t *out_request_optimal_range_size);
380+
381+
/**
382+
* Extract the number of parts from an S3 ETag header value.
383+
* S3 multipart upload ETags have the format "<hash>-<number_of_parts>".
384+
* Single-part uploads have ETags without dashes.
385+
*
386+
* @param etag_header_value The ETag header value (may include quotes)
387+
* @param out_num_parts Output parameter for the number of parts (1 for single-part uploads)
388+
* @return AWS_OP_SUCCESS on success, AWS_OP_ERR on failure (invalid ETag format)
389+
*/
390+
AWS_S3_API
391+
int aws_s3_extract_parts_from_etag(struct aws_byte_cursor etag_header_value, uint32_t *out_num_parts);
392+
321393
AWS_EXTERN_C_END
322394

323395
#endif /* AWS_S3_UTIL_H */

0 commit comments

Comments
 (0)