
Commit c7ebc80

Merge branch 'main' into dynamic-default-part-size
2 parents: e0fe6d8 + 332dd22

File tree

- include/aws/s3/s3_client.h
- source/s3_meta_request.c
- source/s3_util.c

3 files changed: +29 −11 lines changed

include/aws/s3/s3_client.h

Lines changed: 4 additions & 7 deletions

```diff
@@ -317,29 +317,26 @@ enum aws_s3_recv_file_options {
 
 /**
  * WARNING: experimental/unstable:
- * Controls how client performance file I/O operations. Only applies to the file based
- * workload.
+ * Controls how the client performs file I/O operations. Only applies to the file-based workload.
  **/
 struct aws_s3_file_io_options {
     /**
      * Skip buffering the part in memory before sending the request.
-     * If set, set the `disk_throughput_gbps` to be reasonable align with the available disk throughput.
-     * Otherwise, the transfer may fail with connection starvation.
      *
-     * Default to false.
+     * Defaults to false for small objects, and to true when the object size exceeds the
+     * threshold `g_streaming_object_size_threshold`.
      **/
     bool should_stream;
 
     /**
      * The estimated disk throughput. Only be applied when `streaming_upload` is true.
      * in gigabits per second (Gbps).
      *
-     * When doing upload with streaming, it's important to set the disk throughput to prevent the connection starvation.
      * Notes: There are possibilities that cannot reach the all available disk throughput:
      * 1. Disk is busy with other applications
      * 2. OS Cache may cap the throughput, use `direct_io` to get around this.
      *
-     * Note: When `streaming_upload` is true, this default to 10 Gbps.
+     * Defaults to the client's `throughput_target_gbps`.
      **/
     double disk_throughput_gbps;
 
```
source/s3_meta_request.c

Lines changed: 1 addition & 1 deletion

```diff
@@ -194,7 +194,7 @@ int aws_s3_meta_request_init_base(
 
     if (meta_request->fio_opts.should_stream && meta_request->fio_opts.disk_throughput_gbps == 0) {
         /* If disk throughput is not set, set it to the default. */
-        meta_request->fio_opts.disk_throughput_gbps = g_default_throughput_target_gbps;
+        meta_request->fio_opts.disk_throughput_gbps = client->throughput_target_gbps;
     }
 
     /* Set up reference count. */
```
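The defaulting rule is a simple precedence: an explicitly set `disk_throughput_gbps` wins, and only a zero value falls back to the client's overall target. A minimal sketch of that rule as a standalone helper (hypothetical, not code from this commit):

```c
/* Hypothetical helper restating the defaulting rule above: an explicit
 * non-zero disk throughput wins; otherwise the client's
 * throughput_target_gbps is inherited. */
static double effective_disk_throughput_gbps(double requested_gbps, double client_target_gbps) {
    return requested_gbps != 0.0 ? requested_gbps : client_target_gbps;
}
```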

source/s3_util.c

Lines changed: 24 additions & 3 deletions

```diff
@@ -64,10 +64,31 @@ const struct aws_byte_cursor g_user_agent_header_unknown = AWS_BYTE_CUR_INIT_FRO
 
 const uint32_t g_s3_max_num_upload_parts = 10000;
 const size_t g_s3_min_upload_part_size = MB_TO_BYTES(5);
-const size_t g_streaming_buffer_size = MB_TO_BYTES(8);
+
 const double g_default_throughput_target_gbps = 10.0;
-/* TODO: disable this threshold until we have a better option for threshold */
-const uint64_t g_streaming_object_size_threshold = UINT64_MAX;
+
+/**
+ * Streaming buffer size selection based on experimental results on EBS:
+ *
+ * - Too small buffer sizes (e.g., 16KiB) impact disk read performance,
+ *   achieving only 6.73 Gbps throughput from EBS.
+ * - Too large buffer sizes cause network connections to starve more easily
+ *   when disk reads cannot provide data fast enough.
+ * - 1MiB buffer size provides optimal balance: sufficient disk read throughput
+ *   while maintaining reasonable retry rates due to connection starvation.
+ */
+const size_t g_streaming_buffer_size = MB_TO_BYTES(1);
+
+/**
+ * The streaming approach reduces memory consumption without introducing unexpected errors
+ * or performance degradation.
+ *
+ * We start streaming for objects larger than 1TiB, with plans to lower this threshold in future iterations.
+ *
+ * The 1TiB threshold was chosen to minimize the blast radius of this behavioral change
+ * while still providing meaningful memory usage improvements for large objects.
+ */
+const uint64_t g_streaming_object_size_threshold = TB_TO_BYTES(1);
 
 /**
  * TODO: update this default part size 17/16 MiB based on S3 best practice.
```
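The two constants together determine when the client streams by default. A minimal sketch of the size check, using a hypothetical helper (the actual decision site is outside this diff):

```c
#include <stdbool.h>
#include <stdint.h>

/* Declared in s3_util.c; equals 1 TiB after this change. */
extern const uint64_t g_streaming_object_size_threshold;

/* Hypothetical helper: stream by default only for objects larger than the
 * threshold, keeping the blast radius of the behavior change small. */
static bool should_stream_by_default(uint64_t object_size_bytes) {
    return object_size_bytes > g_streaming_object_size_threshold;
}
```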
