Skip to content

Commit 5eac79f

Browse files
authored
update first byte timeout algo (#461)
1 parent f14b84d commit 5eac79f

3 files changed

Lines changed: 140 additions & 12 deletions

File tree

include/aws/s3/private/s3_client_impl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ struct aws_http_connection;
2626
struct aws_http_connection_manager;
2727
struct aws_host_resolver;
2828
struct aws_s3_endpoint;
29+
struct aws_priority_queue;
2930

3031
enum aws_s3_connection_finish_code {
3132
AWS_S3_CONNECTION_FINISH_CODE_SUCCESS,
@@ -185,6 +186,8 @@ struct aws_s3_upload_part_timeout_stats {
185186
struct {
186187
uint64_t sum_ns;
187188
uint64_t num_samples;
189+
bool collecting_p90;
190+
struct aws_priority_queue p90_samples;
188191
} initial_request_time;
189192

190193
/* Track the timeout rate. */

source/s3_client.c

Lines changed: 58 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <aws/common/clock.h>
2323
#include <aws/common/device_random.h>
2424
#include <aws/common/json.h>
25+
#include <aws/common/priority_queue.h>
2526
#include <aws/common/string.h>
2627
#include <aws/common/system_info.h>
2728
#include <aws/http/connection.h>
@@ -768,6 +769,9 @@ static void s_s3_client_finish_destroy_default(struct aws_s3_client *client) {
768769
void *shutdown_user_data = client->shutdown_callback_user_data;
769770

770771
aws_s3_buffer_pool_destroy(client->buffer_pool);
772+
if (client->synced_data.upload_part_stats.initial_request_time.collecting_p90) {
773+
aws_priority_queue_clean_up(&client->synced_data.upload_part_stats.initial_request_time.p90_samples);
774+
}
771775

772776
aws_mem_release(client->allocator, client->network_interface_names_cursor_array);
773777
for (size_t i = 0; i < client->num_network_interface_names; i++) {
@@ -2481,6 +2485,13 @@ static uint64_t s_upload_timeout_threshold_ns = 5000000000; /* 5 Secs */
24812485
const size_t g_expect_timeout_offset_ms =
24822486
700; /* 0.7 Secs. From experiments on c5n.18xlarge machine for 30 GiB upload, it gave us best performance. */
24832487

2488+
static int s_compare_uint64_min_heap(const void *a, const void *b) {
2489+
uint64_t arg1 = *(const uint64_t *)a;
2490+
uint64_t arg2 = *(const uint64_t *)b;
2491+
/* Use a min-heap to get the P90, which will be the min of the largest 10% of data. */
2492+
return arg1 > arg2;
2493+
}
2494+
24842495
/**
24852496
* The upload timeout optimization: explained.
24862497
*
@@ -2501,10 +2512,10 @@ const size_t g_expect_timeout_offset_ms =
25012512
* would be better off waiting 5sec for the response, vs re-uploading the whole request.
25022513
*
25032514
* The current algorithm:
2504-
* 1. Start without a timeout value. After 10 requests completed, we know the average of how long the
2505-
* request takes. We decide if it's worth to set a timeout value or not. (If the average of request takes more than
2506-
* 5 secs or not) TODO: if the client have different part size, this doesn't make sense
2507-
* 2. If it is worth to retry, start with a default timeout value, 1 sec.
2515+
* 1. Start without a timeout value. After max(10, # of ideal connections) requests completed, we know the average of
2516+
* how long the request takes. We decide if it's worth to set a timeout value or not. (If the average of request
2517+
* takes more than 5 secs or not).
2518+
* 2. If it is worth to retry, start with a timeout value from max(1 sec, P90 of the initial samples).
25082519
* 3. If a request finishes successfully, use the average response_to_first_byte_time + g_expect_timeout_offset_ms as
25092520
* our expected timeout value. (TODO: The real expected timeout value should be a P99 of all the requests.)
25102521
* 3.1 Adjust the current timeout value against the expected timeout value, via 0.99 * <current timeout> + 0.01 *
@@ -2537,28 +2548,67 @@ void aws_s3_client_update_upload_part_timeout(
25372548
uint64_t updated_timeout_ns = 0;
25382549
uint64_t expect_timeout_offset_ns =
25392550
aws_timestamp_convert(g_expect_timeout_offset_ms, AWS_TIMESTAMP_MILLIS, AWS_TIMESTAMP_NANOS, NULL);
2551+
// Default to ideal connection count, but at least collect 10 samples.
2552+
uint32_t num_samples_to_collect = aws_max_u32(client->ideal_connection_count, 10);
25402553

25412554
switch (finished_error_code) {
25422555
case AWS_ERROR_SUCCESS:
25432556
/* We only interested in request succeed */
25442557
stats->num_successful_upload_requests = aws_add_u64_saturating(stats->num_successful_upload_requests, 1);
2545-
if (stats->num_successful_upload_requests <= 10) {
2558+
if (stats->num_successful_upload_requests <= num_samples_to_collect) {
25462559
/* Gether the data */
25472560
uint64_t request_time_ns =
25482561
metrics->time_metrics.receive_end_timestamp_ns - metrics->time_metrics.send_start_timestamp_ns;
25492562
stats->initial_request_time.sum_ns =
25502563
aws_add_u64_saturating(stats->initial_request_time.sum_ns, request_time_ns);
25512564
++stats->initial_request_time.num_samples;
2552-
if (stats->num_successful_upload_requests == 10) {
2565+
if (!stats->initial_request_time.collecting_p90) {
2566+
/* Initialize the priority queue to collect the p90 of the initial requests. */
2567+
stats->initial_request_time.collecting_p90 = true;
2568+
size_t queue_size = num_samples_to_collect / 10;
2569+
AWS_ASSERT(queue_size > 0);
2570+
aws_priority_queue_init_dynamic(
2571+
&stats->initial_request_time.p90_samples,
2572+
client->allocator,
2573+
queue_size,
2574+
sizeof(uint64_t),
2575+
s_compare_uint64_min_heap);
2576+
} else {
2577+
/* check if the queue is full, if full pop and top and push the next. */
2578+
size_t current_size = aws_priority_queue_size(&stats->initial_request_time.p90_samples);
2579+
size_t current_capacity = aws_priority_queue_capacity(&stats->initial_request_time.p90_samples);
2580+
if (current_size == current_capacity) {
2581+
uint64_t *min = NULL;
2582+
aws_priority_queue_top(&stats->initial_request_time.p90_samples, (void **)&min);
2583+
if (request_time_ns > *min) {
2584+
/* Push the data into the queue if it's larger than the min of the queue. */
2585+
uint64_t pop_val = 0;
2586+
aws_priority_queue_pop(&stats->initial_request_time.p90_samples, &pop_val);
2587+
aws_priority_queue_push(&stats->initial_request_time.p90_samples, &request_time_ns);
2588+
}
2589+
} else {
2590+
aws_priority_queue_push(&stats->initial_request_time.p90_samples, &request_time_ns);
2591+
}
2592+
}
2593+
2594+
if (stats->num_successful_upload_requests == client->ideal_connection_count) {
25532595
/* Decide we need a timeout or not */
25542596
uint64_t average_request_time_ns =
25552597
stats->initial_request_time.sum_ns / stats->initial_request_time.num_samples;
25562598
if (average_request_time_ns >= s_upload_timeout_threshold_ns) {
25572599
/* We don't need a timeout, as retry will be slower than just wait for the server to response */
25582600
stats->stop_timeout = true;
25592601
} else {
2560-
/* Start the timeout at 1 secs */
2561-
aws_atomic_store_int(&client->upload_timeout_ms, 1000);
2602+
/* Start the timeout at th p90 of the initial samples. */
2603+
uint64_t *p90_ns = NULL;
2604+
aws_priority_queue_top(&stats->initial_request_time.p90_samples, (void **)&p90_ns);
2605+
uint64_t p90_ms =
2606+
aws_timestamp_convert(*p90_ns, AWS_TIMESTAMP_NANOS, AWS_TIMESTAMP_MILLIS, NULL);
2607+
uint64_t init_upload_timeout_ms = aws_max_u64(p90_ms, 1000 /*1sec*/);
2608+
aws_atomic_store_int(&client->upload_timeout_ms, (size_t)init_upload_timeout_ms);
2609+
/* Clean up the queue now, as not needed anymore. */
2610+
aws_priority_queue_clean_up(&stats->initial_request_time.p90_samples);
2611+
stats->initial_request_time.collecting_p90 = false;
25622612
}
25632613
}
25642614
goto unlock;

tests/s3_client_test.c

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,19 @@ static int s_starts_upload_retry(struct aws_s3_client *client, struct aws_s3_req
4141
AWS_ZERO_STRUCT(client->synced_data.upload_part_stats);
4242

4343
s_init_mock_s3_request_upload_part_timeout(mock_request, 0, average_time_ns, average_time_ns);
44-
for (size_t i = 0; i < 10; i++) {
45-
/* Mock a number of requests completed with the large time for the request */
44+
size_t init_count = client->ideal_connection_count;
45+
size_t p90_count = init_count / 10 + 1;
46+
for (size_t i = 0; i < init_count - p90_count; i++) {
47+
/* With 90% of the average request time. */
4648
aws_s3_client_update_upload_part_timeout(client, mock_request, AWS_ERROR_SUCCESS);
4749
}
4850

51+
uint64_t one_sec_time_ns = aws_timestamp_convert(1, AWS_TIMESTAMP_SECS, AWS_TIMESTAMP_NANOS, NULL); /* 1 Secs */
52+
s_init_mock_s3_request_upload_part_timeout(mock_request, 0, one_sec_time_ns, one_sec_time_ns);
53+
for (size_t i = 0; i < p90_count; i++) {
54+
/* 10 percent of the request takes 1 sec */
55+
aws_s3_client_update_upload_part_timeout(client, mock_request, AWS_ERROR_SUCCESS);
56+
}
4957
/* Check that retry should be turned off */
5058
ASSERT_FALSE(client->synced_data.upload_part_stats.stop_timeout);
5159
size_t current_timeout_ms = aws_atomic_load_int(&client->upload_timeout_ms);
@@ -78,7 +86,7 @@ TEST_CASE(client_update_upload_part_timeout) {
7886
uint64_t average_time_ns = aws_timestamp_convert(
7987
250, AWS_TIMESTAMP_MILLIS, AWS_TIMESTAMP_NANOS, NULL); /* 0.25 Secs, close to average for upload a part */
8088

81-
size_t init_count = 10;
89+
size_t init_count = client->ideal_connection_count;
8290
{
8391
/* 1. If the request time is larger than 5 secs, we don't do retry */
8492
AWS_ZERO_STRUCT(client->synced_data.upload_part_stats);
@@ -91,10 +99,62 @@ TEST_CASE(client_update_upload_part_timeout) {
9199
aws_s3_client_update_upload_part_timeout(client, &mock_request, AWS_ERROR_SUCCESS);
92100
}
93101

94-
/* Check that retry should be turned off */
95102
ASSERT_TRUE(client->synced_data.upload_part_stats.stop_timeout);
96103
size_t current_timeout_ms = aws_atomic_load_int(&client->upload_timeout_ms);
97104
ASSERT_UINT_EQUALS(0, current_timeout_ms);
105+
/* clean up */
106+
if (client->synced_data.upload_part_stats.initial_request_time.collecting_p90) {
107+
aws_priority_queue_clean_up(&client->synced_data.upload_part_stats.initial_request_time.p90_samples);
108+
client->synced_data.upload_part_stats.initial_request_time.collecting_p90 = false;
109+
}
110+
}
111+
{
112+
/* 2.1. Test that the P90 of the init samples are used correctly and at least 1 sec */
113+
AWS_ZERO_STRUCT(client->synced_data.upload_part_stats);
114+
/* Hack around to set the ideal connection time for testing. */
115+
size_t test_init_connection = 1000;
116+
*(uint32_t *)(void *)&client->ideal_connection_count = (uint32_t)test_init_connection;
117+
for (size_t i = 0; i < test_init_connection; i++) {
118+
/* Mock a number of requests completed with the large time for the request */
119+
uint64_t time_ns = aws_timestamp_convert(i, AWS_TIMESTAMP_MILLIS, AWS_TIMESTAMP_NANOS, NULL);
120+
s_init_mock_s3_request_upload_part_timeout(&mock_request, 0, time_ns, time_ns);
121+
aws_s3_client_update_upload_part_timeout(client, &mock_request, AWS_ERROR_SUCCESS);
122+
}
123+
124+
size_t current_timeout_ms = aws_atomic_load_int(&client->upload_timeout_ms);
125+
/* the P90 of the results is 900, but it has to be at least 1000 */
126+
ASSERT_UINT_EQUALS(1000, current_timeout_ms);
127+
/* clean up */
128+
if (client->synced_data.upload_part_stats.initial_request_time.collecting_p90) {
129+
aws_priority_queue_clean_up(&client->synced_data.upload_part_stats.initial_request_time.p90_samples);
130+
client->synced_data.upload_part_stats.initial_request_time.collecting_p90 = false;
131+
}
132+
/* Change it back */
133+
*(uint32_t *)(void *)&client->ideal_connection_count = (uint32_t)init_count;
134+
}
135+
{
136+
/* 2.2. Test that the P90 of the init samples are used correctly */
137+
AWS_ZERO_STRUCT(client->synced_data.upload_part_stats);
138+
/* Hack around to set the ideal connection time for testing. */
139+
size_t test_init_connection = 10000;
140+
*(uint32_t *)(void *)&client->ideal_connection_count = (uint32_t)test_init_connection;
141+
for (size_t i = 0; i < test_init_connection; i++) {
142+
/* Mock a number of requests completed with the large time for the request */
143+
uint64_t time_ns = aws_timestamp_convert(i, AWS_TIMESTAMP_MILLIS, AWS_TIMESTAMP_NANOS, NULL);
144+
s_init_mock_s3_request_upload_part_timeout(&mock_request, 0, time_ns, time_ns);
145+
aws_s3_client_update_upload_part_timeout(client, &mock_request, AWS_ERROR_SUCCESS);
146+
}
147+
148+
size_t current_timeout_ms = aws_atomic_load_int(&client->upload_timeout_ms);
149+
/* P90 is 9000 */
150+
ASSERT_UINT_EQUALS(9000, current_timeout_ms);
151+
/* clean up */
152+
if (client->synced_data.upload_part_stats.initial_request_time.collecting_p90) {
153+
aws_priority_queue_clean_up(&client->synced_data.upload_part_stats.initial_request_time.p90_samples);
154+
client->synced_data.upload_part_stats.initial_request_time.collecting_p90 = false;
155+
}
156+
/* Change it back */
157+
*(uint32_t *)(void *)&client->ideal_connection_count = (uint32_t)init_count;
98158
}
99159

100160
{
@@ -137,6 +197,11 @@ TEST_CASE(client_update_upload_part_timeout) {
137197
aws_timestamp_convert(average_time_ns, AWS_TIMESTAMP_NANOS, AWS_TIMESTAMP_MILLIS, NULL) +
138198
g_expect_timeout_offset_ms,
139199
current_timeout_ms);
200+
/* clean up */
201+
if (client->synced_data.upload_part_stats.initial_request_time.collecting_p90) {
202+
aws_priority_queue_clean_up(&client->synced_data.upload_part_stats.initial_request_time.p90_samples);
203+
client->synced_data.upload_part_stats.initial_request_time.collecting_p90 = false;
204+
}
140205
}
141206

142207
{
@@ -162,6 +227,11 @@ TEST_CASE(client_update_upload_part_timeout) {
162227
current_timeout_ms = aws_atomic_load_int(&client->upload_timeout_ms);
163228
/* 1.1 secs, still */
164229
ASSERT_UINT_EQUALS(1100, current_timeout_ms);
230+
/* clean up */
231+
if (client->synced_data.upload_part_stats.initial_request_time.collecting_p90) {
232+
aws_priority_queue_clean_up(&client->synced_data.upload_part_stats.initial_request_time.p90_samples);
233+
client->synced_data.upload_part_stats.initial_request_time.collecting_p90 = false;
234+
}
165235
}
166236

167237
{
@@ -224,6 +294,11 @@ TEST_CASE(client_update_upload_part_timeout) {
224294
current_timeout_ms = aws_atomic_load_int(&client->upload_timeout_ms);
225295
ASSERT_UINT_EQUALS(3000, current_timeout_ms);
226296
ASSERT_FALSE(client->synced_data.upload_part_stats.stop_timeout);
297+
/* clean up */
298+
if (client->synced_data.upload_part_stats.initial_request_time.collecting_p90) {
299+
aws_priority_queue_clean_up(&client->synced_data.upload_part_stats.initial_request_time.p90_samples);
300+
client->synced_data.upload_part_stats.initial_request_time.collecting_p90 = false;
301+
}
227302
}
228303

229304
{

0 commit comments

Comments
 (0)