Skip to content

Commit f2110f5

Browse files
authored
Fixing race condition clean up bugs (#94)
* If the client were to exit before the first host resolver callback, it would crash. * If the client were to clean up all resources and call the finish_destroy function while start_destroy was still executing, start_destroy could then try to schedule the work task at the end of the function on an already-destroyed client. * Slightly simplified clean-up: the finish_destroy function can now be called from only one place, the work task.
1 parent 924410a commit f2110f5

3 files changed

Lines changed: 88 additions & 85 deletions

File tree

include/aws/s3/private/s3_auto_ranged_get.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ struct aws_s3_auto_ranged_get {
2525
uint32_t num_parts_successful;
2626
uint32_t num_parts_failed;
2727

28-
size_t total_object_size;
29-
3028
uint32_t get_without_range : 1;
3129
uint32_t get_without_range_sent : 1;
3230
uint32_t get_without_range_completed : 1;

include/aws/s3/private/s3_client_impl.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,13 @@ struct aws_s3_client {
213213
/* Whether or not the client has started cleaning up all of its resources */
214214
uint32_t active : 1;
215215

216+
/* True if the start_destroy function is still executing, which blocks shutdown from completing. */
217+
uint32_t start_destroy_executing : 1;
218+
219+
/* True if the client has called aws_host_resolver_resolve_host but hasn't received a callback yet. There isn't
220+
* a way to cancel this first callback, so this will block shutdown from completing. */
221+
uint32_t waiting_for_first_host_resolve_callback : 1;
222+
216223
/* Whether or not work processing is currently scheduled. */
217224
uint32_t process_work_task_scheduled : 1;
218225

source/s3_client.c

Lines changed: 81 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,7 @@ AWS_STATIC_STRING_FROM_LITERAL(s_http_proxy_env_var, "HTTP_PROXY");
6767
/* Called when ref count is 0. */
6868
static void s_s3_client_start_destroy(void *user_data);
6969

70-
typedef void(s3_client_update_synced_data_state_fn)(struct aws_s3_client *client);
71-
72-
/* Used to atomically update client state during clean-up and check for finishing shutdown. */
73-
static void s_s3_client_check_for_shutdown(
74-
struct aws_s3_client *client,
75-
s3_client_update_synced_data_state_fn *update_fn);
76-
77-
/* Called by s_s3_client_check_for_shutdown when all shutdown criteria has been met. */
70+
/* Called by s_s3_client_process_work_default when all shutdown criteria have been met. */
7871
static void s_s3_client_finish_destroy(void *user_data);
7972

8073
/* Called when the body streaming elg shutdown has completed. */
@@ -273,6 +266,7 @@ struct aws_s3_client *aws_s3_client_new(
273266
if (client_config->tls_mode == AWS_MR_TLS_ENABLED) {
274267
client->tls_connection_options =
275268
aws_mem_calloc(client->allocator, 1, sizeof(struct aws_tls_connection_options));
269+
276270
if (client->tls_connection_options == NULL) {
277271
goto on_error;
278272
}
@@ -358,17 +352,11 @@ void aws_s3_client_release(struct aws_s3_client *client) {
358352
aws_ref_count_release(&client->ref_count);
359353
}
360354

361-
static void s_s3_client_reset_active_synced(struct aws_s3_client *client) {
362-
AWS_PRECONDITION(client);
363-
ASSERT_SYNCED_DATA_LOCK_HELD(client);
364-
client->synced_data.active = false;
365-
}
366-
367355
static void s_s3_client_start_destroy(void *user_data) {
368356
struct aws_s3_client *client = user_data;
369357
AWS_PRECONDITION(client);
370358

371-
AWS_LOGF_DEBUG(AWS_LS_S3_CLIENT, "id=%p Client starting destruction..", (void *)client);
359+
AWS_LOGF_DEBUG(AWS_LS_S3_CLIENT, "id=%p Client starting destruction.", (void *)client);
372360

373361
struct aws_linked_list local_vip_list;
374362
aws_linked_list_init(&local_vip_list);
@@ -377,6 +365,11 @@ static void s_s3_client_start_destroy(void *user_data) {
377365

378366
aws_s3_client_lock_synced_data(client);
379367

368+
client->synced_data.active = false;
369+
370+
/* Prevent the client from cleaning up in between the mutex unlock/re-lock below. */
371+
client->synced_data.start_destroy_executing = true;
372+
380373
/* Grab the host listener from the synced_data so that we can remove it outside of the lock. */
381374
host_listener = client->synced_data.host_listener;
382375
client->synced_data.host_listener = NULL;
@@ -402,42 +395,13 @@ static void s_s3_client_start_destroy(void *user_data) {
402395
aws_event_loop_group_release(client->body_streaming_elg);
403396
client->body_streaming_elg = NULL;
404397

405-
s_s3_client_check_for_shutdown(client, s_s3_client_reset_active_synced);
406-
407-
aws_s3_client_lock_synced_data(client);
408-
s_s3_client_schedule_process_work_synced(client);
409-
aws_s3_client_unlock_synced_data(client);
410-
}
411-
412-
static void s_s3_client_check_for_shutdown(
413-
struct aws_s3_client *client,
414-
s3_client_update_synced_data_state_fn *update_fn) {
415-
(void)client;
416-
417-
bool finish_destroy = false;
418-
419398
aws_s3_client_lock_synced_data(client);
399+
client->synced_data.start_destroy_executing = false;
420400

421-
if (update_fn != NULL) {
422-
update_fn(client);
423-
}
424-
425-
/* This flag should never be set twice. If it was, that means a double-free could occur.*/
426-
AWS_ASSERT(!client->synced_data.finish_destroy);
427-
428-
finish_destroy = client->synced_data.active == false && client->synced_data.allocated_vip_count == 0 &&
429-
client->synced_data.host_listener_allocated == false &&
430-
client->synced_data.body_streaming_elg_allocated == false &&
431-
client->synced_data.process_work_task_scheduled == false &&
432-
client->synced_data.process_work_task_in_progress == false;
433-
434-
client->synced_data.finish_destroy = finish_destroy;
435-
401+
/* Schedule the work task to clean up outstanding connections and also to call the s_s3_client_finish_destroy
402+
* function once everything that is cleaned up asynchronously has finished. */
403+
s_s3_client_schedule_process_work_synced(client);
436404
aws_s3_client_unlock_synced_data(client);
437-
438-
if (finish_destroy) {
439-
s_s3_client_finish_destroy(client);
440-
}
441405
}
442406

443407
static void s_s3_client_finish_destroy(void *user_data) {
@@ -486,20 +450,16 @@ static void s_s3_client_finish_destroy(void *user_data) {
486450
}
487451
}
488452

489-
static void s_s3_client_set_body_streaming_elg_shutdown_synced(struct aws_s3_client *client) {
453+
static void s_s3_client_body_streaming_elg_shutdown(void *user_data) {
454+
struct aws_s3_client *client = user_data;
490455
AWS_PRECONDITION(client);
491-
ASSERT_SYNCED_DATA_LOCK_HELD(client);
492456

493457
AWS_LOGF_DEBUG(AWS_LS_S3_CLIENT, "id=%p Client body streaming ELG shutdown.", (void *)client);
494458

459+
aws_s3_client_lock_synced_data(client);
495460
client->synced_data.body_streaming_elg_allocated = false;
496-
}
497-
498-
static void s_s3_client_body_streaming_elg_shutdown(void *user_data) {
499-
struct aws_s3_client *client = user_data;
500-
AWS_PRECONDITION(client);
501-
502-
s_s3_client_check_for_shutdown(client, s_s3_client_set_body_streaming_elg_shutdown_synced);
461+
s_s3_client_schedule_process_work_synced(client);
462+
aws_s3_client_unlock_synced_data(client);
503463
}
504464

505465
static int s_s3_client_get_proxy_uri(struct aws_s3_client *client, struct aws_uri *proxy_uri) {
@@ -753,7 +713,7 @@ static void s_s3_vip_check_for_shutdown(struct aws_s3_vip *vip, s3_client_vip_up
753713
}
754714
}
755715

756-
static void s_s3_vip_set_conn_manager_shutdown(struct aws_s3_vip *vip) {
716+
static void s_s3_vip_set_conn_manager_shutdown_synced(struct aws_s3_vip *vip) {
757717
AWS_PRECONDITION(vip);
758718
AWS_PRECONDITION(vip->owning_client);
759719
ASSERT_SYNCED_DATA_LOCK_HELD(vip->owning_client);
@@ -769,7 +729,7 @@ static void s_s3_vip_http_connection_manager_shutdown_callback(void *user_data)
769729
AWS_LOGF_DEBUG(
770730
AWS_LS_S3_CLIENT, "id=%p VIP %p Connection manager shutdown", (void *)vip->owning_client, (void *)vip);
771731

772-
s_s3_vip_check_for_shutdown(vip, s_s3_vip_set_conn_manager_shutdown);
732+
s_s3_vip_check_for_shutdown(vip, s_s3_vip_set_conn_manager_shutdown_synced);
773733
}
774734

775735
static void s_s3_vip_finish_destroy(void *user_data) {
@@ -1099,17 +1059,14 @@ static struct aws_s3_meta_request *s_s3_client_meta_request_factory_default(
10991059
return NULL;
11001060
}
11011061

1102-
static void s_s3_client_sub_vip_count_synced(struct aws_s3_client *client) {
1103-
AWS_PRECONDITION(client);
1104-
ASSERT_SYNCED_DATA_LOCK_HELD(client);
1105-
--client->synced_data.allocated_vip_count;
1106-
}
1107-
11081062
static void s_s3_client_vip_shutdown_callback(void *user_data) {
11091063
AWS_PRECONDITION(user_data);
11101064
struct aws_s3_client *client = user_data;
11111065

1112-
s_s3_client_check_for_shutdown(client, s_s3_client_sub_vip_count_synced);
1066+
aws_s3_client_lock_synced_data(client);
1067+
--client->synced_data.allocated_vip_count;
1068+
s_s3_client_schedule_process_work_synced(client);
1069+
aws_s3_client_unlock_synced_data(client);
11131070
}
11141071

11151072
static void s_s3_client_push_meta_request_synced(
@@ -1164,12 +1121,6 @@ static void s_s3_client_remove_meta_request_threaded(
11641121
aws_s3_meta_request_release(meta_request);
11651122
}
11661123

1167-
static void s_s3_client_reset_work_task_in_progress_synced(struct aws_s3_client *client) {
1168-
AWS_PRECONDITION(client);
1169-
ASSERT_SYNCED_DATA_LOCK_HELD(client);
1170-
client->synced_data.process_work_task_in_progress = false;
1171-
}
1172-
11731124
/* Task function for trying to find a request that can be processed. */
11741125
static void s_s3_client_process_work_task(struct aws_task *task, void *arg, enum aws_task_status task_status) {
11751126
AWS_PRECONDITION(task);
@@ -1299,6 +1250,9 @@ static void s_s3_client_process_work_default(struct aws_s3_client *client) {
12991250
(void *)client);
13001251
s_s3_client_assign_requests_to_connections_threaded(client, client_active, 0);
13011252

1253+
/*******************/
1254+
/* Step 4: Log client stats. */
1255+
/*******************/
13021256
{
13031257
uint32_t num_idle_connections = 0;
13041258

@@ -1332,7 +1286,51 @@ static void s_s3_client_process_work_default(struct aws_s3_client *client) {
13321286
client->threaded_data.num_active_vip_connections);
13331287
}
13341288

1335-
s_s3_client_check_for_shutdown(client, s_s3_client_reset_work_task_in_progress_synced);
1289+
/*******************/
1290+
/* Step 5: Check for client shutdown. */
1291+
/*******************/
1292+
{
1293+
aws_s3_client_lock_synced_data(client);
1294+
client->synced_data.process_work_task_in_progress = false;
1295+
1296+
/* This flag should never be set twice. If it was, that means a double-free could occur.*/
1297+
AWS_ASSERT(!client->synced_data.finish_destroy);
1298+
1299+
bool finish_destroy = client->synced_data.active == false &&
1300+
client->synced_data.waiting_for_first_host_resolve_callback == false &&
1301+
client->synced_data.start_destroy_executing == false &&
1302+
client->synced_data.allocated_vip_count == 0 &&
1303+
client->synced_data.host_listener_allocated == false &&
1304+
client->synced_data.body_streaming_elg_allocated == false &&
1305+
client->synced_data.process_work_task_scheduled == false &&
1306+
client->synced_data.process_work_task_in_progress == false;
1307+
1308+
client->synced_data.finish_destroy = finish_destroy;
1309+
1310+
if (!client->synced_data.active) {
1311+
AWS_LOGF_DEBUG(
1312+
AWS_LS_S3_CLIENT,
1313+
"id=%p Client shutdown progress: waiting_for_first_host_resolve_callback=%d "
1314+
"starting_destroy_executing=%d "
1315+
" allocated_vip_count=%d host_listener_allocated=%d body_streaming_elg_allocated=%d "
1316+
"process_work_task_scheduled=%d process_work_task_in_progress=%d finish_destroy=%d",
1317+
(void *)client,
1318+
(int)client->synced_data.waiting_for_first_host_resolve_callback,
1319+
(int)client->synced_data.start_destroy_executing,
1320+
(int)client->synced_data.allocated_vip_count,
1321+
(int)client->synced_data.host_listener_allocated,
1322+
(int)client->synced_data.body_streaming_elg_allocated,
1323+
(int)client->synced_data.process_work_task_scheduled,
1324+
(int)client->synced_data.process_work_task_in_progress,
1325+
(int)client->synced_data.finish_destroy);
1326+
}
1327+
1328+
aws_s3_client_unlock_synced_data(client);
1329+
1330+
if (finish_destroy) {
1331+
s_s3_client_finish_destroy(client);
1332+
}
1333+
}
13361334
}
13371335

13381336
static void s_s3_client_assign_requests_to_connections_threaded(
@@ -2028,6 +2026,11 @@ static void s_s3_client_on_host_resolver_address_resolved(
20282026
aws_error_str(last_error_code));
20292027
}
20302028
}
2029+
2030+
aws_s3_client_lock_synced_data(client);
2031+
client->synced_data.waiting_for_first_host_resolve_callback = false;
2032+
s_s3_client_schedule_process_work_synced(client);
2033+
aws_s3_client_unlock_synced_data(client);
20312034
}
20322035

20332036
int aws_s3_client_add_vips(struct aws_s3_client *client, const struct aws_array_list *host_addresses) {
@@ -2214,20 +2217,14 @@ static void s_s3_client_host_listener_expired_address_callback(
22142217
aws_s3_client_remove_vips(client, host_addresses);
22152218
}
22162219

2217-
static void s_s3_client_set_host_listener_shutdown_synced(struct aws_s3_client *client) {
2218-
AWS_PRECONDITION(client);
2219-
ASSERT_SYNCED_DATA_LOCK_HELD(client);
2220-
2221-
AWS_LOGF_DEBUG(AWS_LS_S3_CLIENT, "id=%p: Host listener finished shutdown.", (void *)client);
2222-
2223-
client->synced_data.host_listener_allocated = false;
2224-
}
2225-
22262220
static void s_s3_client_host_listener_shutdown_callback(void *user_data) {
22272221
AWS_PRECONDITION(user_data);
22282222
struct aws_s3_client *client = user_data;
22292223

2230-
s_s3_client_check_for_shutdown(client, s_s3_client_set_host_listener_shutdown_synced);
2224+
aws_s3_client_lock_synced_data(client);
2225+
client->synced_data.host_listener_allocated = false;
2226+
s_s3_client_schedule_process_work_synced(client);
2227+
aws_s3_client_unlock_synced_data(client);
22312228
}
22322229

22332230
static int s_s3_client_start_resolving_addresses(struct aws_s3_client *client) {
@@ -2273,6 +2270,7 @@ static int s_s3_client_start_resolving_addresses(struct aws_s3_client *client) {
22732270

22742271
client->synced_data.host_listener = host_listener;
22752272
client->synced_data.host_listener_allocated = true;
2273+
client->synced_data.waiting_for_first_host_resolve_callback = true;
22762274

22772275
unlock:
22782276
aws_s3_client_unlock_synced_data(client);

0 commit comments

Comments
 (0)