Skip to content

Commit 2548d3a

Browse files
authored
Proposal to fix reuse endpoint with refcount can reach zero (#183)
*Issue* - There was a race condition when the endpoint ref drops to zero, the other thread can bump it up to one again. - There was a work around to the race condition is to allow this from happening and handle it by the caller to check the refcount when clean up with lock and decide to not really clean up the resource if the refcount bumps up to one. *Description of changes:* - I believe the workaround described above is not a good solution - The caller should be responsible to not bump up the ref once it drops to zero - Add the helper for caller to hold lock to clean the resource before drop the refcount if needed.
1 parent 303d62c commit 2548d3a

File tree

7 files changed

+35
-186
lines changed

7 files changed

+35
-186
lines changed

include/aws/s3/private/s3_client_impl.h

+9-16
Original file line numberDiff line numberDiff line change
@@ -31,23 +31,13 @@ enum aws_s3_connection_finish_code {
3131
AWS_S3_CONNECTION_FINISH_CODE_RETRY,
3232
};
3333

34-
/* Callback for the owner of the endpoint when the ref count of this endpoint hits zero. This gives the owner a
35-
* chance to clean up any references to the endpoint, but also to short circuit clean-up. This can be needed in the case
36-
* of a synced table, where before the lock of the table can be grabbed, a reference for the endpoint is added by a
37-
* different critical section.*/
38-
typedef bool(aws_s3_endpoint_ref_zero_fn)(struct aws_s3_endpoint *endpoint);
39-
4034
/* Callback for the owner of the endpoint when the endpoint has completely cleaned up. */
4135
typedef void(aws_s3_endpoint_shutdown_fn)(void *user_data);
4236

4337
struct aws_s3_endpoint_options {
4438
/* URL of the host that this endpoint refers to. */
4539
struct aws_string *host_name;
4640

47-
/* Callback for the owner of the endpoint when the endpoint's refcount hits zero. (More details in the typedef of
48-
* this callback.)*/
49-
aws_s3_endpoint_ref_zero_fn *ref_count_zero_callback;
50-
5141
/* Callback for when this endpoint completely shuts down. */
5242
aws_s3_endpoint_shutdown_fn *shutdown_callback;
5343

@@ -83,13 +73,13 @@ struct aws_s3_endpoint {
8373
/* Connection manager that manages all connections to this endpoint. */
8474
struct aws_http_connection_manager *http_connection_manager;
8575

86-
/* Callback for the owner of the endpoint when the endpoint's refcount hits zero. (More details in the typedef of
87-
* this callback.)*/
88-
aws_s3_endpoint_ref_zero_fn *ref_count_zero_callback;
89-
9076
/* Callback for when this endpoint completely shuts down. */
9177
aws_s3_endpoint_shutdown_fn *shutdown_callback;
9278

79+
/* True, if the endpoint is created by client. So, it need to be thread safe to manage the refcount via
80+
* `aws_s3_client_endpoint_release` */
81+
bool handled_by_client;
82+
9383
void *user_data;
9484
};
9585

@@ -129,8 +119,6 @@ struct aws_s3_client_vtable {
129119

130120
void (*process_work)(struct aws_s3_client *client);
131121

132-
bool (*endpoint_ref_count_zero)(struct aws_s3_endpoint *endpoint);
133-
134122
void (*endpoint_shutdown_callback)(void *user_data);
135123

136124
void (*finish_destroy)(struct aws_s3_client *client);
@@ -326,6 +314,11 @@ struct aws_s3_endpoint *aws_s3_endpoint_acquire(struct aws_s3_endpoint *endpoint
326314
AWS_S3_API
327315
void aws_s3_endpoint_release(struct aws_s3_endpoint *endpoint);
328316

317+
/* If the endpoint is created by s3 client, it will be managed by the client via a hash table that need to be protected
318+
* by lock. A lock will be acquired within the call, never invoke with lock held */
319+
AWS_S3_API
320+
void aws_s3_client_endpoint_release(struct aws_s3_client *client, struct aws_s3_endpoint *endpoint);
321+
329322
AWS_S3_API
330323
extern const uint32_t g_max_num_connections_per_vip;
331324

source/s3_client.c

+3-33
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,6 @@ static void s_s3_client_process_work_task(struct aws_task *task, void *arg, enum
108108

109109
static void s_s3_client_process_work_default(struct aws_s3_client *client);
110110

111-
static bool s_s3_client_endpoint_ref_count_zero(struct aws_s3_endpoint *endpoint);
112-
113111
static void s_s3_client_endpoint_shutdown_callback(void *user_data);
114112

115113
/* Default factory function for creating a meta request. */
@@ -123,7 +121,6 @@ static struct aws_s3_client_vtable s_s3_client_default_vtable = {
123121
.get_host_address_count = aws_host_resolver_get_host_address_count,
124122
.schedule_process_work_synced = s_s3_client_schedule_process_work_synced_default,
125123
.process_work = s_s3_client_process_work_default,
126-
.endpoint_ref_count_zero = s_s3_client_endpoint_ref_count_zero,
127124
.endpoint_shutdown_callback = s_s3_client_endpoint_shutdown_callback,
128125
.finish_destroy = s_s3_client_finish_destroy_default,
129126
};
@@ -682,7 +679,6 @@ struct aws_s3_meta_request *aws_s3_client_make_meta_request(
682679
if (was_created) {
683680
struct aws_s3_endpoint_options endpoint_options = {
684681
.host_name = endpoint_host_name,
685-
.ref_count_zero_callback = client->vtable->endpoint_ref_count_zero,
686682
.shutdown_callback = client->vtable->endpoint_shutdown_callback,
687683
.client_bootstrap = client->client_bootstrap,
688684
.tls_connection_options = is_https ? client->tls_connection_options : NULL,
@@ -699,7 +695,7 @@ struct aws_s3_meta_request *aws_s3_client_make_meta_request(
699695
error_occurred = true;
700696
goto unlock;
701697
}
702-
698+
endpoint->handled_by_client = true;
703699
endpoint_hash_element->value = endpoint;
704700
++client->synced_data.num_endpoints_allocated;
705701
} else {
@@ -736,33 +732,6 @@ struct aws_s3_meta_request *aws_s3_client_make_meta_request(
736732
return meta_request;
737733
}
738734

739-
static bool s_s3_client_endpoint_ref_count_zero(struct aws_s3_endpoint *endpoint) {
740-
AWS_PRECONDITION(endpoint);
741-
742-
struct aws_s3_client *client = endpoint->user_data;
743-
AWS_PRECONDITION(client);
744-
745-
bool clean_up_endpoint = false;
746-
747-
/* BEGIN CRITICAL SECTION */
748-
{
749-
aws_s3_client_lock_synced_data(client);
750-
751-
/* It is possible that before we were able to acquire the lock here, the critical section used for looking up
752-
* the endpoint in the table and assigning it to a new meta request was called in a different thread. To handle
753-
* this case, we check the ref count before removing it.*/
754-
if (aws_atomic_load_int(&endpoint->ref_count.ref_count) == 0) {
755-
aws_hash_table_remove(&client->synced_data.endpoints, endpoint->host_name, NULL, NULL);
756-
clean_up_endpoint = true;
757-
}
758-
759-
aws_s3_client_unlock_synced_data(client);
760-
}
761-
/* END CRITICAL SECTION */
762-
763-
return clean_up_endpoint;
764-
}
765-
766735
static void s_s3_client_endpoint_shutdown_callback(void *user_data) {
767736
struct aws_s3_client *client = user_data;
768737
AWS_PRECONDITION(client);
@@ -1677,7 +1646,8 @@ void aws_s3_client_notify_connection_finished(
16771646
aws_retry_token_release(connection->retry_token);
16781647
connection->retry_token = NULL;
16791648

1680-
aws_s3_endpoint_release(connection->endpoint);
1649+
/* The endpoint must be created by client here */
1650+
aws_s3_client_endpoint_release(client, connection->endpoint);
16811651
connection->endpoint = NULL;
16821652

16831653
aws_mem_release(client->allocator, connection);

source/s3_endpoint.c

+19-5
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,6 @@ struct aws_s3_endpoint *aws_s3_endpoint_new(
104104
goto error_cleanup;
105105
}
106106

107-
endpoint->ref_count_zero_callback = options->ref_count_zero_callback;
108107
endpoint->shutdown_callback = options->shutdown_callback;
109108
endpoint->user_data = options->user_data;
110109

@@ -213,14 +212,29 @@ void aws_s3_endpoint_release(struct aws_s3_endpoint *endpoint) {
213212
aws_ref_count_release(&endpoint->ref_count);
214213
}
215214

215+
void aws_s3_client_endpoint_release(struct aws_s3_client *client, struct aws_s3_endpoint *endpoint) {
216+
AWS_PRECONDITION(endpoint);
217+
AWS_PRECONDITION(client);
218+
AWS_PRECONDITION(endpoint->handled_by_client);
219+
220+
/* BEGIN CRITICAL SECTION */
221+
{
222+
aws_s3_client_lock_synced_data(client);
223+
/* The last refcount to release */
224+
if (aws_atomic_load_int(&endpoint->ref_count.ref_count) == 1) {
225+
aws_hash_table_remove(&client->synced_data.endpoints, endpoint->host_name, NULL, NULL);
226+
}
227+
aws_s3_client_unlock_synced_data(client);
228+
}
229+
/* END CRITICAL SECTION */
230+
231+
aws_ref_count_release(&endpoint->ref_count);
232+
}
233+
216234
static void s_s3_endpoint_ref_count_zero(void *user_data) {
217235
struct aws_s3_endpoint *endpoint = user_data;
218236
AWS_PRECONDITION(endpoint);
219237

220-
if (endpoint->ref_count_zero_callback != NULL && !endpoint->ref_count_zero_callback(endpoint)) {
221-
return;
222-
}
223-
224238
if (endpoint->http_connection_manager != NULL) {
225239
struct aws_http_connection_manager *http_connection_manager = endpoint->http_connection_manager;
226240
endpoint->http_connection_manager = NULL;

source/s3_meta_request.c

+4-1
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,8 @@ static void s_s3_meta_request_destroy(void *user_data) {
370370

371371
aws_cached_signing_config_destroy(meta_request->cached_signing_config);
372372
aws_mutex_clean_up(&meta_request->synced_data.lock);
373+
/* For a meta request created by client, the endpoint should always be cleaned up by meta request finish call. Just
374+
* use regular release here for those meta request not really went through the client */
373375
aws_s3_endpoint_release(meta_request->endpoint);
374376
aws_s3_client_release(meta_request->client);
375377

@@ -1402,7 +1404,8 @@ void aws_s3_meta_request_finish_default(struct aws_s3_meta_request *meta_request
14021404

14031405
aws_s3_meta_request_result_clean_up(meta_request, &finish_result);
14041406

1405-
aws_s3_endpoint_release(meta_request->endpoint);
1407+
/* The endpoint must be created by client here */
1408+
aws_s3_client_endpoint_release(meta_request->client, meta_request->endpoint);
14061409
meta_request->endpoint = NULL;
14071410

14081411
aws_s3_client_release(meta_request->client);

tests/CMakeLists.txt

-2
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,8 @@ add_net_test_case(test_s3_default_sending_meta_request_user_agent)
127127
add_net_test_case(test_s3_range_requests)
128128
add_net_test_case(test_s3_not_satisfiable_range)
129129

130-
add_net_test_case(test_s3_endpoint_ref)
131130
add_net_test_case(test_s3_bad_endpoint)
132131
add_net_test_case(test_s3_different_endpoints)
133-
add_net_test_case(test_s3_endpoint_resurrect)
134132

135133
add_test_case(test_s3_replace_quote_entities)
136134
add_test_case(test_s3_parse_content_range_response_header)

tests/s3_endpoint_tests.c

-122
Original file line numberDiff line numberDiff line change
@@ -8,128 +8,6 @@
88
#include <aws/io/channel_bootstrap.h>
99
#include <aws/testing/aws_test_harness.h>
1010

11-
static bool s_test_s3_endpoint_ref_zero_ref_callback(struct aws_s3_endpoint *endpoint) {
12-
struct aws_s3_tester *tester = endpoint->user_data;
13-
14-
if (aws_s3_tester_inc_counter1(tester) == 1) {
15-
return false;
16-
}
17-
18-
return true;
19-
}
20-
21-
static void s_test_s3_endpoint_ref_shutdown(void *user_data) {
22-
struct aws_s3_tester *tester = user_data;
23-
24-
aws_s3_tester_inc_counter1(tester);
25-
}
26-
27-
AWS_TEST_CASE(test_s3_endpoint_ref, s_test_s3_endpoint_ref)
28-
static int s_test_s3_endpoint_ref(struct aws_allocator *allocator, void *ctx) {
29-
(void)ctx;
30-
31-
struct aws_s3_tester tester;
32-
ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester, false));
33-
aws_s3_tester_set_counter1_desired(&tester, 3);
34-
35-
struct aws_string *host_name =
36-
aws_s3_tester_build_endpoint_string(allocator, &g_test_public_bucket_name, &g_test_s3_region);
37-
38-
struct aws_s3_endpoint_options endpoint_options = {
39-
.host_name = host_name,
40-
.ref_count_zero_callback = s_test_s3_endpoint_ref_zero_ref_callback,
41-
.shutdown_callback = s_test_s3_endpoint_ref_shutdown,
42-
.client_bootstrap = tester.client_bootstrap,
43-
.tls_connection_options = NULL,
44-
.dns_host_address_ttl_seconds = 1,
45-
.user_data = &tester,
46-
.max_connections = 4,
47-
};
48-
49-
struct aws_s3_endpoint *endpoint = aws_s3_endpoint_new(allocator, &endpoint_options);
50-
51-
ASSERT_TRUE(endpoint->http_connection_manager != NULL);
52-
53-
/* During the first release, s_test_s3_endpoint_ref_zero_ref_callback will return false, which will mean it does not
54-
* get cleaned up. */
55-
aws_s3_endpoint_release(endpoint);
56-
ASSERT_TRUE(aws_atomic_load_int(&endpoint->ref_count.ref_count) == 0);
57-
58-
/* Perform an acquire, bumping the ref count to 1. */
59-
aws_s3_endpoint_acquire(endpoint);
60-
ASSERT_TRUE(aws_atomic_load_int(&endpoint->ref_count.ref_count) == 1);
61-
62-
/* During the secnod release, s_test_s3_endpoint_ref_zero_ref_callback will return true, causing clean up to
63-
* occur.*/
64-
aws_s3_endpoint_release(endpoint);
65-
66-
/* Wait for shutdown callback to increment the counter, hitting 3. */
67-
aws_s3_tester_wait_for_counters(&tester);
68-
69-
aws_string_destroy(host_name);
70-
aws_s3_tester_clean_up(&tester);
71-
72-
return 0;
73-
}
74-
75-
static bool s_test_s3_endpoint_resurrect_endpoint_ref_count_zero(struct aws_s3_endpoint *endpoint) {
76-
struct aws_s3_client *client = endpoint->user_data;
77-
struct aws_s3_tester *tester = client->shutdown_callback_user_data;
78-
79-
bool acquire_and_release_endpoint = false;
80-
81-
if (aws_s3_tester_inc_counter1(tester) == 1) {
82-
acquire_and_release_endpoint = true;
83-
aws_s3_endpoint_acquire(endpoint);
84-
}
85-
86-
struct aws_s3_client_vtable *original_client_vtable =
87-
aws_s3_tester_get_client_vtable_patch(tester, 0)->original_vtable;
88-
89-
bool result = original_client_vtable->endpoint_ref_count_zero(endpoint);
90-
91-
if (acquire_and_release_endpoint) {
92-
aws_s3_endpoint_release(endpoint);
93-
}
94-
95-
return result;
96-
}
97-
98-
AWS_TEST_CASE(test_s3_endpoint_resurrect, s_test_s3_endpoint_resurrect)
99-
static int s_test_s3_endpoint_resurrect(struct aws_allocator *allocator, void *ctx) {
100-
(void)ctx;
101-
102-
struct aws_s3_tester tester;
103-
ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester, false));
104-
105-
struct aws_s3_tester_client_options client_options;
106-
AWS_ZERO_STRUCT(client_options);
107-
108-
struct aws_s3_client *client = NULL;
109-
ASSERT_SUCCESS(aws_s3_tester_client_new(&tester, &client_options, &client));
110-
111-
struct aws_s3_client_vtable *patched_client_vtable = aws_s3_tester_patch_client_vtable(&tester, client, NULL);
112-
patched_client_vtable->endpoint_ref_count_zero = s_test_s3_endpoint_resurrect_endpoint_ref_count_zero;
113-
114-
struct aws_s3_tester_meta_request_options options = {
115-
.allocator = allocator,
116-
.client = client,
117-
.meta_request_type = AWS_S3_META_REQUEST_TYPE_GET_OBJECT,
118-
.validate_type = AWS_S3_TESTER_VALIDATE_TYPE_EXPECT_SUCCESS,
119-
.get_options =
120-
{
121-
.object_path = g_pre_existing_object_1MB,
122-
},
123-
};
124-
125-
ASSERT_SUCCESS(aws_s3_tester_send_meta_request_with_options(&tester, &options, NULL));
126-
127-
aws_s3_client_release(client);
128-
aws_s3_tester_clean_up(&tester);
129-
130-
return 0;
131-
}
132-
13311
AWS_TEST_CASE(test_s3_different_endpoints, s_test_s3_different_endpoints)
13412
static int s_test_s3_different_endpoints(struct aws_allocator *allocator, void *ctx) {
13513
(void)ctx;

tests/s3_tester.c

-7
Original file line numberDiff line numberDiff line change
@@ -595,12 +595,6 @@ static void s_s3_client_process_work_empty(struct aws_s3_client *client) {
595595
(void)client;
596596
}
597597

598-
static bool s_s3_client_endpoint_ref_count_zero_empty(struct aws_s3_endpoint *endpoint) {
599-
AWS_PRECONDITION(endpoint);
600-
(void)endpoint;
601-
return true;
602-
}
603-
604598
static void s_s3_client_endpoint_shutdown_callback_empty(void *user_data) {
605599
AWS_PRECONDITION(user_data);
606600
(void)user_data;
@@ -618,7 +612,6 @@ struct aws_s3_client_vtable g_aws_s3_client_mock_vtable = {
618612
.get_host_address_count = s_s3_client_get_host_address_count_empty,
619613
.schedule_process_work_synced = s_s3_client_schedule_process_work_synced_empty,
620614
.process_work = s_s3_client_process_work_empty,
621-
.endpoint_ref_count_zero = s_s3_client_endpoint_ref_count_zero_empty,
622615
.endpoint_shutdown_callback = s_s3_client_endpoint_shutdown_callback_empty,
623616
.finish_destroy = s_s3_client_finish_destroy_empty,
624617
};

0 commit comments

Comments
 (0)