diff --git a/source/s3_client.c b/source/s3_client.c index 7875542f9..3e88623b9 100644 --- a/source/s3_client.c +++ b/source/s3_client.c @@ -45,6 +45,66 @@ #include #include +/* + * The s3 client splits user operations (meta requests) into concurrent HTTP requests + * and schedules them across pooled connections. + * + * A meta request is a user-initiated operation (GetObject, PutObject, etc). Each type + * has a state machine that produces individual HTTP requests on demand (range-GETs, + * UploadParts, etc). See s3_auto_ranged_get.c, s3_auto_ranged_put.c, s3_copy_object.c, + * s3_default_meta_request.c. + * + * A request goes through: buffer acquire -> prepare (build message + read body) -> sign -> queue -> send. + * + * An endpoint represents an S3 hostname and owns an HTTP connection manager that pools + * connections to that host. + * + * Threading model: + * + * All scheduling decisions run on a single event loop thread (process_work_event_loop), + * pinned at client init from the bootstrap ELG. This means threaded_data needs no lock. + * + * Other threads (user threads, prepare callbacks, connection callbacks) push work into + * synced_data (protected by a mutex) and call schedule_process_work_synced() to wake + * the work loop. The lock is only held long enough to move list pointers. + * + * Client data falls into three categories: + * + * synced_data - mutex-protected, any thread. Mailbox for pending_meta_request_work + * and prepared_requests. External threads deposit here, work loop drains. + * + * threaded_data - no lock, only the process_work_event_loop thread touches it. + * Contains request_queue, active meta_requests list, and prep counters. + * Functions with "_threaded" suffix operate on this data. + * + * stats - atomic counters (num_requests_in_flight, etc). Any thread, no lock. + * + * Event loop groups: + * + * Bootstrap ELG (client_bootstrap->event_loop_group) drives networking. HTTP connections + * are pinned to loops in this group. One loop from this group is designated as the + * process_work_event_loop at init; it still handles normal networking work too. + * + * Body Streaming ELG (body_streaming_elg) is a separate group for delivering response + * bodies to user callbacks and for async request preparation. Keeps slow user callbacks + * from blocking networking or scheduling. + * + * The process_work_event_loop clock (aws_event_loop_current_clock_time) is used to + * schedule delayed housekeeping via schedule_task_future(): + * - Buffer pool trim after 5s idle. + * - Endpoint cleanup every 5s (removes endpoints with zero references). + * + * Scheduling flow (see comments above s_s3_client_schedule_process_work_synced): + * + * 1. External thread deposits work into synced_data, calls schedule_process_work_synced() + * which posts a task to the event loop. + * 2. The event loop runs s_s3_client_process_work_default() which drains synced_data into + * threaded_data, updates meta requests to produce new requests, and assigns queued + * requests to HTTP connections. + * 3. Requests complete on networking threads, deposit results back into synced_data, and + * re-trigger the work loop. + */ + #ifdef _MSC_VER # pragma warning(disable : 4232) /* function pointer to dll symbol */ #endif /* _MSC_VER */ @@ -1524,6 +1584,76 @@ static void s_s3_client_push_meta_request_synced( aws_linked_list_push_back(&client->synced_data.pending_meta_request_work, &meta_request_work->node); } +/* + * The scheduling system is a single-threaded work loop driven by event loop tasks. + * + * Any thread can call schedule_process_work_synced() (under lock) to post a task to + * the process_work_event_loop. If a task is already pending, this is a no-op. + * The task runs s_s3_client_process_work_task(), which calls the vtable's process_work, + * defaulting to s_s3_client_process_work_default(). All scheduling decisions happen there. + * + * schedule_process_work_synced() is called from: + * - aws_s3_client_make_meta_request() when the user creates a new operation. + * - s_s3_client_prepare_callback_queue_request() when a request finishes preparing. + * - s_s3_client_meta_request_finished_request() when a request completes on the network. + * - aws_s3_client_schedule_process_work() when a meta request's internal state changes. + * - s_s3_client_endpoint_ref_count_zero() when an endpoint is cleaned up. + * + * s_s3_client_process_work_default() runs in five steps: + * 1. Lock, swap pending_meta_request_work and prepared_requests into thread-local storage, + * adjust num_requests_being_prepared, set process_work_task_scheduled=false, unlock. + * 2. Add newly arrived meta requests to threaded_data.meta_requests. + * 3. Call update_meta_requests_threaded() to ask each meta request for its next request, + * then update_connections_threaded() to assign queued requests to HTTP connections. + * 4. Log stats. + * 5. If the client is shutting down and all work is drained, call finish_destroy(). + * + * A request moves through the pipeline as follows: + * meta_request->update() produces a request, which goes to s_acquire_mem_and_prepare_request() + * to reserve a buffer from the pool (may block if memory is full). Then vtable->prepare_request() + * builds the HTTP message (for PUT, this reads the body from the input stream). Then + * s_s3_meta_request_sign_request() signs it with SigV4. The signed request is pushed to + * synced_data.prepared_requests and the work loop is re-triggered. On the next run, + * process_work_default() drains it into threaded_data.request_queue, and + * update_connections_threaded() assigns it to an HTTP connection via the retry strategy + * and connection manager. + * + * Several counters track requests through the pipeline and enforce flow control: + * + * num_requests_in_flight (client, atomic) tracks requests from the moment they enter + * prepare until they complete on the network. Incremented in update_meta_requests_threaded(), + * decremented in s_s3_client_meta_request_finished_request(). Checked against + * max_requests_in_flight (= ideal_connection_count * 4) in s_s3_client_should_update_meta_request(). + * + * num_requests_being_prepared (client, threaded_data) tracks requests currently in the + * prepare+sign stage. Incremented in update_meta_requests_threaded(), decremented in + * process_work_default() step 1 by subtracting newly queued and failed counts. Checked + * against max_requests_prepare (= ideal_connection_count) in s_s3_client_should_update_meta_request(). + * + * num_request_being_prepared (per meta_request, atomic) is the same window as above but + * scoped to a single meta request. Incremented in update_meta_requests_threaded(), decremented + * in s_s3_client_prepare_callback_queue_request(). Checked against the per-meta-request + * connection cap in s_s3_client_should_update_meta_request(). + * + * num_requests_network (per meta_request, atomic) and num_requests_network_io (client, atomic) + * track requests from connection assignment until network completion. Incremented in + * s_s3_client_create_connection_for_request_default(), decremented in + * s_s3_client_connection_finished(). Checked against max_active_connections in + * update_connections_threaded(). + * + * num_parts_pending_read (PUT only, per meta_request, synced_data) tracks UploadPart requests + * from the moment update() produces them until the body read completes. Incremented in + * s_s3_auto_ranged_put_update() (s3_auto_ranged_put.c), decremented in + * s_s3_new_upload_part_info_after_body() (s3_auto_ranged_put.c). Checked by + * s_should_skip_scheduling_more_parts_based_on_flags() to limit read-ahead to 5 parts, + * preventing wasted buffer memory when reads are sequential. This counter is internal + * to the PUT meta request; the client does not see it. + * + * request_queue_size (client, threaded_data) counts prepared requests waiting for connection + * assignment. Updated by aws_s3_client_queue_requests_threaded() and + * aws_s3_client_dequeue_request_threaded(). Included in the prepare pipeline check in + * s_s3_client_should_update_meta_request(). + */ static void s_s3_client_schedule_process_work_synced(struct aws_s3_client *client) { AWS_PRECONDITION(client); AWS_PRECONDITION(client->vtable); @@ -2109,6 +2239,9 @@ void s_acquire_mem_and_prepare_request( aws_s3_meta_request_prepare_request(request->meta_request, request, callback, user_data); } +/* Iterate all active meta requests and call update() on each to get the next request to send. + * If update() returns a request, hand it off to async preparation. If update() returns + * work_remaining=false, the meta request is done and gets removed from threaded_data. */ void aws_s3_client_update_meta_requests_threaded(struct aws_s3_client *client) { AWS_PRECONDITION(client); @@ -2121,6 +2254,16 @@ void aws_s3_client_update_meta_requests_threaded(struct aws_s3_client *client) { uint32_t num_requests_in_flight = (uint32_t)aws_atomic_load_int(&client->stats.num_requests_in_flight); uint32_t num_requests_streaming = (uint32_t)aws_atomic_load_int(&client->stats.num_requests_streaming_request_body); + /* Two passes over the meta request list. The first pass is conservative: each meta request + * type self-limits how many requests it produces (GET caps at 8 in-flight, PUT caps at + * 1 pending read, COPY caps at 1 in-flight). This keeps any single meta request from + * flooding the prepare pipeline. + * The second pass has no per-type restriction, letting meta requests fill remaining + * capacity on a first-come-first-served basis. In this pass the only limits are: + * - GET: no internal limit (only client-level caps apply). + * - PUT: s_max_parts_pending_read (5) limits concurrent body reads. + * - COPY: no internal limit (only client-level caps apply). + * - Default: produces at most one request total (single-request operation). */ const uint32_t pass_flags[] = { AWS_S3_META_REQUEST_UPDATE_FLAG_CONSERVATIVE, 0, diff --git a/source/s3_meta_request.c b/source/s3_meta_request.c index e79d93249..f02f7171e 100644 --- a/source/s3_meta_request.c +++ b/source/s3_meta_request.c @@ -3,6 +3,48 @@ * SPDX-License-Identifier: Apache-2.0. */ +/* + * A meta request represents a single user-initiated S3 operation (GetObject, PutObject, + * CopyObject, or a pass-through default request). It is a state machine that produces + * individual HTTP requests on demand, not a pre-planned list. The client's work loop + * calls vtable->update() repeatedly, and the meta request decides what to produce next + * based on its current state and the results of previous requests. + * + * For PutObject, the state machine starts by producing a CreateMultipartUpload request. + * Once the response arrives with an upload_id, subsequent update() calls produce UploadPart + * requests until flow-control limits are hit or all parts are started. After all parts + * complete, it produces a CompleteMultipartUpload. When that finishes, update() returns + * work_remaining=false and the client removes the meta request. + * + * For GetObject, the state machine first sends a HeadObject or a ranged GET for part 1 to + * discover the object size. Once the size is known, subsequent update() calls produce + * ranged GETs (Range: bytes=X-Y) for each part until all parts are requested. + * + * Every aws_s3_request carries a backlink (request->meta_request) and a request_tag so + * completions route back to the right meta request and it knows which sub-operation + * finished. The client does not understand PUT/GET/COPY logic; it just calls update() + * on each meta request and routes completions back via the backlink by calling + * aws_s3_meta_request_finished_request(). + * + * Each meta request type implements three vtable functions: + * update() - produce the next request based on current state. + * prepare_request() - build the HTTP message (headers, body). + * finished_request() - handle completion, advance the state machine. + * See s3_auto_ranged_get.c, s3_auto_ranged_put.c, s3_copy_object.c, + * s3_default_meta_request.c. + * + * update() is called from the client's process_work_event_loop thread. + * prepare_request() runs async on the body_streaming_elg or the meta request's + * io_event_loop. finished_request() can be called from any networking thread. + * The meta request has its own synced_data/lock for state shared across threads, + * following the same pattern as the client. + * + * This file manages the preparation pipeline: vtable->prepare_request() builds the + * HTTP message and reads the body, then s_s3_meta_request_sign_request() signs it + * with SigV4, then the callback notifies the client that the request is ready for + * connection assignment. + */ + #include "aws/s3/private/s3_auto_ranged_get.h" #include "aws/s3/private/s3_auto_ranged_put.h" #include "aws/s3/private/s3_checksums.h"