4545#include <inttypes.h>
4646#include <math.h>
4747
48+ /*
49+ * The s3 client splits user operations (meta requests) into concurrent HTTP requests
50+ * and schedules them across pooled connections.
51+ *
52+ * A meta request is a user-initiated operation (GetObject, PutObject, etc). Each type
53+ * has a state machine that produces individual HTTP requests on demand (range-GETs,
54+ * UploadParts, etc). See s3_auto_ranged_get.c, s3_auto_ranged_put.c, s3_copy_object.c,
55+ * s3_default_meta_request.c.
56+ *
57+ * A request goes through: buffer acquire -> prepare (build message + read body) -> sign -> queue -> send.
58+ *
59+ * An endpoint represents an S3 hostname and owns an HTTP connection manager that pools
60+ * connections to that host.
61+ *
62+ * Threading model:
63+ *
64+ * All scheduling decisions run on a single event loop thread (process_work_event_loop),
65+ * pinned at client init from the bootstrap ELG. This means threaded_data needs no lock.
66+ *
67+ * Other threads (user threads, prepare callbacks, connection callbacks) push work into
68+ * synced_data (protected by a mutex) and call schedule_process_work_synced() to wake
69+ * the work loop. The lock is only held long enough to move list pointers.
70+ *
71+ * Client data falls into three categories:
72+ *
73+ * synced_data - mutex-protected, any thread. Mailbox for pending_meta_request_work
74+ * and prepared_requests. External threads deposit here, work loop drains.
75+ *
76+ * threaded_data - no lock, only the process_work_event_loop thread touches it.
77+ * Contains request_queue, active meta_requests list, and prep counters.
78+ * Functions with "_threaded" suffix operate on this data.
79+ *
80+ * stats - atomic counters (num_requests_in_flight, etc). Any thread, no lock.
81+ *
82+ * Event loop groups:
83+ *
84+ * Bootstrap ELG (client_bootstrap->event_loop_group) drives networking. HTTP connections
85+ * are pinned to loops in this group. One loop from this group is designated as the
86+ * process_work_event_loop at init; it still handles normal networking work too.
87+ *
88+ * Body Streaming ELG (body_streaming_elg) is a separate group for delivering response
89+ * bodies to user callbacks and for async request preparation. Keeps slow user callbacks
90+ * from blocking networking or scheduling.
91+ *
92+ * The process_work_event_loop clock (aws_event_loop_current_clock_time) is used to
93+ * schedule delayed housekeeping via schedule_task_future():
94+ * - Buffer pool trim after 5s idle.
95+ * - Endpoint cleanup every 5s (removes endpoints with zero references).
96+ *
97+ * Scheduling flow (see comments above s_s3_client_schedule_process_work_synced):
98+ *
99+ * 1. External thread deposits work into synced_data, calls schedule_process_work_synced()
100+ * which posts a task to the event loop.
101+ * 2. The event loop runs s_s3_client_process_work_default() which drains synced_data into
102+ * threaded_data, updates meta requests to produce new requests, and assigns queued
103+ * requests to HTTP connections.
104+ * 3. Requests complete on networking threads, deposit results back into synced_data, and
105+ * re-trigger the work loop.
106+ */
107+
48108#ifdef _MSC_VER
49109# pragma warning(disable : 4232) /* function pointer to dll symbol */
50110#endif /* _MSC_VER */
@@ -1524,6 +1584,76 @@ static void s_s3_client_push_meta_request_synced(
15241584 aws_linked_list_push_back (& client -> synced_data .pending_meta_request_work , & meta_request_work -> node );
15251585}
15261586
1587+ /*
1588+ * The scheduling system is a single-threaded work loop driven by event loop tasks.
1589+ *
1590+ * Any thread can call schedule_process_work_synced() (under lock) to post a task to
1591+ * the process_work_event_loop. If a task is already pending, this is a no-op.
1592+ * The task runs s_s3_client_process_work_task(), which calls the vtable's process_work,
1593+ * defaulting to s_s3_client_process_work_default(). All scheduling decisions happen there.
1594+ *
1595+ * schedule_process_work_synced() is called from:
1596+ * - aws_s3_client_make_meta_request() when the user creates a new operation.
1597+ * - s_s3_client_prepare_callback_queue_request() when a request finishes preparing.
1598+ * - s_s3_client_meta_request_finished_request() when a request completes on the network.
1599+ * - aws_s3_client_schedule_process_work() when a meta request's internal state changes.
1600+ * - s_s3_client_endpoint_ref_count_zero() when an endpoint is cleaned up.
1601+ *
1602+ * s_s3_client_process_work_default() runs in five steps:
1603+ * 1. Lock, swap pending_meta_request_work and prepared_requests into thread-local storage,
1604+ * adjust num_requests_being_prepared, set process_work_task_scheduled=false, unlock.
1605+ * 2. Add newly arrived meta requests to threaded_data.meta_requests.
1606+ * 3. Call update_meta_requests_threaded() to ask each meta request for its next request,
1607+ * then update_connections_threaded() to assign queued requests to HTTP connections.
1608+ * 4. Log stats.
1609+ * 5. If the client is shutting down and all work is drained, call finish_destroy().
1610+ *
1611+ * A request moves through the pipeline as follows:
1612+ * meta_request->update() produces a request, which goes to s_acquire_mem_and_prepare_request()
1613+ * to reserve a buffer from the pool (may block if memory is full). Then vtable->prepare_request()
1614+ * builds the HTTP message (for PUT, this reads the body from the input stream). Then
1615+ * s_s3_meta_request_sign_request() signs it with SigV4. The signed request is pushed to
1616+ * synced_data.prepared_requests and the work loop is re-triggered. On the next run,
1617+ * process_work_default() drains it into threaded_data.request_queue, and
1618+ * update_connections_threaded() assigns it to an HTTP connection via the retry strategy
1619+ * and connection manager.
1620+ *
1621+ * Several counters track requests through the pipeline and enforce flow control:
1622+ *
1623+ * num_requests_in_flight (client, atomic) tracks requests from the moment they enter
1624+ * prepare until they complete on the network. Incremented in update_meta_requests_threaded(),
1625+ * decremented in s_s3_client_meta_request_finished_request(). Checked against
1626+ * max_requests_in_flight (= ideal_connection_count * 4) in s_s3_client_should_update_meta_request().
1627+ *
1628+ * num_requests_being_prepared (client, threaded_data) tracks requests currently in the
1629+ * prepare+sign stage. Incremented in update_meta_requests_threaded(), decremented in
1630+ * process_work_default() step 1 by subtracting newly queued and failed counts. Checked
1631+ * against max_requests_prepare (= ideal_connection_count) in s_s3_client_should_update_meta_request().
1632+ *
1633+ * num_request_being_prepared (per meta_request, atomic) is the same window as above but
1634+ * scoped to a single meta request. Incremented in update_meta_requests_threaded(), decremented
1635+ * in s_s3_client_prepare_callback_queue_request(). Checked against the per-meta-request
1636+ * connection cap in s_s3_client_should_update_meta_request().
1637+ *
1638+ * num_requests_network (per meta_request, atomic) and num_requests_network_io (client, atomic)
1639+ * track requests from connection assignment until network completion. Incremented in
1640+ * s_s3_client_create_connection_for_request_default(), decremented in
1641+ * s_s3_client_connection_finished(). Checked against max_active_connections in
1642+ * update_connections_threaded().
1643+ *
1644+ * num_parts_pending_read (PUT only, per meta_request, synced_data) tracks UploadPart requests
1645+ * from the moment update() produces them until the body read completes. Incremented in
1646+ * s_s3_auto_ranged_put_update() (s3_auto_ranged_put.c), decremented in
1647+ * s_s3_new_upload_part_info_after_body() (s3_auto_ranged_put.c). Checked by
1648+ * s_should_skip_scheduling_more_parts_based_on_flags() to limit read-ahead to 5 parts,
1649+ * preventing wasted buffer memory when reads are sequential. This counter is internal
1650+ * to the PUT meta request; the client does not see it.
1651+ *
1652+ * request_queue_size (client, threaded_data) counts prepared requests waiting for connection
1653+ * assignment. Updated by aws_s3_client_queue_requests_threaded() and
1654+ * aws_s3_client_dequeue_request_threaded(). Included in the prepare pipeline check in
1655+ * s_s3_client_should_update_meta_request().
1656+ */
15271657static void s_s3_client_schedule_process_work_synced (struct aws_s3_client * client ) {
15281658 AWS_PRECONDITION (client );
15291659 AWS_PRECONDITION (client -> vtable );
@@ -2109,6 +2239,9 @@ void s_acquire_mem_and_prepare_request(
21092239 aws_s3_meta_request_prepare_request (request -> meta_request , request , callback , user_data );
21102240}
21112241
2242+ /* Iterate all active meta requests and call update() on each to get the next request to send.
2243+ * If update() returns a request, hand it off to async preparation. If update() returns
2244+ * work_remaining=false, the meta request is done and gets removed from threaded_data. */
21122245void aws_s3_client_update_meta_requests_threaded (struct aws_s3_client * client ) {
21132246 AWS_PRECONDITION (client );
21142247
@@ -2121,6 +2254,16 @@ void aws_s3_client_update_meta_requests_threaded(struct aws_s3_client *client) {
21212254 uint32_t num_requests_in_flight = (uint32_t )aws_atomic_load_int (& client -> stats .num_requests_in_flight );
21222255 uint32_t num_requests_streaming = (uint32_t )aws_atomic_load_int (& client -> stats .num_requests_streaming_request_body );
21232256
2257+ /* Two passes over the meta request list. The first pass is conservative: each meta request
2258+ * type self-limits how many requests it produces (GET caps at 8 in-flight, PUT caps at
2259+ * 1 pending read, COPY caps at 1 in-flight). This keeps any single meta request from
2260+ * flooding the prepare pipeline.
2261+ * The second pass has no per-type restriction, letting meta requests fill remaining
2262+ * capacity on a first-come-first-served basis. In this pass the only limits are:
2263+ * - GET: no internal limit (only client-level caps apply).
2264+ * - PUT: s_max_parts_pending_read (5) limits concurrent body reads.
2265+ * - COPY: no internal limit (only client-level caps apply).
2266+ * - Default: produces at most one request total (single-request operation). */
21242267 const uint32_t pass_flags [] = {
21252268 AWS_S3_META_REQUEST_UPDATE_FLAG_CONSERVATIVE ,
21262269 0 ,
0 commit comments