Commit bc5336e

hack the input stream
1 parent 886e640 commit bc5336e

3 files changed: +146 additions, -37 deletions

include/aws/s3/private/s3_parallel_input_stream.h

Lines changed: 6 additions & 1 deletion
Lines changed: 6 additions & 1 deletion

@@ -100,7 +100,12 @@ struct aws_parallel_input_stream *aws_parallel_input_stream_new_from_file(
     struct aws_byte_cursor file_name);

 const char *aws_parallel_input_stream_get_file_path(struct aws_parallel_input_stream *stream);
-
+struct aws_input_stream *aws_input_stream_new_from_parallel(
+    struct aws_allocator *allocator,
+    struct aws_parallel_input_stream *parallel_stream,
+    uint64_t offset,
+    size_t request_body_size);
+void aws_s3_part_streaming_input_stream_reset(struct aws_input_stream *stream);
 AWS_EXTERN_C_END
 AWS_POP_SANE_WARNING_LEVEL
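The header now exposes a constructor for a bounded per-part stream plus a reset hook for retries. A minimal sketch of the intended call pattern (the helper name and the 8 MiB offset/size values below are illustrative assumptions, not part of this commit):

#include <aws/io/stream.h>
#include <aws/s3/private/s3_parallel_input_stream.h>

/* Illustrative only: carve one part out of a parallel stream and rewind it
 * the way a retry would. Offset/size are assumed example values. */
static struct aws_input_stream *s_make_part_stream_sketch(
    struct aws_allocator *allocator,
    struct aws_parallel_input_stream *parallel_stream) {

    const uint64_t part_offset = 8 * (uint64_t)1024 * 1024; /* assumed start of part 2 */
    const size_t part_size = 8 * 1024 * 1024;               /* assumed 8 MiB part size */

    struct aws_input_stream *part_stream =
        aws_input_stream_new_from_parallel(allocator, parallel_stream, part_offset, part_size);

    /* On a retry, the same stream is rewound to the start of its part. */
    aws_s3_part_streaming_input_stream_reset(part_stream);
    return part_stream;
}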

source/s3_auto_ranged_put.c

Lines changed: 43 additions & 36 deletions
@@ -976,47 +976,54 @@ struct aws_future_http_message *s_s3_prepare_upload_part(struct aws_s3_request *
     part_prep->request = request;
     part_prep->on_complete = aws_future_http_message_acquire(message_future);
     if (request->parallel) {
-        printf("PARALLEL\n");
-        uint64_t offset = 0;
-        size_t request_body_size = s_compute_request_body_size(meta_request, request->part_number, &offset);
-        request->request_stream = aws_input_stream_new_from_file(
-            allocator, aws_parallel_input_stream_get_file_path(meta_request->request_body_parallel_stream));
-        request->content_length = request_body_size;
-        aws_input_stream_seek(request->request_stream, offset, AWS_SSB_BEGIN);
-        struct aws_s3_auto_ranged_put *auto_ranged_put = meta_request->impl;
-
-        /* BEGIN CRITICAL SECTION */
-        aws_s3_meta_request_lock_synced_data(meta_request);
+        if (request->num_times_prepared == 0) {
+            uint64_t offset = 0;
+            size_t request_body_size = s_compute_request_body_size(meta_request, request->part_number, &offset);
+            request->request_stream = aws_input_stream_new_from_parallel(
+                allocator, meta_request->request_body_parallel_stream, offset, request_body_size);
+            request->content_length = request_body_size;
+            struct aws_s3_auto_ranged_put *auto_ranged_put = meta_request->impl;
+
+            /* BEGIN CRITICAL SECTION */
+            aws_s3_meta_request_lock_synced_data(meta_request);

-        --auto_ranged_put->synced_data.num_parts_pending_read;
+            --auto_ranged_put->synced_data.num_parts_pending_read;

-        auto_ranged_put->synced_data.is_body_stream_at_end = false;
-        if (!request->is_noop) {
-            /* The part can finish out of order. Resize array-list to be long enough to hold this part,
-             * filling any intermediate slots with NULL. */
-            aws_array_list_ensure_capacity(&auto_ranged_put->synced_data.part_list, request->part_number);
-            while (aws_array_list_length(&auto_ranged_put->synced_data.part_list) < request->part_number) {
-                struct aws_s3_mpu_part_info *null_part = NULL;
-                aws_array_list_push_back(&auto_ranged_put->synced_data.part_list, &null_part);
+            auto_ranged_put->synced_data.is_body_stream_at_end = false;
+            if (!request->is_noop) {
+                /* The part can finish out of order. Resize array-list to be long enough to hold this part,
+                 * filling any intermediate slots with NULL. */
+                aws_array_list_ensure_capacity(&auto_ranged_put->synced_data.part_list, request->part_number);
+                while (aws_array_list_length(&auto_ranged_put->synced_data.part_list) < request->part_number) {
+                    struct aws_s3_mpu_part_info *null_part = NULL;
+                    aws_array_list_push_back(&auto_ranged_put->synced_data.part_list, &null_part);
+                }
+                /* Add part to array-list */
+                struct aws_s3_mpu_part_info *part =
+                    aws_mem_calloc(meta_request->allocator, 1, sizeof(struct aws_s3_mpu_part_info));
+                part->size = request->request_body.len;
+                aws_array_list_set_at(&auto_ranged_put->synced_data.part_list, &part, request->part_number - 1);
             }
-            /* Add part to array-list */
-            struct aws_s3_mpu_part_info *part =
-                aws_mem_calloc(meta_request->allocator, 1, sizeof(struct aws_s3_mpu_part_info));
-            part->size = request->request_body.len;
-            aws_array_list_set_at(&auto_ranged_put->synced_data.part_list, &part, request->part_number - 1);
-        }
-        aws_s3_meta_request_unlock_synced_data(meta_request);
-        /* END CRITICAL SECTION */
+            aws_s3_meta_request_unlock_synced_data(meta_request);
+            /* END CRITICAL SECTION */

-        /* We throttle the number of parts that can be "pending read"
-         * (e.g. only 1 at a time if reading from async-stream).
-         * Now that read is complete, poke the client to see if it can give us more work.
-         *
-         * Poking now gives measurable speedup (1%) for async streaming,
-         * vs waiting until all the part-prep steps are complete (still need to sign, etc) */
-        aws_s3_client_schedule_process_work(meta_request->client);
+            /* We throttle the number of parts that can be "pending read"
+             * (e.g. only 1 at a time if reading from async-stream).
+             * Now that read is complete, poke the client to see if it can give us more work.
+             *
+             * Poking now gives measurable speedup (1%) for async streaming,
+             * vs waiting until all the part-prep steps are complete (still need to sign, etc) */
+            aws_s3_client_schedule_process_work(meta_request->client);

-        s_s3_prepare_upload_part_finish(part_prep, AWS_ERROR_SUCCESS);
+            s_s3_prepare_upload_part_finish(part_prep, AWS_ERROR_SUCCESS);
+        } else {
+            printf("PARALLEL retry\n");
+            /* Not the first time preparing request (e.g. retry).
+             * We can skip over the async steps that read the body stream */
+            /* Seek back to beginning of the stream. */
+            aws_s3_part_streaming_input_stream_reset(request->request_stream);
+            s_s3_prepare_upload_part_finish(part_prep, AWS_ERROR_SUCCESS);
+        }
     } else if (request->num_times_prepared == 0) {
         /* Preparing request for the first time.
          * Next async step: read through the body stream until we've
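In short, the parallel branch now builds the part stream once (first preparation) and, on any later attempt, rewinds the existing stream rather than re-reading the body. A condensed, hypothetical restatement of that branch (the helper name is invented; the real logic is inline in s_s3_prepare_upload_part above):

/* Hypothetical condensed view of the parallel branch; the fields and helpers
 * (num_times_prepared, s_compute_request_body_size, request_body_parallel_stream)
 * are the ones used in the diff above. */
static void s_prepare_parallel_part_sketch(
    struct aws_allocator *allocator,
    struct aws_s3_meta_request *meta_request,
    struct aws_s3_request *request) {

    if (request->num_times_prepared == 0) {
        /* First attempt: carve a bounded stream out of the parallel body stream. */
        uint64_t offset = 0;
        size_t request_body_size = s_compute_request_body_size(meta_request, request->part_number, &offset);
        request->request_stream = aws_input_stream_new_from_parallel(
            allocator, meta_request->request_body_parallel_stream, offset, request_body_size);
        request->content_length = request_body_size;
    } else {
        /* Retry: reuse the stream, just seek back to the start of the part. */
        aws_s3_part_streaming_input_stream_reset(request->request_stream);
    }
}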

source/s3_parallel_input_stream.c

Lines changed: 97 additions & 0 deletions
@@ -143,3 +143,100 @@ const char *aws_parallel_input_stream_get_file_path(struct aws_parallel_input_st
     struct aws_parallel_input_stream_from_file_impl *impl = stream->impl;
     return aws_string_c_str(impl->file_path);
 }
+
+struct aws_s3_part_streaming_input_stream_impl {
+    struct aws_input_stream base;
+    struct aws_input_stream *base_stream;
+    size_t offset;
+    size_t total_length;
+    size_t length_read;
+    struct aws_allocator *allocator;
+};
+
+static int s_aws_s3_part_streaming_input_stream_seek(
+    struct aws_input_stream *stream,
+    int64_t offset,
+    enum aws_stream_seek_basis basis) {
+    struct aws_s3_part_streaming_input_stream_impl *test_input_stream =
+        AWS_CONTAINER_OF(stream, struct aws_s3_part_streaming_input_stream_impl, base);
+    aws_input_stream_seek(test_input_stream->base_stream, offset + test_input_stream->offset, basis);
+    return AWS_OP_ERR;
+}
+
+static int s_aws_s3_part_streaming_input_stream_read(struct aws_input_stream *stream, struct aws_byte_buf *dest) {
+    struct aws_s3_part_streaming_input_stream_impl *test_input_stream =
+        AWS_CONTAINER_OF(stream, struct aws_s3_part_streaming_input_stream_impl, base);
+    int rt = aws_input_stream_read(test_input_stream->base_stream, dest);
+    test_input_stream->length_read += dest->len;
+    return rt;
+}
+
+static int s_aws_s3_part_streaming_input_stream_get_status(
+    struct aws_input_stream *stream,
+    struct aws_stream_status *status) {
+    (void)stream;
+    (void)status;
+
+    struct aws_s3_part_streaming_input_stream_impl *test_input_stream =
+        AWS_CONTAINER_OF(stream, struct aws_s3_part_streaming_input_stream_impl, base);
+
+    status->is_end_of_stream = test_input_stream->length_read == test_input_stream->total_length;
+    status->is_valid = true;
+
+    return AWS_OP_SUCCESS;
+}
+
+static int s_aws_s3_part_streaming_input_stream_get_length(struct aws_input_stream *stream, int64_t *out_length) {
+    AWS_ASSERT(stream != NULL);
+    struct aws_s3_part_streaming_input_stream_impl *test_input_stream =
+        AWS_CONTAINER_OF(stream, struct aws_s3_part_streaming_input_stream_impl, base);
+    *out_length = (int64_t)test_input_stream->total_length;
+    return AWS_OP_SUCCESS;
+}
+
+static void s_aws_s3_part_streaming_input_stream_destroy(
+    struct aws_s3_part_streaming_input_stream_impl *test_input_stream) {
+    aws_input_stream_release(test_input_stream->base_stream);
+    aws_mem_release(test_input_stream->allocator, test_input_stream);
+}
+
+static struct aws_input_stream_vtable s_aws_s3_part_streaming_input_stream_vtable = {
+    .seek = s_aws_s3_part_streaming_input_stream_seek,
+    .read = s_aws_s3_part_streaming_input_stream_read,
+    .get_status = s_aws_s3_part_streaming_input_stream_get_status,
+    .get_length = s_aws_s3_part_streaming_input_stream_get_length,
+};
+
+void aws_s3_part_streaming_input_stream_reset(struct aws_input_stream *stream) {
+    struct aws_s3_part_streaming_input_stream_impl *test_input_stream =
+        AWS_CONTAINER_OF(stream, struct aws_s3_part_streaming_input_stream_impl, base);
+    test_input_stream->length_read = 0;
+    aws_input_stream_seek(test_input_stream->base_stream, test_input_stream->offset, AWS_SSB_BEGIN);
+}
+
+struct aws_input_stream *aws_input_stream_new_from_parallel(
+    struct aws_allocator *allocator,
+    struct aws_parallel_input_stream *parallel_stream,
+    uint64_t offset,
+    size_t request_body_size) {
+
+    struct aws_s3_part_streaming_input_stream_impl *test_input_stream =
+        aws_mem_calloc(allocator, 1, sizeof(struct aws_s3_part_streaming_input_stream_impl));
+    aws_ref_count_init(
+        &test_input_stream->base.ref_count,
+        test_input_stream,
+        (aws_simple_completion_callback *)s_aws_s3_part_streaming_input_stream_destroy);
+
+    test_input_stream->base.vtable = &s_aws_s3_part_streaming_input_stream_vtable;
+    struct aws_parallel_input_stream_from_file_impl *impl = parallel_stream->impl;
+    aws_mem_calloc(allocator, 1, sizeof(struct aws_parallel_input_stream_from_file_impl));
+    aws_parallel_input_stream_init_base(&impl->base, allocator, &s_parallel_input_stream_from_file_vtable, impl);
+
+    test_input_stream->base_stream = aws_input_stream_new_from_file(allocator, aws_string_c_str(impl->file_path));
+    test_input_stream->total_length = request_body_size;
+    test_input_stream->offset = offset;
+    test_input_stream->length_read = 0;
+    aws_input_stream_seek(test_input_stream->base_stream, offset, AWS_SSB_BEGIN);
+
+    return &test_input_stream->base;
+}
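Taken together, the new type is a bounded, rewindable window onto the backing file: get_length reports the part size, get_status reports end-of-stream once length_read reaches total_length, and reset zeroes length_read and seeks the wrapped file stream back to the stored offset. A standalone usage sketch under assumptions not in this commit (the upload.bin path and the 1024-byte offset / 256-byte part size are placeholders):

#include <aws/common/byte_buf.h>
#include <aws/io/stream.h>
#include <aws/s3/private/s3_parallel_input_stream.h>
#include <aws/s3/s3.h>

#include <stdio.h>

int main(void) {
    struct aws_allocator *allocator = aws_default_allocator();
    aws_s3_library_init(allocator);

    /* Placeholder file; in the real code path this comes from the meta-request. */
    struct aws_parallel_input_stream *parallel_stream =
        aws_parallel_input_stream_new_from_file(allocator, aws_byte_cursor_from_c_str("upload.bin"));

    /* View bytes [1024, 1024 + 256) of the file as a standalone part stream. */
    struct aws_input_stream *part_stream =
        aws_input_stream_new_from_parallel(allocator, parallel_stream, 1024 /*offset*/, 256 /*part size*/);

    struct aws_byte_buf body;
    aws_byte_buf_init(&body, allocator, 256);
    aws_input_stream_read(part_stream, &body);
    printf("read %zu bytes for this part\n", body.len);

    /* Simulate a retry: rewind to the start of the part and read it again. */
    aws_s3_part_streaming_input_stream_reset(part_stream);
    body.len = 0;
    aws_input_stream_read(part_stream, &body);

    aws_byte_buf_clean_up(&body);
    aws_input_stream_release(part_stream);
    aws_parallel_input_stream_release(parallel_stream);
    aws_s3_library_clean_up();
    return 0;
}

The retry path in s3_auto_ranged_put.c relies on exactly this reset behavior instead of recreating the stream.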
