From b59f4ebc79c3135c5197a04d0ad575646a553dab Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Tue, 5 May 2026 21:46:04 +0000 Subject: [PATCH 1/6] poc --- include/aws/s3/private/s3_meta_request_impl.h | 4 + include/aws/s3/s3_client.h | 3 +- source/s3_meta_request.c | 186 ++++++++++++++---- tests/CMakeLists.txt | 2 + tests/s3_data_plane_tests.c | 91 +++++++++ 5 files changed, 245 insertions(+), 41 deletions(-) diff --git a/include/aws/s3/private/s3_meta_request_impl.h b/include/aws/s3/private/s3_meta_request_impl.h index e9f3b5dc7..687d99af3 100644 --- a/include/aws/s3/private/s3_meta_request_impl.h +++ b/include/aws/s3/private/s3_meta_request_impl.h @@ -317,6 +317,10 @@ struct aws_s3_meta_request { FILE *recv_file; struct aws_string *recv_filepath; bool recv_file_delete_on_failure; + /* When true, use O_DIRECT for writing received data to file */ + bool recv_file_direct_io; + /* Base file position for O_DIRECT writes (from recv_file_position option) */ + uint64_t recv_file_base_position; /* File I/O options. */ struct aws_s3_file_io_options fio_opts; diff --git a/include/aws/s3/s3_client.h b/include/aws/s3/s3_client.h index 0513e5768..f6d8e8445 100644 --- a/include/aws/s3/s3_client.h +++ b/include/aws/s3/s3_client.h @@ -349,7 +349,8 @@ struct aws_s3_file_io_options { * Enable direct IO to bypass the OS cache. Helpful when the disk I/O outperforms the kernel cache. * Notes: * - Only supported on linux for now. - * - Only supports upload for now. + * - Supported for both upload (send_filepath) and download (recv_filepath). + * - For download, O_DIRECT is not supported with AWS_S3_RECV_FILE_CREATE_OR_APPEND. * - Check NOTES for O_DIRECT for additional info https://man7.org/linux/man-pages/man2/openat.2.html * In summary, O_DIRECT is a potentially powerful tool that should be used with caution. */ diff --git a/source/s3_meta_request.c b/source/s3_meta_request.c index e79d93249..84acb212f 100644 --- a/source/s3_meta_request.c +++ b/source/s3_meta_request.c @@ -287,51 +287,92 @@ int aws_s3_meta_request_init_base( if (options->recv_filepath.len > 0) { meta_request->recv_filepath = aws_string_new_from_cursor(allocator, &options->recv_filepath); - switch (options->recv_file_option) { - case AWS_S3_RECV_FILE_CREATE_OR_REPLACE: - meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "wb"); - break; + meta_request->recv_file_delete_on_failure = options->recv_file_delete_on_failure; - case AWS_S3_RECV_FILE_CREATE_NEW: - if (aws_path_exists(meta_request->recv_filepath)) { - AWS_LOGF_ERROR( - AWS_LS_S3_META_REQUEST, - "id=%p Cannot receive file via CREATE_NEW: file already exists", - (void *)meta_request); - aws_raise_error(AWS_ERROR_S3_RECV_FILE_ALREADY_EXISTS); - break; - } else { + /* When direct_io is enabled, use O_DIRECT fd-based writes instead of FILE* fwrite. + * Supported for CREATE_OR_REPLACE, CREATE_NEW, and WRITE_TO_POSITION. + * APPEND is incompatible with O_DIRECT (no offset-based writes). */ + if (meta_request->fio_opts.direct_io && + options->recv_file_option != AWS_S3_RECV_FILE_CREATE_OR_APPEND) { + + /* Validate preconditions same as the FILE* path */ + if (options->recv_file_option == AWS_S3_RECV_FILE_CREATE_NEW && + aws_path_exists(meta_request->recv_filepath)) { + AWS_LOGF_ERROR( + AWS_LS_S3_META_REQUEST, + "id=%p Cannot receive file via CREATE_NEW: file already exists", + (void *)meta_request); + aws_raise_error(AWS_ERROR_S3_RECV_FILE_ALREADY_EXISTS); + goto error; + } + if (options->recv_file_option == AWS_S3_RECV_FILE_WRITE_TO_POSITION && + !aws_path_exists(meta_request->recv_filepath)) { + AWS_LOGF_ERROR( + AWS_LS_S3_META_REQUEST, + "id=%p Cannot receive file via WRITE_TO_POSITION: file not found.", + (void *)meta_request); + aws_raise_error(AWS_ERROR_S3_RECV_FILE_NOT_FOUND); + goto error; + } + + meta_request->recv_file_direct_io = true; + meta_request->recv_file_base_position = + (options->recv_file_option == AWS_S3_RECV_FILE_WRITE_TO_POSITION) ? options->recv_file_position : 0; + + AWS_LOGF_DEBUG( + AWS_LS_S3_META_REQUEST, + "id=%p: O_DIRECT enabled for download write path. base_position:%" PRIu64, + (void *)meta_request, + meta_request->recv_file_base_position); + } else { + /* Standard FILE* path */ + switch (options->recv_file_option) { + case AWS_S3_RECV_FILE_CREATE_OR_REPLACE: meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "wb"); break; - } - case AWS_S3_RECV_FILE_CREATE_OR_APPEND: - meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "ab"); - break; - case AWS_S3_RECV_FILE_WRITE_TO_POSITION: - if (!aws_path_exists(meta_request->recv_filepath)) { - AWS_LOGF_ERROR( - AWS_LS_S3_META_REQUEST, - "id=%p Cannot receive file via WRITE_TO_POSITION: file not found.", - (void *)meta_request); - aws_raise_error(AWS_ERROR_S3_RECV_FILE_NOT_FOUND); - break; - } else { - meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "r+"); - if (meta_request->recv_file && - aws_fseek(meta_request->recv_file, options->recv_file_position, SEEK_SET) != AWS_OP_SUCCESS) { - /* error out. */ - goto error; + + case AWS_S3_RECV_FILE_CREATE_NEW: + if (aws_path_exists(meta_request->recv_filepath)) { + AWS_LOGF_ERROR( + AWS_LS_S3_META_REQUEST, + "id=%p Cannot receive file via CREATE_NEW: file already exists", + (void *)meta_request); + aws_raise_error(AWS_ERROR_S3_RECV_FILE_ALREADY_EXISTS); + break; + } else { + meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "wb"); + break; } + case AWS_S3_RECV_FILE_CREATE_OR_APPEND: + meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "ab"); break; - } + case AWS_S3_RECV_FILE_WRITE_TO_POSITION: + if (!aws_path_exists(meta_request->recv_filepath)) { + AWS_LOGF_ERROR( + AWS_LS_S3_META_REQUEST, + "id=%p Cannot receive file via WRITE_TO_POSITION: file not found.", + (void *)meta_request); + aws_raise_error(AWS_ERROR_S3_RECV_FILE_NOT_FOUND); + break; + } else { + meta_request->recv_file = + aws_fopen(aws_string_c_str(meta_request->recv_filepath), "r+"); + if (meta_request->recv_file && + aws_fseek(meta_request->recv_file, options->recv_file_position, SEEK_SET) != + AWS_OP_SUCCESS) { + goto error; + } + break; + } - default: - AWS_ASSERT(false); - aws_raise_error(AWS_ERROR_INVALID_ARGUMENT); - break; - } - if (!meta_request->recv_file) { - goto error; + default: + AWS_ASSERT(false); + aws_raise_error(AWS_ERROR_INVALID_ARGUMENT); + break; + } + if (!meta_request->recv_file) { + goto error; + } } } @@ -569,6 +610,9 @@ static void s_s3_meta_request_destroy(void *user_data) { /* If the meta request succeed, the file should be closed from finish call. So it must be failing. */ aws_file_delete(meta_request->recv_filepath); } + } else if (meta_request->recv_file_direct_io && meta_request->recv_file_delete_on_failure) { + /* O_DIRECT path: no FILE* to close, but still honor delete-on-failure during teardown */ + aws_file_delete(meta_request->recv_filepath); } aws_string_destroy(meta_request->recv_filepath); @@ -2139,7 +2183,65 @@ static void s_s3_meta_request_event_delivery_task(struct aws_task *task, void *a aws_high_res_clock_get_ticks((uint64_t *)&metric->time_metrics.deliver_start_timestamp_ns); } - if (meta_request->recv_file) { + if (meta_request->recv_file_direct_io) { + /* O_DIRECT write path — use offset-based direct I/O */ + uint64_t write_offset = + meta_request->recv_file_base_position + delivery_range_start; + struct aws_byte_cursor write_cursor = + aws_byte_cursor_from_array(response_body.ptr, response_body.len); + if (aws_file_path_write_to_offset_direct_io( + meta_request->recv_filepath, write_offset, write_cursor)) { + if (aws_last_error() == AWS_ERROR_UNSUPPORTED_OPERATION) { + /* O_DIRECT not supported, fall back to FILE* for remainder */ + AWS_LOGF_WARN( + AWS_LS_S3_META_REQUEST, + "id=%p: O_DIRECT write not supported, falling back to buffered I/O", + (void *)meta_request); + meta_request->recv_file_direct_io = false; + aws_reset_error(); + /* Open FILE* and seek to current position */ + meta_request->recv_file = aws_fopen( + aws_string_c_str(meta_request->recv_filepath), "r+"); + if (meta_request->recv_file && + aws_fseek( + meta_request->recv_file, + (int64_t)(meta_request->recv_file_base_position + delivery_range_start), + SEEK_SET) == AWS_OP_SUCCESS) { + /* Retry this write with fwrite */ + if (fwrite( + (void *)response_body.ptr, + response_body.len, + 1, + meta_request->recv_file) < 1) { + int errno_value = + ferror(meta_request->recv_file) ? errno : 0; + aws_translate_and_raise_io_error_or( + errno_value, AWS_ERROR_FILE_WRITE_FAILURE); + error_code = aws_last_error(); + } + } else { + error_code = aws_last_error(); + } + if (error_code != AWS_ERROR_SUCCESS) { + AWS_LOGF_ERROR( + AWS_LS_S3_META_REQUEST, + "id=%p Failed O_DIRECT fallback to buffered write. aws-error:%s", + (void *)meta_request, + aws_error_name(error_code)); + } + } else { + error_code = aws_last_error(); + AWS_LOGF_ERROR( + AWS_LS_S3_META_REQUEST, + "id=%p Failed writing to file with O_DIRECT. aws-error:%s", + (void *)meta_request, + aws_error_name(error_code)); + } + } + if (meta_request->client->enable_read_backpressure) { + aws_s3_meta_request_increment_read_window(meta_request, response_body.len); + } + } else if (meta_request->recv_file) { /* Write the data directly to the file. No need to seek, since the event will always be * delivered with the right order. */ if (fwrite((void *)response_body.ptr, response_body.len, 1, meta_request->recv_file) < 1) { @@ -2417,6 +2519,10 @@ void aws_s3_meta_request_finish_default(struct aws_s3_meta_request *meta_request if (finish_result.error_code && meta_request->recv_file_delete_on_failure) { aws_file_delete(meta_request->recv_filepath); } + } else if (meta_request->recv_file_direct_io && finish_result.error_code && + meta_request->recv_file_delete_on_failure) { + /* O_DIRECT path has no FILE* to close, but still honor delete-on-failure */ + aws_file_delete(meta_request->recv_filepath); } while (!aws_linked_list_empty(&release_request_list)) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 842279c62..e94e1dd0f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -78,6 +78,8 @@ add_net_test_case(test_s3_get_object_file_path) add_net_test_case(test_s3_get_object_file_path_create_new) add_net_test_case(test_s3_get_object_file_path_append) add_net_test_case(test_s3_get_object_file_path_to_position) +add_net_test_case(test_s3_get_object_file_path_direct_io) +add_net_test_case(test_s3_get_object_file_path_direct_io_to_position) add_net_test_case(test_s3_get_object_empty_object) add_net_test_case(test_s3_get_object_multiple) add_net_test_case(test_s3_get_object_multiple_serial) diff --git a/tests/s3_data_plane_tests.c b/tests/s3_data_plane_tests.c index 4d3e2ce71..401b8ea9c 100644 --- a/tests/s3_data_plane_tests.c +++ b/tests/s3_data_plane_tests.c @@ -1647,6 +1647,97 @@ static int s_test_s3_get_object_file_path_to_position(struct aws_allocator *allo return 0; } +AWS_TEST_CASE(test_s3_get_object_file_path_direct_io, s_test_s3_get_object_file_path_direct_io) +static int s_test_s3_get_object_file_path_direct_io(struct aws_allocator *allocator, void *ctx) { + (void)ctx; + + struct aws_s3_tester tester; + AWS_ZERO_STRUCT(tester); + ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester)); + + struct aws_s3_tester_client_options client_options = { + .part_size = MB_TO_BYTES(5), + }; + + struct aws_s3_client *client = NULL; + ASSERT_SUCCESS(aws_s3_tester_client_new(&tester, &client_options, &client)); + + struct aws_s3_file_io_options fio_opts = { + .direct_io = true, + }; + + struct aws_byte_cursor object_path = aws_byte_cursor_from_c_str("/pre-existing-1MB"); + struct aws_s3_tester_meta_request_options get_options = { + .allocator = allocator, + .meta_request_type = AWS_S3_META_REQUEST_TYPE_GET_OBJECT, + .validate_type = AWS_S3_TESTER_VALIDATE_TYPE_EXPECT_SUCCESS, + .client = client, + .fio_opts = &fio_opts, + .get_options = + { + .object_path = object_path, + .file_on_disk = true, + }, + }; + + ASSERT_SUCCESS(aws_s3_tester_send_meta_request_with_options(&tester, &get_options, NULL)); + + client = aws_s3_client_release(client); + aws_s3_tester_clean_up(&tester); + return 0; +} + +AWS_TEST_CASE( + test_s3_get_object_file_path_direct_io_to_position, + s_test_s3_get_object_file_path_direct_io_to_position) +static int s_test_s3_get_object_file_path_direct_io_to_position(struct aws_allocator *allocator, void *ctx) { + (void)ctx; + + struct aws_s3_tester tester; + AWS_ZERO_STRUCT(tester); + ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester)); + + struct aws_s3_tester_client_options client_options = { + .part_size = MB_TO_BYTES(5), + }; + + struct aws_s3_meta_request_test_results meta_request_test_results; + aws_s3_meta_request_test_results_init(&meta_request_test_results, allocator); + struct aws_s3_client *client = NULL; + ASSERT_SUCCESS(aws_s3_tester_client_new(&tester, &client_options, &client)); + + struct aws_s3_file_io_options fio_opts = { + .direct_io = true, + }; + + struct aws_byte_cursor object_path = aws_byte_cursor_from_c_str("/pre-existing-1MB"); + uint64_t pre_exist_file_length = 10; + struct aws_s3_tester_meta_request_options get_options = { + .allocator = allocator, + .meta_request_type = AWS_S3_META_REQUEST_TYPE_GET_OBJECT, + .validate_type = AWS_S3_TESTER_VALIDATE_TYPE_EXPECT_SUCCESS, + .client = client, + .fio_opts = &fio_opts, + .get_options = + { + .object_path = object_path, + .file_on_disk = true, + .recv_file_option = AWS_S3_RECV_FILE_WRITE_TO_POSITION, + .recv_file_position = 20, + .pre_exist_file_length = pre_exist_file_length, + }, + }; + + ASSERT_SUCCESS(aws_s3_tester_send_meta_request_with_options(&tester, &get_options, &meta_request_test_results)); + ASSERT_UINT_EQUALS( + get_options.get_options.recv_file_position + MB_TO_BYTES(1), meta_request_test_results.received_file_size); + + aws_s3_meta_request_test_results_clean_up(&meta_request_test_results); + client = aws_s3_client_release(client); + aws_s3_tester_clean_up(&tester); + return 0; +} + AWS_TEST_CASE(test_s3_get_object_empty_object, s_test_s3_get_object_empty_default) static int s_test_s3_get_object_empty_default(struct aws_allocator *allocator, void *ctx) { (void)ctx; From b039c65556dc51fcb2a267df6649f53b7f826813 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Tue, 12 May 2026 23:58:25 +0000 Subject: [PATCH 2/6] continues --- include/aws/s3/private/s3_meta_request_impl.h | 2 - include/aws/s3/s3_client.h | 4 +- source/s3_meta_request.c | 56 +++--- tests/CMakeLists.txt | 4 +- tests/s3_data_plane_tests.c | 174 ++++++++++++++++-- 5 files changed, 189 insertions(+), 51 deletions(-) diff --git a/include/aws/s3/private/s3_meta_request_impl.h b/include/aws/s3/private/s3_meta_request_impl.h index 687d99af3..f12070e51 100644 --- a/include/aws/s3/private/s3_meta_request_impl.h +++ b/include/aws/s3/private/s3_meta_request_impl.h @@ -319,8 +319,6 @@ struct aws_s3_meta_request { bool recv_file_delete_on_failure; /* When true, use O_DIRECT for writing received data to file */ bool recv_file_direct_io; - /* Base file position for O_DIRECT writes (from recv_file_position option) */ - uint64_t recv_file_base_position; /* File I/O options. */ struct aws_s3_file_io_options fio_opts; diff --git a/include/aws/s3/s3_client.h b/include/aws/s3/s3_client.h index f6d8e8445..bd3cfaa98 100644 --- a/include/aws/s3/s3_client.h +++ b/include/aws/s3/s3_client.h @@ -350,7 +350,9 @@ struct aws_s3_file_io_options { * Notes: * - Only supported on linux for now. * - Supported for both upload (send_filepath) and download (recv_filepath). - * - For download, O_DIRECT is not supported with AWS_S3_RECV_FILE_CREATE_OR_APPEND. + * - For download, O_DIRECT is only supported with AWS_S3_RECV_FILE_CREATE_OR_REPLACE + * and AWS_S3_RECV_FILE_CREATE_NEW (i.e. writing from the beginning of the file). + * APPEND and WRITE_TO_POSITION are not supported with O_DIRECT. * - Check NOTES for O_DIRECT for additional info https://man7.org/linux/man-pages/man2/openat.2.html * In summary, O_DIRECT is a potentially powerful tool that should be used with caution. */ diff --git a/source/s3_meta_request.c b/source/s3_meta_request.c index 84acb212f..8ea0f63ea 100644 --- a/source/s3_meta_request.c +++ b/source/s3_meta_request.c @@ -290,10 +290,10 @@ int aws_s3_meta_request_init_base( meta_request->recv_file_delete_on_failure = options->recv_file_delete_on_failure; /* When direct_io is enabled, use O_DIRECT fd-based writes instead of FILE* fwrite. - * Supported for CREATE_OR_REPLACE, CREATE_NEW, and WRITE_TO_POSITION. - * APPEND is incompatible with O_DIRECT (no offset-based writes). */ + * Only supported for CREATE_OR_REPLACE and CREATE_NEW (writing from the beginning). */ if (meta_request->fio_opts.direct_io && - options->recv_file_option != AWS_S3_RECV_FILE_CREATE_OR_APPEND) { + (options->recv_file_option == AWS_S3_RECV_FILE_CREATE_OR_REPLACE || + options->recv_file_option == AWS_S3_RECV_FILE_CREATE_NEW)) { /* Validate preconditions same as the FILE* path */ if (options->recv_file_option == AWS_S3_RECV_FILE_CREATE_NEW && @@ -305,26 +305,23 @@ int aws_s3_meta_request_init_base( aws_raise_error(AWS_ERROR_S3_RECV_FILE_ALREADY_EXISTS); goto error; } - if (options->recv_file_option == AWS_S3_RECV_FILE_WRITE_TO_POSITION && - !aws_path_exists(meta_request->recv_filepath)) { - AWS_LOGF_ERROR( - AWS_LS_S3_META_REQUEST, - "id=%p Cannot receive file via WRITE_TO_POSITION: file not found.", - (void *)meta_request); - aws_raise_error(AWS_ERROR_S3_RECV_FILE_NOT_FOUND); - goto error; - } meta_request->recv_file_direct_io = true; - meta_request->recv_file_base_position = - (options->recv_file_option == AWS_S3_RECV_FILE_WRITE_TO_POSITION) ? options->recv_file_position : 0; AWS_LOGF_DEBUG( AWS_LS_S3_META_REQUEST, - "id=%p: O_DIRECT enabled for download write path. base_position:%" PRIu64, - (void *)meta_request, - meta_request->recv_file_base_position); + "id=%p: O_DIRECT enabled for download write path.", + (void *)meta_request); } else { + /* Fail if user requested O_DIRECT with APPEND or WRITE_TO_POSITION */ + if (meta_request->fio_opts.direct_io) { + AWS_LOGF_ERROR( + AWS_LS_S3_META_REQUEST, + "id=%p O_DIRECT for download is only supported with CREATE_OR_REPLACE and CREATE_NEW", + (void *)meta_request); + aws_raise_error(AWS_ERROR_UNSUPPORTED_OPERATION); + goto error; + } /* Standard FILE* path */ switch (options->recv_file_option) { case AWS_S3_RECV_FILE_CREATE_OR_REPLACE: @@ -2185,29 +2182,26 @@ static void s_s3_meta_request_event_delivery_task(struct aws_task *task, void *a if (meta_request->recv_file_direct_io) { /* O_DIRECT write path — use offset-based direct I/O */ - uint64_t write_offset = - meta_request->recv_file_base_position + delivery_range_start; + uint64_t write_offset = delivery_range_start; struct aws_byte_cursor write_cursor = aws_byte_cursor_from_array(response_body.ptr, response_body.len); if (aws_file_path_write_to_offset_direct_io( meta_request->recv_filepath, write_offset, write_cursor)) { if (aws_last_error() == AWS_ERROR_UNSUPPORTED_OPERATION) { - /* O_DIRECT not supported, fall back to FILE* for remainder */ + /* Platform doesn't support O_DIRECT, fall back to buffered I/O */ AWS_LOGF_WARN( AWS_LS_S3_META_REQUEST, - "id=%p: O_DIRECT write not supported, falling back to buffered I/O", + "id=%p: O_DIRECT write not supported on this platform, " + "falling back to buffered I/O", (void *)meta_request); meta_request->recv_file_direct_io = false; aws_reset_error(); - /* Open FILE* and seek to current position */ + /* Open FILE* and write this chunk */ meta_request->recv_file = aws_fopen( aws_string_c_str(meta_request->recv_filepath), "r+"); if (meta_request->recv_file && - aws_fseek( - meta_request->recv_file, - (int64_t)(meta_request->recv_file_base_position + delivery_range_start), - SEEK_SET) == AWS_OP_SUCCESS) { - /* Retry this write with fwrite */ + aws_fseek(meta_request->recv_file, (int64_t)write_offset, SEEK_SET) == + AWS_OP_SUCCESS) { if (fwrite( (void *)response_body.ptr, response_body.len, @@ -2222,14 +2216,8 @@ static void s_s3_meta_request_event_delivery_task(struct aws_task *task, void *a } else { error_code = aws_last_error(); } - if (error_code != AWS_ERROR_SUCCESS) { - AWS_LOGF_ERROR( - AWS_LS_S3_META_REQUEST, - "id=%p Failed O_DIRECT fallback to buffered write. aws-error:%s", - (void *)meta_request, - aws_error_name(error_code)); - } } else { + /* Real I/O error — hard fail */ error_code = aws_last_error(); AWS_LOGF_ERROR( AWS_LS_S3_META_REQUEST, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e94e1dd0f..3c4cb2802 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -79,7 +79,9 @@ add_net_test_case(test_s3_get_object_file_path_create_new) add_net_test_case(test_s3_get_object_file_path_append) add_net_test_case(test_s3_get_object_file_path_to_position) add_net_test_case(test_s3_get_object_file_path_direct_io) -add_net_test_case(test_s3_get_object_file_path_direct_io_to_position) +add_net_test_case(test_s3_get_object_file_path_direct_io_content_verify) +add_net_test_case(test_s3_get_object_file_path_direct_io_unsupported_append) +add_net_test_case(test_s3_get_object_file_path_direct_io_unsupported_write_to_position) add_net_test_case(test_s3_get_object_empty_object) add_net_test_case(test_s3_get_object_multiple) add_net_test_case(test_s3_get_object_multiple_serial) diff --git a/tests/s3_data_plane_tests.c b/tests/s3_data_plane_tests.c index 401b8ea9c..ed9788cfb 100644 --- a/tests/s3_data_plane_tests.c +++ b/tests/s3_data_plane_tests.c @@ -5,6 +5,7 @@ #include "aws/s3/private/s3_checksums.h" #include "aws/s3/private/s3_client_impl.h" +#include #include "aws/s3/private/s3_default_buffer_pool.h" #include "aws/s3/private/s3_meta_request_impl.h" #include "aws/s3/private/s3_util.h" @@ -28,6 +29,12 @@ #include #include +/* Forward declaration for checksum validation callback used in O_DIRECT tests */ +void s_s3_test_validate_checksum( + struct aws_s3_meta_request *meta_request, + const struct aws_s3_meta_request_result *result, + void *user_data); + AWS_TEST_CASE(test_s3_client_create_destroy, s_test_s3_client_create_destroy) static int s_test_s3_client_create_destroy(struct aws_allocator *allocator, void *ctx) { (void)ctx; @@ -1688,21 +1695,122 @@ static int s_test_s3_get_object_file_path_direct_io(struct aws_allocator *alloca } AWS_TEST_CASE( - test_s3_get_object_file_path_direct_io_to_position, - s_test_s3_get_object_file_path_direct_io_to_position) -static int s_test_s3_get_object_file_path_direct_io_to_position(struct aws_allocator *allocator, void *ctx) { + test_s3_get_object_file_path_direct_io_content_verify, + s_test_s3_get_object_file_path_direct_io_content_verify) +static int s_test_s3_get_object_file_path_direct_io_content_verify(struct aws_allocator *allocator, void *ctx) { (void)ctx; struct aws_s3_tester tester; - AWS_ZERO_STRUCT(tester); ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester)); struct aws_s3_tester_client_options client_options = { .part_size = MB_TO_BYTES(5), }; + struct aws_s3_client *client = NULL; + ASSERT_SUCCESS(aws_s3_tester_client_new(&tester, &client_options, &client)); + + /* Upload with CRC32 checksum */ + struct aws_byte_buf path_buf; + AWS_ZERO_STRUCT(path_buf); + ASSERT_SUCCESS(aws_s3_tester_upload_file_path_init( + allocator, &path_buf, aws_byte_cursor_from_c_str("/prefix/round_trip/direct_io_verify.txt"))); + struct aws_byte_cursor object_path = aws_byte_cursor_from_buf(&path_buf); + + struct aws_s3_tester_meta_request_options put_options = { + .allocator = allocator, + .meta_request_type = AWS_S3_META_REQUEST_TYPE_PUT_OBJECT, + .client = client, + .checksum_algorithm = AWS_SCA_CRC32, + .put_options = + { + .object_size_mb = 1, + .object_path_override = object_path, + }, + }; + ASSERT_SUCCESS(aws_s3_tester_send_meta_request_with_options(&tester, &put_options, NULL)); + + /* Download with O_DIRECT to a known file path */ + const char *local_file_path = "/tmp/aws_s3_direct_io_crc32_verify"; + struct aws_string *host_name = + aws_s3_tester_build_endpoint_string(allocator, &g_test_bucket_name, &g_test_s3_region); + struct aws_byte_cursor host_cursor = aws_byte_cursor_from_string(host_name); + struct aws_http_message *message = aws_s3_test_get_object_request_new(allocator, host_cursor, object_path); + + struct aws_s3_checksum_config checksum_config = { + .validate_response_checksum = true, + }; + struct aws_s3_file_io_options fio_opts = { + .direct_io = true, + }; + struct aws_s3_meta_request_options meta_request_options = { + .type = AWS_S3_META_REQUEST_TYPE_GET_OBJECT, + .message = message, + .recv_filepath = aws_byte_cursor_from_c_str(local_file_path), + .checksum_config = &checksum_config, + .fio_opts = &fio_opts, + }; + struct aws_s3_meta_request_test_results meta_request_test_results; aws_s3_meta_request_test_results_init(&meta_request_test_results, allocator); + ASSERT_SUCCESS(aws_s3_tester_bind_meta_request(&tester, &meta_request_options, &meta_request_test_results)); + + struct aws_s3_meta_request *meta_request = aws_s3_client_make_meta_request(client, &meta_request_options); + ASSERT_NOT_NULL(meta_request); + + aws_s3_tester_wait_for_meta_request_finish(&tester); + ASSERT_INT_EQUALS(AWS_ERROR_SUCCESS, meta_request_test_results.finished_error_code); + /* S3 response checksum was validated */ + ASSERT_TRUE(meta_request_test_results.did_validate); + ASSERT_INT_EQUALS(AWS_SCA_CRC32, meta_request_test_results.validation_algorithm); + + aws_s3_meta_request_release(meta_request); + aws_s3_tester_wait_for_meta_request_shutdown(&tester); + + /* Read file back from disk and compute CRC32. + * The tester uploads content using AWS_AUTOGEN_LOREM_IPSUM pattern. */ + size_t expected_size = MB_TO_BYTES(1); + struct aws_byte_buf expected_buf; + s_byte_buf_init_autogenned(&expected_buf, allocator, expected_size, AWS_AUTOGEN_LOREM_IPSUM); + uint32_t expected_crc = aws_checksums_crc32(expected_buf.buffer, (int)expected_buf.len, 0); + + FILE *verify_file = aws_fopen(local_file_path, "rb"); + ASSERT_NOT_NULL(verify_file); + struct aws_byte_buf file_buf; + aws_byte_buf_init(&file_buf, allocator, expected_size); + file_buf.len = fread(file_buf.buffer, 1, expected_size, verify_file); + fclose(verify_file); + + ASSERT_UINT_EQUALS(expected_size, file_buf.len); + uint32_t actual_crc = aws_checksums_crc32(file_buf.buffer, (int)file_buf.len, 0); + ASSERT_UINT_EQUALS(expected_crc, actual_crc); + + /* Cleanup */ + remove(local_file_path); + aws_byte_buf_clean_up(&file_buf); + aws_byte_buf_clean_up(&expected_buf); + aws_s3_meta_request_test_results_clean_up(&meta_request_test_results); + aws_http_message_release(message); + aws_string_destroy(host_name); + aws_byte_buf_clean_up(&path_buf); + aws_s3_client_release(client); + aws_s3_tester_clean_up(&tester); + return 0; +} + +AWS_TEST_CASE( + test_s3_get_object_file_path_direct_io_unsupported_append, + s_test_s3_get_object_file_path_direct_io_unsupported_append) +static int s_test_s3_get_object_file_path_direct_io_unsupported_append(struct aws_allocator *allocator, void *ctx) { + (void)ctx; + + struct aws_s3_tester tester; + ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester)); + + struct aws_s3_tester_client_options client_options = { + .part_size = MB_TO_BYTES(5), + }; + struct aws_s3_client *client = NULL; ASSERT_SUCCESS(aws_s3_tester_client_new(&tester, &client_options, &client)); @@ -1711,11 +1819,54 @@ static int s_test_s3_get_object_file_path_direct_io_to_position(struct aws_alloc }; struct aws_byte_cursor object_path = aws_byte_cursor_from_c_str("/pre-existing-1MB"); - uint64_t pre_exist_file_length = 10; struct aws_s3_tester_meta_request_options get_options = { .allocator = allocator, .meta_request_type = AWS_S3_META_REQUEST_TYPE_GET_OBJECT, - .validate_type = AWS_S3_TESTER_VALIDATE_TYPE_EXPECT_SUCCESS, + .validate_type = AWS_S3_TESTER_VALIDATE_TYPE_EXPECT_FAILURE, + .client = client, + .fio_opts = &fio_opts, + .get_options = + { + .object_path = object_path, + .file_on_disk = true, + .recv_file_option = AWS_S3_RECV_FILE_CREATE_OR_APPEND, + }, + }; + + ASSERT_SUCCESS(aws_s3_tester_send_meta_request_with_options(&tester, &get_options, NULL)); + + aws_s3_client_release(client); + aws_s3_tester_clean_up(&tester); + return 0; +} + +AWS_TEST_CASE( + test_s3_get_object_file_path_direct_io_unsupported_write_to_position, + s_test_s3_get_object_file_path_direct_io_unsupported_write_to_position) +static int s_test_s3_get_object_file_path_direct_io_unsupported_write_to_position( + struct aws_allocator *allocator, + void *ctx) { + (void)ctx; + + struct aws_s3_tester tester; + ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester)); + + struct aws_s3_tester_client_options client_options = { + .part_size = MB_TO_BYTES(5), + }; + + struct aws_s3_client *client = NULL; + ASSERT_SUCCESS(aws_s3_tester_client_new(&tester, &client_options, &client)); + + struct aws_s3_file_io_options fio_opts = { + .direct_io = true, + }; + + struct aws_byte_cursor object_path = aws_byte_cursor_from_c_str("/pre-existing-1MB"); + struct aws_s3_tester_meta_request_options get_options = { + .allocator = allocator, + .meta_request_type = AWS_S3_META_REQUEST_TYPE_GET_OBJECT, + .validate_type = AWS_S3_TESTER_VALIDATE_TYPE_EXPECT_FAILURE, .client = client, .fio_opts = &fio_opts, .get_options = @@ -1723,17 +1874,14 @@ static int s_test_s3_get_object_file_path_direct_io_to_position(struct aws_alloc .object_path = object_path, .file_on_disk = true, .recv_file_option = AWS_S3_RECV_FILE_WRITE_TO_POSITION, - .recv_file_position = 20, - .pre_exist_file_length = pre_exist_file_length, + .recv_file_position = 4096, + .pre_exist_file_length = 10, }, }; - ASSERT_SUCCESS(aws_s3_tester_send_meta_request_with_options(&tester, &get_options, &meta_request_test_results)); - ASSERT_UINT_EQUALS( - get_options.get_options.recv_file_position + MB_TO_BYTES(1), meta_request_test_results.received_file_size); + ASSERT_SUCCESS(aws_s3_tester_send_meta_request_with_options(&tester, &get_options, NULL)); - aws_s3_meta_request_test_results_clean_up(&meta_request_test_results); - client = aws_s3_client_release(client); + aws_s3_client_release(client); aws_s3_tester_clean_up(&tester); return 0; } From a62377e42b6bef27655a6b1ec1059cdc9dce985c Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Tue, 12 May 2026 17:02:42 -0700 Subject: [PATCH 3/6] format --- source/s3_meta_request.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/source/s3_meta_request.c b/source/s3_meta_request.c index 8ea0f63ea..1dc739bd8 100644 --- a/source/s3_meta_request.c +++ b/source/s3_meta_request.c @@ -291,9 +291,8 @@ int aws_s3_meta_request_init_base( /* When direct_io is enabled, use O_DIRECT fd-based writes instead of FILE* fwrite. * Only supported for CREATE_OR_REPLACE and CREATE_NEW (writing from the beginning). */ - if (meta_request->fio_opts.direct_io && - (options->recv_file_option == AWS_S3_RECV_FILE_CREATE_OR_REPLACE || - options->recv_file_option == AWS_S3_RECV_FILE_CREATE_NEW)) { + if (meta_request->fio_opts.direct_io && (options->recv_file_option == AWS_S3_RECV_FILE_CREATE_OR_REPLACE || + options->recv_file_option == AWS_S3_RECV_FILE_CREATE_NEW)) { /* Validate preconditions same as the FILE* path */ if (options->recv_file_option == AWS_S3_RECV_FILE_CREATE_NEW && @@ -309,9 +308,7 @@ int aws_s3_meta_request_init_base( meta_request->recv_file_direct_io = true; AWS_LOGF_DEBUG( - AWS_LS_S3_META_REQUEST, - "id=%p: O_DIRECT enabled for download write path.", - (void *)meta_request); + AWS_LS_S3_META_REQUEST, "id=%p: O_DIRECT enabled for download write path.", (void *)meta_request); } else { /* Fail if user requested O_DIRECT with APPEND or WRITE_TO_POSITION */ if (meta_request->fio_opts.direct_io) { @@ -352,8 +349,7 @@ int aws_s3_meta_request_init_base( aws_raise_error(AWS_ERROR_S3_RECV_FILE_NOT_FOUND); break; } else { - meta_request->recv_file = - aws_fopen(aws_string_c_str(meta_request->recv_filepath), "r+"); + meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "r+"); if (meta_request->recv_file && aws_fseek(meta_request->recv_file, options->recv_file_position, SEEK_SET) != AWS_OP_SUCCESS) { @@ -2197,8 +2193,8 @@ static void s_s3_meta_request_event_delivery_task(struct aws_task *task, void *a meta_request->recv_file_direct_io = false; aws_reset_error(); /* Open FILE* and write this chunk */ - meta_request->recv_file = aws_fopen( - aws_string_c_str(meta_request->recv_filepath), "r+"); + meta_request->recv_file = + aws_fopen(aws_string_c_str(meta_request->recv_filepath), "r+"); if (meta_request->recv_file && aws_fseek(meta_request->recv_file, (int64_t)write_offset, SEEK_SET) == AWS_OP_SUCCESS) { @@ -2207,8 +2203,7 @@ static void s_s3_meta_request_event_delivery_task(struct aws_task *task, void *a response_body.len, 1, meta_request->recv_file) < 1) { - int errno_value = - ferror(meta_request->recv_file) ? errno : 0; + int errno_value = ferror(meta_request->recv_file) ? errno : 0; aws_translate_and_raise_io_error_or( errno_value, AWS_ERROR_FILE_WRITE_FAILURE); error_code = aws_last_error(); @@ -2507,8 +2502,8 @@ void aws_s3_meta_request_finish_default(struct aws_s3_meta_request *meta_request if (finish_result.error_code && meta_request->recv_file_delete_on_failure) { aws_file_delete(meta_request->recv_filepath); } - } else if (meta_request->recv_file_direct_io && finish_result.error_code && - meta_request->recv_file_delete_on_failure) { + } else if ( + meta_request->recv_file_direct_io && finish_result.error_code && meta_request->recv_file_delete_on_failure) { /* O_DIRECT path has no FILE* to close, but still honor delete-on-failure */ aws_file_delete(meta_request->recv_filepath); } From db8c93a28f9bcaddf717f3cd424fb39bb8aa9608 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Wed, 13 May 2026 15:58:59 -0700 Subject: [PATCH 4/6] fixing --- source/s3_meta_request.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/source/s3_meta_request.c b/source/s3_meta_request.c index 1dc739bd8..18c8bd31d 100644 --- a/source/s3_meta_request.c +++ b/source/s3_meta_request.c @@ -2192,12 +2192,11 @@ static void s_s3_meta_request_event_delivery_task(struct aws_task *task, void *a (void *)meta_request); meta_request->recv_file_direct_io = false; aws_reset_error(); - /* Open FILE* and write this chunk */ + /* Open FILE* for writing — file may not exist yet since + * O_DIRECT path doesn't pre-create it */ meta_request->recv_file = - aws_fopen(aws_string_c_str(meta_request->recv_filepath), "r+"); - if (meta_request->recv_file && - aws_fseek(meta_request->recv_file, (int64_t)write_offset, SEEK_SET) == - AWS_OP_SUCCESS) { + aws_fopen(aws_string_c_str(meta_request->recv_filepath), "wb"); + if (meta_request->recv_file) { if (fwrite( (void *)response_body.ptr, response_body.len, From 4c98d5680a0158852fd6438e4a516cc16e5f6414 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Wed, 13 May 2026 16:47:19 -0700 Subject: [PATCH 5/6] fix build --- tests/s3_data_plane_tests.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/s3_data_plane_tests.c b/tests/s3_data_plane_tests.c index ed9788cfb..bb5b3af57 100644 --- a/tests/s3_data_plane_tests.c +++ b/tests/s3_data_plane_tests.c @@ -5,12 +5,12 @@ #include "aws/s3/private/s3_checksums.h" #include "aws/s3/private/s3_client_impl.h" -#include #include "aws/s3/private/s3_default_buffer_pool.h" #include "aws/s3/private/s3_meta_request_impl.h" #include "aws/s3/private/s3_util.h" #include "aws/s3/s3_client.h" #include "s3_tester.h" +#include #include #include #include @@ -1731,7 +1731,7 @@ static int s_test_s3_get_object_file_path_direct_io_content_verify(struct aws_al ASSERT_SUCCESS(aws_s3_tester_send_meta_request_with_options(&tester, &put_options, NULL)); /* Download with O_DIRECT to a known file path */ - const char *local_file_path = "/tmp/aws_s3_direct_io_crc32_verify"; + const char *local_file_path = "aws_s3_direct_io_crc32_verify_test_file"; struct aws_string *host_name = aws_s3_tester_build_endpoint_string(allocator, &g_test_bucket_name, &g_test_s3_region); struct aws_byte_cursor host_cursor = aws_byte_cursor_from_string(host_name); From b67ff795ae379b95958428af7f9a3bf98cb01f7a Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Mon, 18 May 2026 23:43:50 +0000 Subject: [PATCH 6/6] some cleanup --- include/aws/s3/private/s3_meta_request_impl.h | 5 + source/s3_auto_ranged_get.c | 11 + source/s3_meta_request.c | 264 ++++++++++-------- tests/CMakeLists.txt | 3 + tests/s3_data_plane_tests.c | 223 +++++++++++++++ tests/s3_tester.c | 2 + tests/s3_tester.h | 4 + 7 files changed, 399 insertions(+), 113 deletions(-) diff --git a/include/aws/s3/private/s3_meta_request_impl.h b/include/aws/s3/private/s3_meta_request_impl.h index f12070e51..4a065f3ed 100644 --- a/include/aws/s3/private/s3_meta_request_impl.h +++ b/include/aws/s3/private/s3_meta_request_impl.h @@ -319,6 +319,11 @@ struct aws_s3_meta_request { bool recv_file_delete_on_failure; /* When true, use O_DIRECT for writing received data to file */ bool recv_file_direct_io; + /* Counter for how many times we fell back from O_DIRECT to buffered I/O for a single part. + * For unaligned last part: expected to be 1. + * For unsupported platform: 1 on first fallback, then direct_io is disabled (no further increments). + * The warning is only logged when this transitions from 0, to avoid log spam. */ + size_t recv_file_direct_io_fallback_count; /* File I/O options. */ struct aws_s3_file_io_options fio_opts; diff --git a/source/s3_auto_ranged_get.c b/source/s3_auto_ranged_get.c index 6eeed8774..5fc188ad6 100644 --- a/source/s3_auto_ranged_get.c +++ b/source/s3_auto_ranged_get.c @@ -9,6 +9,7 @@ #include "aws/s3/private/s3_request_messages.h" #include "aws/s3/private/s3_util.h" #include +#include #include /* Dont use buffer pool when we know response size, and its below this number, @@ -837,6 +838,16 @@ static void s_s3_auto_ranged_get_request_finished( /* Apply a buffer pool alignment to the calculated result. */ out_request_optimal_range_size = aws_s3_buffer_pool_derive_aligned_buffer_size( meta_request->client->buffer_pool, out_request_optimal_range_size); + /* For O_DIRECT download, also ensure page alignment. + * Buffer pool typically aligns to chunk_size which is page-aligned, but apply + * a defensive round-up here to guarantee the invariant. */ + if (meta_request->recv_file_direct_io) { + size_t page_size = aws_system_info_page_size(); + if (out_request_optimal_range_size % page_size != 0) { + out_request_optimal_range_size = + ((out_request_optimal_range_size / page_size) + 1) * page_size; + } + } AWS_LOGF_INFO( AWS_LS_S3_META_REQUEST, "id=%p: Override the part size to be optimal. part_size=%" PRIu64 ".", diff --git a/source/s3_meta_request.c b/source/s3_meta_request.c index 18c8bd31d..70a2dc799 100644 --- a/source/s3_meta_request.c +++ b/source/s3_meta_request.c @@ -289,83 +289,95 @@ int aws_s3_meta_request_init_base( meta_request->recv_filepath = aws_string_new_from_cursor(allocator, &options->recv_filepath); meta_request->recv_file_delete_on_failure = options->recv_file_delete_on_failure; - /* When direct_io is enabled, use O_DIRECT fd-based writes instead of FILE* fwrite. - * Only supported for CREATE_OR_REPLACE and CREATE_NEW (writing from the beginning). */ - if (meta_request->fio_opts.direct_io && (options->recv_file_option == AWS_S3_RECV_FILE_CREATE_OR_REPLACE || - options->recv_file_option == AWS_S3_RECV_FILE_CREATE_NEW)) { - - /* Validate preconditions same as the FILE* path */ - if (options->recv_file_option == AWS_S3_RECV_FILE_CREATE_NEW && - aws_path_exists(meta_request->recv_filepath)) { + bool direct_io = meta_request->fio_opts.direct_io; + + /* For O_DIRECT download, part_size must be page-aligned so that all parts except the last + * can be written via O_DIRECT. The last part's unaligned tail falls back to buffered write. */ + if (direct_io) { + size_t page_size = aws_system_info_page_size(); + if (part_size % page_size != 0) { AWS_LOGF_ERROR( AWS_LS_S3_META_REQUEST, - "id=%p Cannot receive file via CREATE_NEW: file already exists", - (void *)meta_request); - aws_raise_error(AWS_ERROR_S3_RECV_FILE_ALREADY_EXISTS); + "id=%p: Invalid meta request configuration - direct_io download requires part size " + "to be aligned with page size. part size is:%zu, while page size is:%zu", + (void *)meta_request, + part_size, + page_size); + aws_raise_error(AWS_ERROR_INVALID_ARGUMENT); goto error; } + } - meta_request->recv_file_direct_io = true; + switch (options->recv_file_option) { + case AWS_S3_RECV_FILE_CREATE_OR_REPLACE: + meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "wb"); + break; - AWS_LOGF_DEBUG( - AWS_LS_S3_META_REQUEST, "id=%p: O_DIRECT enabled for download write path.", (void *)meta_request); - } else { - /* Fail if user requested O_DIRECT with APPEND or WRITE_TO_POSITION */ - if (meta_request->fio_opts.direct_io) { - AWS_LOGF_ERROR( - AWS_LS_S3_META_REQUEST, - "id=%p O_DIRECT for download is only supported with CREATE_OR_REPLACE and CREATE_NEW", - (void *)meta_request); - aws_raise_error(AWS_ERROR_UNSUPPORTED_OPERATION); - goto error; - } - /* Standard FILE* path */ - switch (options->recv_file_option) { - case AWS_S3_RECV_FILE_CREATE_OR_REPLACE: - meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "wb"); - break; + case AWS_S3_RECV_FILE_CREATE_NEW: + if (aws_path_exists(meta_request->recv_filepath)) { + AWS_LOGF_ERROR( + AWS_LS_S3_META_REQUEST, + "id=%p Cannot receive file via CREATE_NEW: file already exists", + (void *)meta_request); + aws_raise_error(AWS_ERROR_S3_RECV_FILE_ALREADY_EXISTS); + goto error; + } + meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "wb"); + break; - case AWS_S3_RECV_FILE_CREATE_NEW: - if (aws_path_exists(meta_request->recv_filepath)) { - AWS_LOGF_ERROR( - AWS_LS_S3_META_REQUEST, - "id=%p Cannot receive file via CREATE_NEW: file already exists", - (void *)meta_request); - aws_raise_error(AWS_ERROR_S3_RECV_FILE_ALREADY_EXISTS); - break; - } else { - meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "wb"); - break; - } - case AWS_S3_RECV_FILE_CREATE_OR_APPEND: - meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "ab"); - break; - case AWS_S3_RECV_FILE_WRITE_TO_POSITION: - if (!aws_path_exists(meta_request->recv_filepath)) { - AWS_LOGF_ERROR( - AWS_LS_S3_META_REQUEST, - "id=%p Cannot receive file via WRITE_TO_POSITION: file not found.", - (void *)meta_request); - aws_raise_error(AWS_ERROR_S3_RECV_FILE_NOT_FOUND); - break; - } else { - meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "r+"); - if (meta_request->recv_file && - aws_fseek(meta_request->recv_file, options->recv_file_position, SEEK_SET) != - AWS_OP_SUCCESS) { - goto error; - } - break; - } + case AWS_S3_RECV_FILE_CREATE_OR_APPEND: + if (direct_io) { + AWS_LOGF_ERROR( + AWS_LS_S3_META_REQUEST, + "id=%p O_DIRECT for download is only supported with CREATE_OR_REPLACE and CREATE_NEW", + (void *)meta_request); + aws_raise_error(AWS_ERROR_UNSUPPORTED_OPERATION); + goto error; + } + meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "ab"); + break; - default: - AWS_ASSERT(false); - aws_raise_error(AWS_ERROR_INVALID_ARGUMENT); - break; - } - if (!meta_request->recv_file) { + case AWS_S3_RECV_FILE_WRITE_TO_POSITION: + if (direct_io) { + AWS_LOGF_ERROR( + AWS_LS_S3_META_REQUEST, + "id=%p O_DIRECT for download is only supported with CREATE_OR_REPLACE and CREATE_NEW", + (void *)meta_request); + aws_raise_error(AWS_ERROR_UNSUPPORTED_OPERATION); + goto error; + } + if (!aws_path_exists(meta_request->recv_filepath)) { + AWS_LOGF_ERROR( + AWS_LS_S3_META_REQUEST, + "id=%p Cannot receive file via WRITE_TO_POSITION: file not found.", + (void *)meta_request); + aws_raise_error(AWS_ERROR_S3_RECV_FILE_NOT_FOUND); + goto error; + } + meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "r+"); + if (meta_request->recv_file && + aws_fseek(meta_request->recv_file, options->recv_file_position, SEEK_SET) != AWS_OP_SUCCESS) { + goto error; + } + break; + + default: + AWS_ASSERT(false); + aws_raise_error(AWS_ERROR_INVALID_ARGUMENT); goto error; - } + } + + if (!meta_request->recv_file) { + goto error; + } + + /* For O_DIRECT, the file is already created via aws_fopen above (file now exists on disk). + * Keep the FILE* open — it's used as the fallback when O_DIRECT can't be used + * (unaligned last part or platform doesn't support O_DIRECT). */ + if (direct_io) { + meta_request->recv_file_direct_io = true; + AWS_LOGF_DEBUG( + AWS_LS_S3_META_REQUEST, "id=%p: O_DIRECT enabled for download write path.", (void *)meta_request); } } @@ -2051,6 +2063,24 @@ static bool s_should_apply_backpressure(struct aws_s3_request *request) { return false; } +/* Helper: write the response body to recv_file with fwrite. Sets *out_error_code on failure. */ +static void s_buffered_write_to_recv_file( + struct aws_s3_meta_request *meta_request, + const struct aws_byte_cursor *response_body, + int *out_error_code) { + if (fwrite((void *)response_body->ptr, response_body->len, 1, meta_request->recv_file) < 1) { + int errno_value = ferror(meta_request->recv_file) ? errno : 0; /* Always cache errno */ + aws_translate_and_raise_io_error_or(errno_value, AWS_ERROR_FILE_WRITE_FAILURE); + *out_error_code = aws_last_error(); + AWS_LOGF_ERROR( + AWS_LS_S3_META_REQUEST, + "id=%p Failed writing to file. errno:%d. aws-error:%s", + (void *)meta_request, + errno_value, + aws_error_name(*out_error_code)); + } +} + /* Deliver events in event_delivery_array. * This task runs on the meta-request's io_event_loop thread. */ static void s_s3_meta_request_event_delivery_task(struct aws_task *task, void *arg, enum aws_task_status task_status) { @@ -2181,62 +2211,70 @@ static void s_s3_meta_request_event_delivery_task(struct aws_task *task, void *a uint64_t write_offset = delivery_range_start; struct aws_byte_cursor write_cursor = aws_byte_cursor_from_array(response_body.ptr, response_body.len); - if (aws_file_path_write_to_offset_direct_io( - meta_request->recv_filepath, write_offset, write_cursor)) { - if (aws_last_error() == AWS_ERROR_UNSUPPORTED_OPERATION) { - /* Platform doesn't support O_DIRECT, fall back to buffered I/O */ - AWS_LOGF_WARN( - AWS_LS_S3_META_REQUEST, - "id=%p: O_DIRECT write not supported on this platform, " - "falling back to buffered I/O", - (void *)meta_request); - meta_request->recv_file_direct_io = false; - aws_reset_error(); - /* Open FILE* for writing — file may not exist yet since - * O_DIRECT path doesn't pre-create it */ - meta_request->recv_file = - aws_fopen(aws_string_c_str(meta_request->recv_filepath), "wb"); - if (meta_request->recv_file) { - if (fwrite( - (void *)response_body.ptr, - response_body.len, - 1, - meta_request->recv_file) < 1) { - int errno_value = ferror(meta_request->recv_file) ? errno : 0; - aws_translate_and_raise_io_error_or( - errno_value, AWS_ERROR_FILE_WRITE_FAILURE); - error_code = aws_last_error(); + + /* Check if this chunk is page-aligned. Only the last part of a download + * can have unaligned length. Use buffered write for that case. */ + size_t page_size = aws_system_info_page_size(); + bool use_direct_io = (response_body.len % page_size == 0); + + if (!use_direct_io && meta_request->recv_file_direct_io_fallback_count == 0) { + AWS_LOGF_WARN( + AWS_LS_S3_META_REQUEST, + "id=%p: O_DIRECT requested but data length %zu is not page-aligned " + "(page size %zu). Falling back to buffered I/O for this part. " + "This is expected for the last part of a download.", + (void *)meta_request, + response_body.len, + page_size); + } + if (!use_direct_io) { + ++meta_request->recv_file_direct_io_fallback_count; + } + + if (use_direct_io) { + if (aws_file_path_write_to_offset_direct_io( + meta_request->recv_filepath, write_offset, write_cursor)) { + if (aws_last_error() == AWS_ERROR_UNSUPPORTED_OPERATION) { + /* Platform doesn't support O_DIRECT, fall back to buffered I/O */ + if (meta_request->recv_file_direct_io_fallback_count == 0) { + AWS_LOGF_WARN( + AWS_LS_S3_META_REQUEST, + "id=%p: O_DIRECT write not supported on this platform, " + "falling back to buffered I/O for the rest of this download", + (void *)meta_request); } + ++meta_request->recv_file_direct_io_fallback_count; + meta_request->recv_file_direct_io = false; + aws_reset_error(); + use_direct_io = false; } else { + /* Real I/O error — hard fail */ error_code = aws_last_error(); + AWS_LOGF_ERROR( + AWS_LS_S3_META_REQUEST, + "id=%p Failed writing to file with O_DIRECT. aws-error:%s", + (void *)meta_request, + aws_error_name(error_code)); } - } else { - /* Real I/O error — hard fail */ + } + } + + if (!use_direct_io && error_code == AWS_ERROR_SUCCESS) { + /* Buffered write fallback. recv_file is already open from init. + * Need to seek because direct_io path doesn't update FILE* position. */ + if (aws_fseek(meta_request->recv_file, (int64_t)write_offset, SEEK_SET) != + AWS_OP_SUCCESS) { error_code = aws_last_error(); - AWS_LOGF_ERROR( - AWS_LS_S3_META_REQUEST, - "id=%p Failed writing to file with O_DIRECT. aws-error:%s", - (void *)meta_request, - aws_error_name(error_code)); + } else { + s_buffered_write_to_recv_file(meta_request, &response_body, &error_code); } } if (meta_request->client->enable_read_backpressure) { aws_s3_meta_request_increment_read_window(meta_request, response_body.len); } } else if (meta_request->recv_file) { - /* Write the data directly to the file. No need to seek, since the event will always be - * delivered with the right order. */ - if (fwrite((void *)response_body.ptr, response_body.len, 1, meta_request->recv_file) < 1) { - int errno_value = ferror(meta_request->recv_file) ? errno : 0; /* Always cache errno */ - aws_translate_and_raise_io_error_or(errno_value, AWS_ERROR_FILE_WRITE_FAILURE); - error_code = aws_last_error(); - AWS_LOGF_ERROR( - AWS_LS_S3_META_REQUEST, - "id=%p Failed writing to file. errno:%d. aws-error:%s", - (void *)meta_request, - errno_value, - aws_error_name(error_code)); - } + /* Regular FILE* path. No need to seek — events arrive in order. */ + s_buffered_write_to_recv_file(meta_request, &response_body, &error_code); if (meta_request->client->enable_read_backpressure) { aws_s3_meta_request_increment_read_window(meta_request, response_body.len); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3c4cb2802..c10701698 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -82,6 +82,9 @@ add_net_test_case(test_s3_get_object_file_path_direct_io) add_net_test_case(test_s3_get_object_file_path_direct_io_content_verify) add_net_test_case(test_s3_get_object_file_path_direct_io_unsupported_append) add_net_test_case(test_s3_get_object_file_path_direct_io_unsupported_write_to_position) +add_net_test_case(test_s3_get_object_file_path_direct_io_multi_part) +add_net_test_case(test_s3_get_object_file_path_direct_io_unaligned_part_size) +add_net_test_case(test_s3_get_object_file_path_direct_io_unaligned_last_part) add_net_test_case(test_s3_get_object_empty_object) add_net_test_case(test_s3_get_object_multiple) add_net_test_case(test_s3_get_object_multiple_serial) diff --git a/tests/s3_data_plane_tests.c b/tests/s3_data_plane_tests.c index bb5b3af57..63fc6b164 100644 --- a/tests/s3_data_plane_tests.c +++ b/tests/s3_data_plane_tests.c @@ -1763,6 +1763,12 @@ static int s_test_s3_get_object_file_path_direct_io_content_verify(struct aws_al /* S3 response checksum was validated */ ASSERT_TRUE(meta_request_test_results.did_validate); ASSERT_INT_EQUALS(AWS_SCA_CRC32, meta_request_test_results.validation_algorithm); + /* 1 MiB single-part download: all aligned on Linux, fallback once on non-Linux */ +#if defined(__linux__) + ASSERT_UINT_EQUALS(0, meta_request->recv_file_direct_io_fallback_count); +#else + ASSERT_UINT_EQUALS(1, meta_request->recv_file_direct_io_fallback_count); +#endif aws_s3_meta_request_release(meta_request); aws_s3_tester_wait_for_meta_request_shutdown(&tester); @@ -1886,6 +1892,223 @@ static int s_test_s3_get_object_file_path_direct_io_unsupported_write_to_positio return 0; } +/* Multi-part download with O_DIRECT: download a 10MB object with 4MB part_size, + * exercises the per-part write loop. All parts are page-aligned in this case. */ +AWS_TEST_CASE( + test_s3_get_object_file_path_direct_io_multi_part, + s_test_s3_get_object_file_path_direct_io_multi_part) +static int s_test_s3_get_object_file_path_direct_io_multi_part(struct aws_allocator *allocator, void *ctx) { + (void)ctx; + + struct aws_s3_tester tester; + ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester)); + + struct aws_s3_tester_client_options client_options = { + .part_size = MB_TO_BYTES(4), + }; + + struct aws_s3_client *client = NULL; + ASSERT_SUCCESS(aws_s3_tester_client_new(&tester, &client_options, &client)); + + struct aws_s3_file_io_options fio_opts = { + .direct_io = true, + }; + + struct aws_s3_tester_meta_request_options get_options = { + .allocator = allocator, + .meta_request_type = AWS_S3_META_REQUEST_TYPE_GET_OBJECT, + .validate_type = AWS_S3_TESTER_VALIDATE_TYPE_EXPECT_SUCCESS, + .client = client, + .fio_opts = &fio_opts, + .validate_get_response_checksum = true, + .expected_validate_checksum_alg = AWS_SCA_CRC32, + .finish_callback = s_s3_test_validate_checksum, + .get_options = + { + .object_path = g_pre_existing_object_10MB, + .file_on_disk = true, + }, + }; + + struct aws_s3_meta_request_test_results meta_request_test_results; + aws_s3_meta_request_test_results_init(&meta_request_test_results, allocator); + ASSERT_SUCCESS(aws_s3_tester_send_meta_request_with_options(&tester, &get_options, &meta_request_test_results)); + ASSERT_INT_EQUALS((int64_t)MB_TO_BYTES(10), meta_request_test_results.received_file_size); + /* On Linux, all parts (4+4+2 MiB) are page-aligned so no fallback should occur. + * On non-Linux, the first write triggers UNSUPPORTED_OPERATION fallback exactly once. */ +#if defined(__linux__) + ASSERT_UINT_EQUALS(0, meta_request_test_results.recv_file_direct_io_fallback_count); +#else + ASSERT_UINT_EQUALS(1, meta_request_test_results.recv_file_direct_io_fallback_count); +#endif + + aws_s3_meta_request_test_results_clean_up(&meta_request_test_results); + aws_s3_client_release(client); + aws_s3_tester_clean_up(&tester); + return 0; +} + +/* O_DIRECT requires part_size to be page-aligned. Verify init fails when it's not. */ +AWS_TEST_CASE( + test_s3_get_object_file_path_direct_io_unaligned_part_size, + s_test_s3_get_object_file_path_direct_io_unaligned_part_size) +static int s_test_s3_get_object_file_path_direct_io_unaligned_part_size(struct aws_allocator *allocator, void *ctx) { + (void)ctx; + + struct aws_s3_tester tester; + ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester)); + + /* part_size = 5 MB - 1 byte, not page-aligned */ + struct aws_s3_tester_client_options client_options = { + .part_size = MB_TO_BYTES(5) - 1, + }; + + struct aws_s3_client *client = NULL; + ASSERT_SUCCESS(aws_s3_tester_client_new(&tester, &client_options, &client)); + + struct aws_s3_file_io_options fio_opts = { + .direct_io = true, + }; + + struct aws_s3_tester_meta_request_options get_options = { + .allocator = allocator, + .meta_request_type = AWS_S3_META_REQUEST_TYPE_GET_OBJECT, + .validate_type = AWS_S3_TESTER_VALIDATE_TYPE_EXPECT_FAILURE, + .client = client, + .fio_opts = &fio_opts, + .get_options = + { + .object_path = g_pre_existing_object_1MB, + .file_on_disk = true, + }, + }; + + ASSERT_SUCCESS(aws_s3_tester_send_meta_request_with_options(&tester, &get_options, NULL)); + + aws_s3_client_release(client); + aws_s3_tester_clean_up(&tester); + return 0; +} + +/* Round-trip with byte-precise object size (10MiB - 1 byte). Multi-part download with O_DIRECT. + * Verifies the unaligned last-part fallback path: all but the last part go through O_DIRECT, + * the last part has unaligned length and falls back to buffered fwrite. Read back from disk + * and compare CRC32 to confirm all bytes are correct. */ +AWS_TEST_CASE( + test_s3_get_object_file_path_direct_io_unaligned_last_part, + s_test_s3_get_object_file_path_direct_io_unaligned_last_part) +static int s_test_s3_get_object_file_path_direct_io_unaligned_last_part(struct aws_allocator *allocator, void *ctx) { + (void)ctx; + + struct aws_s3_tester tester; + ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester)); + + /* 4 MB part_size: 10 MiB - 1 = 9 parts of 4 MiB + 1 last part of (10 MiB - 1 - 8 MiB) bytes. + * Wait: 10 MiB = 10485760, 4 MiB = 4194304. 10485760 - 1 = 10485759. + * 10485759 / 4194304 = 2 full parts (2 * 4194304 = 8388608), remainder = 2097151 bytes. + * Last part = 2097151 = 2 MiB - 1 byte (NOT page-aligned). */ + struct aws_s3_tester_client_options client_options = { + .part_size = MB_TO_BYTES(4), + }; + + struct aws_s3_client *client = NULL; + ASSERT_SUCCESS(aws_s3_tester_client_new(&tester, &client_options, &client)); + + /* Upload 10 MiB - 1 byte */ + size_t object_size = MB_TO_BYTES(10) - 1; + struct aws_byte_buf upload_buf; + aws_s3_create_test_buffer(allocator, object_size, &upload_buf); + struct aws_byte_cursor upload_cursor = aws_byte_cursor_from_buf(&upload_buf); + struct aws_input_stream *upload_stream = aws_input_stream_new_from_cursor(allocator, &upload_cursor); + + struct aws_byte_buf path_buf; + AWS_ZERO_STRUCT(path_buf); + ASSERT_SUCCESS(aws_s3_tester_upload_file_path_init( + allocator, &path_buf, aws_byte_cursor_from_c_str("/prefix/round_trip/direct_io_unaligned_last.bin"))); + struct aws_byte_cursor object_path = aws_byte_cursor_from_buf(&path_buf); + + struct aws_string *host_name = + aws_s3_tester_build_endpoint_string(allocator, &g_test_bucket_name, &g_test_s3_region); + struct aws_byte_cursor host_cursor = aws_byte_cursor_from_string(host_name); + + /* Use the lower-level API to upload the byte-precise body */ + struct aws_http_message *put_message = aws_s3_test_put_object_request_new( + allocator, &host_cursor, object_path, g_test_body_content_type, upload_stream, 0); + + struct aws_s3_meta_request_options put_options = { + .type = AWS_S3_META_REQUEST_TYPE_PUT_OBJECT, + .message = put_message, + }; + + struct aws_s3_meta_request_test_results put_results; + aws_s3_meta_request_test_results_init(&put_results, allocator); + ASSERT_SUCCESS(aws_s3_tester_bind_meta_request(&tester, &put_options, &put_results)); + + struct aws_s3_meta_request *put_request = aws_s3_client_make_meta_request(client, &put_options); + ASSERT_NOT_NULL(put_request); + aws_s3_tester_wait_for_meta_request_finish(&tester); + ASSERT_INT_EQUALS(AWS_ERROR_SUCCESS, put_results.finished_error_code); + aws_s3_meta_request_release(put_request); + aws_s3_tester_wait_for_meta_request_shutdown(&tester); + aws_http_message_release(put_message); + aws_s3_meta_request_test_results_clean_up(&put_results); + + /* Download with O_DIRECT to a known path */ + const char *local_file_path = "aws_s3_direct_io_unaligned_last_test_file"; + struct aws_http_message *get_message = aws_s3_test_get_object_request_new(allocator, host_cursor, object_path); + + struct aws_s3_file_io_options fio_opts = { + .direct_io = true, + }; + struct aws_s3_meta_request_options get_options = { + .type = AWS_S3_META_REQUEST_TYPE_GET_OBJECT, + .message = get_message, + .recv_filepath = aws_byte_cursor_from_c_str(local_file_path), + .fio_opts = &fio_opts, + }; + + struct aws_s3_meta_request_test_results get_results; + aws_s3_meta_request_test_results_init(&get_results, allocator); + ASSERT_SUCCESS(aws_s3_tester_bind_meta_request(&tester, &get_options, &get_results)); + + struct aws_s3_meta_request *get_request = aws_s3_client_make_meta_request(client, &get_options); + ASSERT_NOT_NULL(get_request); + aws_s3_tester_wait_for_meta_request_finish(&tester); + ASSERT_INT_EQUALS(AWS_ERROR_SUCCESS, get_results.finished_error_code); + /* On Linux: 2 aligned parts go through O_DIRECT, 1 unaligned last part falls back. count == 1. + * On non-Linux: first write triggers UNSUPPORTED_OPERATION fallback, count == 1. */ + ASSERT_UINT_EQUALS(1, get_request->recv_file_direct_io_fallback_count); + aws_s3_meta_request_release(get_request); + aws_s3_tester_wait_for_meta_request_shutdown(&tester); + + /* Read back from disk and verify CRC32 matches the upload */ + uint32_t expected_crc = aws_checksums_crc32(upload_buf.buffer, (int)upload_buf.len, 0); + + FILE *verify_file = aws_fopen(local_file_path, "rb"); + ASSERT_NOT_NULL(verify_file); + struct aws_byte_buf file_buf; + aws_byte_buf_init(&file_buf, allocator, object_size); + file_buf.len = fread(file_buf.buffer, 1, object_size, verify_file); + fclose(verify_file); + + ASSERT_UINT_EQUALS(object_size, file_buf.len); + uint32_t actual_crc = aws_checksums_crc32(file_buf.buffer, (int)file_buf.len, 0); + ASSERT_UINT_EQUALS(expected_crc, actual_crc); + + /* Cleanup */ + remove(local_file_path); + aws_byte_buf_clean_up(&file_buf); + aws_s3_meta_request_test_results_clean_up(&get_results); + aws_http_message_release(get_message); + aws_string_destroy(host_name); + aws_input_stream_release(upload_stream); + aws_byte_buf_clean_up(&upload_buf); + aws_byte_buf_clean_up(&path_buf); + aws_s3_client_release(client); + aws_s3_tester_clean_up(&tester); + return 0; +} + AWS_TEST_CASE(test_s3_get_object_empty_object, s_test_s3_get_object_empty_default) static int s_test_s3_get_object_empty_default(struct aws_allocator *allocator, void *ctx) { (void)ctx; diff --git a/tests/s3_tester.c b/tests/s3_tester.c index 7fec8f7ec..c74760410 100644 --- a/tests/s3_tester.c +++ b/tests/s3_tester.c @@ -194,6 +194,8 @@ static void s_s3_test_meta_request_finish( meta_request_test_results->finished_error_code = result->error_code; meta_request_test_results->did_validate = result->did_validate; meta_request_test_results->validation_algorithm = result->validation_algorithm; + meta_request_test_results->recv_file_direct_io_fallback_count = + meta_request->recv_file_direct_io_fallback_count; if (meta_request_test_results->finish_callback != NULL) { meta_request_test_results->finish_callback(meta_request, result, user_data); diff --git a/tests/s3_tester.h b/tests/s3_tester.h index 8591ad654..016cc90ec 100644 --- a/tests/s3_tester.h +++ b/tests/s3_tester.h @@ -277,6 +277,10 @@ struct aws_s3_meta_request_test_results { bool did_validate; enum aws_s3_checksum_algorithm validation_algorithm; + /* Captured from meta_request->recv_file_direct_io_fallback_count via a finish callback. + * Tests can check this to verify the expected number of O_DIRECT fallbacks occurred. */ + size_t recv_file_direct_io_fallback_count; + /* Record data from progress_callback() */ struct { uint64_t content_length; /* Remember progress->content_length */