Skip to content

Commit b59f4eb

Browse files
committed
poc
1 parent e21f11b commit b59f4eb

5 files changed

Lines changed: 245 additions & 41 deletions

File tree

include/aws/s3/private/s3_meta_request_impl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,10 @@ struct aws_s3_meta_request {
317317
FILE *recv_file;
318318
struct aws_string *recv_filepath;
319319
bool recv_file_delete_on_failure;
320+
/* When true, use O_DIRECT for writing received data to file */
321+
bool recv_file_direct_io;
322+
/* Base file position for O_DIRECT writes (from recv_file_position option) */
323+
uint64_t recv_file_base_position;
320324

321325
/* File I/O options. */
322326
struct aws_s3_file_io_options fio_opts;

include/aws/s3/s3_client.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,8 @@ struct aws_s3_file_io_options {
349349
* Enable direct IO to bypass the OS cache. Helpful when the disk I/O outperforms the kernel cache.
350350
* Notes:
351351
* - Only supported on Linux for now.
352-
* - Only supports upload for now.
352+
* - Supported for both upload (send_filepath) and download (recv_filepath).
353+
* - For download, O_DIRECT is not supported with AWS_S3_RECV_FILE_CREATE_OR_APPEND.
353354
* - Check NOTES for O_DIRECT for additional info https://man7.org/linux/man-pages/man2/openat.2.html
354355
* In summary, O_DIRECT is a potentially powerful tool that should be used with caution.
355356
*/

source/s3_meta_request.c

Lines changed: 146 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -287,51 +287,92 @@ int aws_s3_meta_request_init_base(
287287
if (options->recv_filepath.len > 0) {
288288

289289
meta_request->recv_filepath = aws_string_new_from_cursor(allocator, &options->recv_filepath);
290-
switch (options->recv_file_option) {
291-
case AWS_S3_RECV_FILE_CREATE_OR_REPLACE:
292-
meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "wb");
293-
break;
290+
meta_request->recv_file_delete_on_failure = options->recv_file_delete_on_failure;
294291

295-
case AWS_S3_RECV_FILE_CREATE_NEW:
296-
if (aws_path_exists(meta_request->recv_filepath)) {
297-
AWS_LOGF_ERROR(
298-
AWS_LS_S3_META_REQUEST,
299-
"id=%p Cannot receive file via CREATE_NEW: file already exists",
300-
(void *)meta_request);
301-
aws_raise_error(AWS_ERROR_S3_RECV_FILE_ALREADY_EXISTS);
302-
break;
303-
} else {
292+
/* When direct_io is enabled, use O_DIRECT fd-based writes instead of FILE* fwrite.
293+
* Supported for CREATE_OR_REPLACE, CREATE_NEW, and WRITE_TO_POSITION.
294+
* APPEND is incompatible with O_DIRECT (no offset-based writes). */
295+
if (meta_request->fio_opts.direct_io &&
296+
options->recv_file_option != AWS_S3_RECV_FILE_CREATE_OR_APPEND) {
297+
298+
/* Validate preconditions same as the FILE* path */
299+
if (options->recv_file_option == AWS_S3_RECV_FILE_CREATE_NEW &&
300+
aws_path_exists(meta_request->recv_filepath)) {
301+
AWS_LOGF_ERROR(
302+
AWS_LS_S3_META_REQUEST,
303+
"id=%p Cannot receive file via CREATE_NEW: file already exists",
304+
(void *)meta_request);
305+
aws_raise_error(AWS_ERROR_S3_RECV_FILE_ALREADY_EXISTS);
306+
goto error;
307+
}
308+
if (options->recv_file_option == AWS_S3_RECV_FILE_WRITE_TO_POSITION &&
309+
!aws_path_exists(meta_request->recv_filepath)) {
310+
AWS_LOGF_ERROR(
311+
AWS_LS_S3_META_REQUEST,
312+
"id=%p Cannot receive file via WRITE_TO_POSITION: file not found.",
313+
(void *)meta_request);
314+
aws_raise_error(AWS_ERROR_S3_RECV_FILE_NOT_FOUND);
315+
goto error;
316+
}
317+
318+
meta_request->recv_file_direct_io = true;
319+
meta_request->recv_file_base_position =
320+
(options->recv_file_option == AWS_S3_RECV_FILE_WRITE_TO_POSITION) ? options->recv_file_position : 0;
321+
322+
AWS_LOGF_DEBUG(
323+
AWS_LS_S3_META_REQUEST,
324+
"id=%p: O_DIRECT enabled for download write path. base_position:%" PRIu64,
325+
(void *)meta_request,
326+
meta_request->recv_file_base_position);
327+
} else {
328+
/* Standard FILE* path */
329+
switch (options->recv_file_option) {
330+
case AWS_S3_RECV_FILE_CREATE_OR_REPLACE:
304331
meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "wb");
305332
break;
306-
}
307-
case AWS_S3_RECV_FILE_CREATE_OR_APPEND:
308-
meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "ab");
309-
break;
310-
case AWS_S3_RECV_FILE_WRITE_TO_POSITION:
311-
if (!aws_path_exists(meta_request->recv_filepath)) {
312-
AWS_LOGF_ERROR(
313-
AWS_LS_S3_META_REQUEST,
314-
"id=%p Cannot receive file via WRITE_TO_POSITION: file not found.",
315-
(void *)meta_request);
316-
aws_raise_error(AWS_ERROR_S3_RECV_FILE_NOT_FOUND);
317-
break;
318-
} else {
319-
meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "r+");
320-
if (meta_request->recv_file &&
321-
aws_fseek(meta_request->recv_file, options->recv_file_position, SEEK_SET) != AWS_OP_SUCCESS) {
322-
/* error out. */
323-
goto error;
333+
334+
case AWS_S3_RECV_FILE_CREATE_NEW:
335+
if (aws_path_exists(meta_request->recv_filepath)) {
336+
AWS_LOGF_ERROR(
337+
AWS_LS_S3_META_REQUEST,
338+
"id=%p Cannot receive file via CREATE_NEW: file already exists",
339+
(void *)meta_request);
340+
aws_raise_error(AWS_ERROR_S3_RECV_FILE_ALREADY_EXISTS);
341+
break;
342+
} else {
343+
meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "wb");
344+
break;
324345
}
346+
case AWS_S3_RECV_FILE_CREATE_OR_APPEND:
347+
meta_request->recv_file = aws_fopen(aws_string_c_str(meta_request->recv_filepath), "ab");
325348
break;
326-
}
349+
case AWS_S3_RECV_FILE_WRITE_TO_POSITION:
350+
if (!aws_path_exists(meta_request->recv_filepath)) {
351+
AWS_LOGF_ERROR(
352+
AWS_LS_S3_META_REQUEST,
353+
"id=%p Cannot receive file via WRITE_TO_POSITION: file not found.",
354+
(void *)meta_request);
355+
aws_raise_error(AWS_ERROR_S3_RECV_FILE_NOT_FOUND);
356+
break;
357+
} else {
358+
meta_request->recv_file =
359+
aws_fopen(aws_string_c_str(meta_request->recv_filepath), "r+");
360+
if (meta_request->recv_file &&
361+
aws_fseek(meta_request->recv_file, options->recv_file_position, SEEK_SET) !=
362+
AWS_OP_SUCCESS) {
363+
goto error;
364+
}
365+
break;
366+
}
327367

328-
default:
329-
AWS_ASSERT(false);
330-
aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
331-
break;
332-
}
333-
if (!meta_request->recv_file) {
334-
goto error;
368+
default:
369+
AWS_ASSERT(false);
370+
aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
371+
break;
372+
}
373+
if (!meta_request->recv_file) {
374+
goto error;
375+
}
335376
}
336377
}
337378

@@ -569,6 +610,9 @@ static void s_s3_meta_request_destroy(void *user_data) {
569610
/* If the meta request had succeeded, the file would already have been closed by the finish call, so it must have failed. */
570611
aws_file_delete(meta_request->recv_filepath);
571612
}
613+
} else if (meta_request->recv_file_direct_io && meta_request->recv_file_delete_on_failure) {
614+
/* O_DIRECT path: no FILE* to close, but still honor delete-on-failure during teardown */
615+
aws_file_delete(meta_request->recv_filepath);
572616
}
573617
aws_string_destroy(meta_request->recv_filepath);
574618

@@ -2139,7 +2183,65 @@ static void s_s3_meta_request_event_delivery_task(struct aws_task *task, void *a
21392183
aws_high_res_clock_get_ticks((uint64_t *)&metric->time_metrics.deliver_start_timestamp_ns);
21402184
}
21412185

2142-
if (meta_request->recv_file) {
2186+
if (meta_request->recv_file_direct_io) {
2187+
/* O_DIRECT write path — use offset-based direct I/O */
2188+
uint64_t write_offset =
2189+
meta_request->recv_file_base_position + delivery_range_start;
2190+
struct aws_byte_cursor write_cursor =
2191+
aws_byte_cursor_from_array(response_body.ptr, response_body.len);
2192+
if (aws_file_path_write_to_offset_direct_io(
2193+
meta_request->recv_filepath, write_offset, write_cursor)) {
2194+
if (aws_last_error() == AWS_ERROR_UNSUPPORTED_OPERATION) {
2195+
/* O_DIRECT not supported, fall back to FILE* for remainder */
2196+
AWS_LOGF_WARN(
2197+
AWS_LS_S3_META_REQUEST,
2198+
"id=%p: O_DIRECT write not supported, falling back to buffered I/O",
2199+
(void *)meta_request);
2200+
meta_request->recv_file_direct_io = false;
2201+
aws_reset_error();
2202+
/* Open FILE* and seek to current position */
2203+
meta_request->recv_file = aws_fopen(
2204+
aws_string_c_str(meta_request->recv_filepath), "r+");
2205+
if (meta_request->recv_file &&
2206+
aws_fseek(
2207+
meta_request->recv_file,
2208+
(int64_t)(meta_request->recv_file_base_position + delivery_range_start),
2209+
SEEK_SET) == AWS_OP_SUCCESS) {
2210+
/* Retry this write with fwrite */
2211+
if (fwrite(
2212+
(void *)response_body.ptr,
2213+
response_body.len,
2214+
1,
2215+
meta_request->recv_file) < 1) {
2216+
int errno_value =
2217+
ferror(meta_request->recv_file) ? errno : 0;
2218+
aws_translate_and_raise_io_error_or(
2219+
errno_value, AWS_ERROR_FILE_WRITE_FAILURE);
2220+
error_code = aws_last_error();
2221+
}
2222+
} else {
2223+
error_code = aws_last_error();
2224+
}
2225+
if (error_code != AWS_ERROR_SUCCESS) {
2226+
AWS_LOGF_ERROR(
2227+
AWS_LS_S3_META_REQUEST,
2228+
"id=%p Failed O_DIRECT fallback to buffered write. aws-error:%s",
2229+
(void *)meta_request,
2230+
aws_error_name(error_code));
2231+
}
2232+
} else {
2233+
error_code = aws_last_error();
2234+
AWS_LOGF_ERROR(
2235+
AWS_LS_S3_META_REQUEST,
2236+
"id=%p Failed writing to file with O_DIRECT. aws-error:%s",
2237+
(void *)meta_request,
2238+
aws_error_name(error_code));
2239+
}
2240+
}
2241+
if (meta_request->client->enable_read_backpressure) {
2242+
aws_s3_meta_request_increment_read_window(meta_request, response_body.len);
2243+
}
2244+
} else if (meta_request->recv_file) {
21432245
/* Write the data directly to the file. No need to seek, since the event will always be
21442246
* delivered in the right order. */
21452247
if (fwrite((void *)response_body.ptr, response_body.len, 1, meta_request->recv_file) < 1) {
@@ -2417,6 +2519,10 @@ void aws_s3_meta_request_finish_default(struct aws_s3_meta_request *meta_request
24172519
if (finish_result.error_code && meta_request->recv_file_delete_on_failure) {
24182520
aws_file_delete(meta_request->recv_filepath);
24192521
}
2522+
} else if (meta_request->recv_file_direct_io && finish_result.error_code &&
2523+
meta_request->recv_file_delete_on_failure) {
2524+
/* O_DIRECT path has no FILE* to close, but still honor delete-on-failure */
2525+
aws_file_delete(meta_request->recv_filepath);
24202526
}
24212527

24222528
while (!aws_linked_list_empty(&release_request_list)) {

tests/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ add_net_test_case(test_s3_get_object_file_path)
7878
add_net_test_case(test_s3_get_object_file_path_create_new)
7979
add_net_test_case(test_s3_get_object_file_path_append)
8080
add_net_test_case(test_s3_get_object_file_path_to_position)
81+
add_net_test_case(test_s3_get_object_file_path_direct_io)
82+
add_net_test_case(test_s3_get_object_file_path_direct_io_to_position)
8183
add_net_test_case(test_s3_get_object_empty_object)
8284
add_net_test_case(test_s3_get_object_multiple)
8385
add_net_test_case(test_s3_get_object_multiple_serial)

tests/s3_data_plane_tests.c

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1647,6 +1647,97 @@ static int s_test_s3_get_object_file_path_to_position(struct aws_allocator *allo
16471647
return 0;
16481648
}
16491649

1650+
AWS_TEST_CASE(test_s3_get_object_file_path_direct_io, s_test_s3_get_object_file_path_direct_io)
1651+
static int s_test_s3_get_object_file_path_direct_io(struct aws_allocator *allocator, void *ctx) {
1652+
(void)ctx;
1653+
1654+
struct aws_s3_tester tester;
1655+
AWS_ZERO_STRUCT(tester);
1656+
ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester));
1657+
1658+
struct aws_s3_tester_client_options client_options = {
1659+
.part_size = MB_TO_BYTES(5),
1660+
};
1661+
1662+
struct aws_s3_client *client = NULL;
1663+
ASSERT_SUCCESS(aws_s3_tester_client_new(&tester, &client_options, &client));
1664+
1665+
struct aws_s3_file_io_options fio_opts = {
1666+
.direct_io = true,
1667+
};
1668+
1669+
struct aws_byte_cursor object_path = aws_byte_cursor_from_c_str("/pre-existing-1MB");
1670+
struct aws_s3_tester_meta_request_options get_options = {
1671+
.allocator = allocator,
1672+
.meta_request_type = AWS_S3_META_REQUEST_TYPE_GET_OBJECT,
1673+
.validate_type = AWS_S3_TESTER_VALIDATE_TYPE_EXPECT_SUCCESS,
1674+
.client = client,
1675+
.fio_opts = &fio_opts,
1676+
.get_options =
1677+
{
1678+
.object_path = object_path,
1679+
.file_on_disk = true,
1680+
},
1681+
};
1682+
1683+
ASSERT_SUCCESS(aws_s3_tester_send_meta_request_with_options(&tester, &get_options, NULL));
1684+
1685+
client = aws_s3_client_release(client);
1686+
aws_s3_tester_clean_up(&tester);
1687+
return 0;
1688+
}
1689+
1690+
AWS_TEST_CASE(
1691+
test_s3_get_object_file_path_direct_io_to_position,
1692+
s_test_s3_get_object_file_path_direct_io_to_position)
1693+
static int s_test_s3_get_object_file_path_direct_io_to_position(struct aws_allocator *allocator, void *ctx) {
1694+
(void)ctx;
1695+
1696+
struct aws_s3_tester tester;
1697+
AWS_ZERO_STRUCT(tester);
1698+
ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester));
1699+
1700+
struct aws_s3_tester_client_options client_options = {
1701+
.part_size = MB_TO_BYTES(5),
1702+
};
1703+
1704+
struct aws_s3_meta_request_test_results meta_request_test_results;
1705+
aws_s3_meta_request_test_results_init(&meta_request_test_results, allocator);
1706+
struct aws_s3_client *client = NULL;
1707+
ASSERT_SUCCESS(aws_s3_tester_client_new(&tester, &client_options, &client));
1708+
1709+
struct aws_s3_file_io_options fio_opts = {
1710+
.direct_io = true,
1711+
};
1712+
1713+
struct aws_byte_cursor object_path = aws_byte_cursor_from_c_str("/pre-existing-1MB");
1714+
uint64_t pre_exist_file_length = 10;
1715+
struct aws_s3_tester_meta_request_options get_options = {
1716+
.allocator = allocator,
1717+
.meta_request_type = AWS_S3_META_REQUEST_TYPE_GET_OBJECT,
1718+
.validate_type = AWS_S3_TESTER_VALIDATE_TYPE_EXPECT_SUCCESS,
1719+
.client = client,
1720+
.fio_opts = &fio_opts,
1721+
.get_options =
1722+
{
1723+
.object_path = object_path,
1724+
.file_on_disk = true,
1725+
.recv_file_option = AWS_S3_RECV_FILE_WRITE_TO_POSITION,
1726+
.recv_file_position = 20,
1727+
.pre_exist_file_length = pre_exist_file_length,
1728+
},
1729+
};
1730+
1731+
ASSERT_SUCCESS(aws_s3_tester_send_meta_request_with_options(&tester, &get_options, &meta_request_test_results));
1732+
ASSERT_UINT_EQUALS(
1733+
get_options.get_options.recv_file_position + MB_TO_BYTES(1), meta_request_test_results.received_file_size);
1734+
1735+
aws_s3_meta_request_test_results_clean_up(&meta_request_test_results);
1736+
client = aws_s3_client_release(client);
1737+
aws_s3_tester_clean_up(&tester);
1738+
return 0;
1739+
}
1740+
16501741
AWS_TEST_CASE(test_s3_get_object_empty_object, s_test_s3_get_object_empty_default)
16511742
static int s_test_s3_get_object_empty_default(struct aws_allocator *allocator, void *ctx) {
16521743
(void)ctx;

0 commit comments

Comments
 (0)