Skip to content

Commit 1c80418

Browse files
authored
Improve Copy Operation by taking the Source URI (#482)
1 parent 0559eb8 commit 1c80418

File tree

11 files changed

+276
-47
lines changed

11 files changed

+276
-47
lines changed

.github/workflows/codecov.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,4 @@ jobs:
3030
run: |
3131
python3 -c "from urllib.request import urlretrieve; urlretrieve('${{ env.BUILDER_HOST }}/${{ env.BUILDER_SOURCE }}/${{ env.BUILDER_VERSION }}/builder.pyz?run=${{ env.RUN }}', 'builder')"
3232
chmod a+x builder
33-
./builder build -p ${{ env.PACKAGE_NAME }} --compiler=gcc-9 --cmake-extra=-DASSERT_LOCK_HELD=ON --coverage --coverage-exclude=source/s3_copy_object.c
33+
./builder build -p ${{ env.PACKAGE_NAME }} --compiler=gcc-12 --cmake-extra=-DASSERT_LOCK_HELD=ON --coverage --coverage-exclude=source/s3_copy_object.c

include/aws/s3/private/s3_copy_object.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88

99
#include "aws/s3/private/s3_meta_request_impl.h"
10+
#include <aws/common/uri.h>
1011

1112
enum aws_s3_copy_object_request_tag {
1213
AWS_S3_COPY_OBJECT_REQUEST_TAG_GET_OBJECT_SIZE,
@@ -25,6 +26,9 @@ struct aws_s3_copy_object {
2526
/* Usable after the Create Multipart Upload request succeeds. */
2627
struct aws_string *upload_id;
2728

29+
/* (Optional) source_uri for the copy operation. */
30+
struct aws_uri source_uri;
31+
2832
/* Only meant for use in the update function, which is never called concurrently. */
2933
struct {
3034
uint32_t next_part_number;

include/aws/s3/private/s3_request_messages.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,8 @@ struct aws_http_message *aws_s3_get_object_size_message_new(
136136
AWS_S3_API
137137
struct aws_http_message *aws_s3_get_source_object_size_message_new(
138138
struct aws_allocator *allocator,
139-
struct aws_http_message *base_message);
139+
struct aws_http_message *base_message,
140+
struct aws_uri *source_uri);
140141

141142
/* Add content-md5 header to the http message passed in. The MD5 will be computed from the input_buf */
142143
AWS_S3_API

include/aws/s3/s3_client.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,14 @@ enum aws_s3_meta_request_type {
7777
* a CopyObject request to S3 if the object size is not large enough for
7878
* a multipart upload.
7979
* Note: copy support is still in development and has the following limitations:
80-
* - host header must use virtual host addressing style (path style is not
80+
* 1. host header must use virtual host addressing style (path style is not
8181
* supported) and both source and dest buckets must have DNS-compliant names
82-
* - only {bucket}/{key} format is supported for source and passing arn as
82+
* 2. only {bucket}/{key} format is supported for source and passing an ARN as
8383
* source will not work
84-
* - source bucket is assumed to be in the same region as dest
85-
* - source bucket and dest bucket must both be either directory buckets or regular buckets.
84+
* 3. source bucket is assumed to be in the same region as dest
85+
* 4. source bucket and dest bucket must both be either directory buckets or regular buckets.
86+
*
87+
* Provide the `meta_request_options.copy_source_uri` to bypass limitations 1 and 2.
8688
*/
8789
AWS_S3_META_REQUEST_TYPE_COPY_OBJECT,
8890

@@ -869,6 +871,13 @@ struct aws_s3_meta_request_options {
869871
* This is just used as an estimate, so it's okay to provide an approximate value if the exact size is unknown.
870872
*/
871873
const uint64_t *object_size_hint;
874+
875+
/*
876+
* (Optional)
877+
* If performing a copy operation, provide the source URI here to bypass limitations 1 and 2 of the copy operation.
878+
* This will be ignored for other operations.
879+
*/
880+
const struct aws_byte_cursor copy_source_uri;
872881
};
873882

874883
/* Result details of a meta request.

source/s3_copy_object.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,22 @@ struct aws_s3_meta_request *aws_s3_meta_request_copy_object_new(
9292
copy_object->synced_data.content_length = UNKNOWN_CONTENT_LENGTH;
9393
copy_object->synced_data.total_num_parts = UNKNOWN_NUM_PARTS;
9494
copy_object->threaded_update_data.next_part_number = 1;
95+
if (options->copy_source_uri.len != 0) {
96+
if (aws_uri_init_parse(&copy_object->source_uri, allocator, &options->copy_source_uri)) {
97+
AWS_LOGF_ERROR(
98+
AWS_LS_S3_META_REQUEST,
99+
"Unable to parse the copy_source_uri provided in the request: " PRInSTR "",
100+
AWS_BYTE_CURSOR_PRI(options->copy_source_uri));
101+
goto on_error;
102+
}
103+
}
95104

96105
AWS_LOGF_DEBUG(AWS_LS_S3_META_REQUEST, "id=%p Created new CopyObject Meta Request.", (void *)&copy_object->base);
97106

98107
return &copy_object->base;
108+
on_error:
109+
aws_s3_meta_request_release(&copy_object->base);
110+
return NULL;
99111
}
100112

101113
static void s_s3_meta_request_copy_object_destroy(struct aws_s3_meta_request *meta_request) {
@@ -105,6 +117,7 @@ static void s_s3_meta_request_copy_object_destroy(struct aws_s3_meta_request *me
105117
struct aws_s3_copy_object *copy_object = meta_request->impl;
106118

107119
aws_string_destroy(copy_object->upload_id);
120+
aws_uri_clean_up(&copy_object->source_uri);
108121
copy_object->upload_id = NULL;
109122

110123
for (size_t part_index = 0; part_index < aws_array_list_length(&copy_object->synced_data.part_list); ++part_index) {
@@ -364,7 +377,7 @@ static struct aws_future_void *s_s3_copy_object_prepare_request(struct aws_s3_re
364377
/* Prepares the GetObject HEAD request to get the source object size. */
365378
case AWS_S3_COPY_OBJECT_REQUEST_TAG_GET_OBJECT_SIZE: {
366379
message = aws_s3_get_source_object_size_message_new(
367-
meta_request->allocator, meta_request->initial_request_message);
380+
meta_request->allocator, meta_request->initial_request_message, &copy_object->source_uri);
368381
break;
369382
}
370383

source/s3_request_messages.c

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <aws/common/byte_buf.h>
1111
#include <aws/common/encoding.h>
1212
#include <aws/common/string.h>
13+
#include <aws/common/uri.h>
1314
#include <aws/http/request_response.h>
1415
#include <aws/io/async_stream.h>
1516
#include <aws/io/stream.h>
@@ -457,30 +458,63 @@ static const struct aws_byte_cursor s_slash_char = AWS_BYTE_CUR_INIT_FROM_STRING
457458
*/
458459
struct aws_http_message *aws_s3_get_source_object_size_message_new(
459460
struct aws_allocator *allocator,
460-
struct aws_http_message *base_message) {
461-
struct aws_http_message *message = NULL;
461+
struct aws_http_message *base_message,
462+
struct aws_uri *source_uri) {
463+
464+
struct aws_http_message *message = aws_http_message_new_request(allocator);
462465
struct aws_byte_buf head_object_host_header;
463466
AWS_ZERO_STRUCT(head_object_host_header);
464467

468+
if (message == NULL) {
469+
goto error_cleanup;
470+
}
471+
472+
if (aws_http_message_set_request_method(message, g_head_method)) {
473+
goto error_cleanup;
474+
}
475+
if (source_uri != NULL && source_uri->self_size > 0) {
476+
/* Parse source host header and path from the provided URI */
477+
struct aws_byte_cursor host = *aws_uri_host_name(source_uri);
478+
struct aws_byte_cursor path = *aws_uri_path(source_uri);
479+
if (host.len == 0 || path.len == 0) {
480+
aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
481+
goto error_cleanup;
482+
}
483+
struct aws_http_header host_header = {
484+
.name = g_host_header_name,
485+
.value = host,
486+
};
487+
if (aws_http_message_add_header(message, host_header)) {
488+
goto error_cleanup;
489+
}
490+
491+
if (aws_http_message_set_request_path(message, path)) {
492+
goto error_cleanup;
493+
}
494+
return message;
495+
}
496+
497+
/* Parse the source host header and path from the x-amz-copy-source header and the destination URI */
498+
465499
AWS_PRECONDITION(allocator);
466500

467501
/* Find the x-amz-copy-source header, to extract source bucket/key information. */
468502
struct aws_http_headers *headers = aws_http_message_get_headers(base_message);
469503
if (!headers) {
470504
AWS_LOGF_ERROR(AWS_LS_S3_GENERAL, "CopyRequest is missing headers");
471-
return NULL;
505+
goto error_cleanup;
472506
}
473507

474508
struct aws_byte_cursor source_header;
475509
const struct aws_byte_cursor copy_source_header = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("x-amz-copy-source");
476510
if (aws_http_headers_get(headers, copy_source_header, &source_header) != AWS_OP_SUCCESS) {
477511
AWS_LOGF_ERROR(AWS_LS_S3_GENERAL, "CopyRequest is missing the x-amz-copy-source header");
478-
return NULL;
512+
goto error_cleanup;
479513
}
480514
struct aws_byte_cursor host;
481515
if (aws_http_headers_get(headers, g_host_header_name, &host) != AWS_OP_SUCCESS) {
482516
AWS_LOGF_ERROR(AWS_LS_S3_GENERAL, "CopyRequest is missing the Host header");
483-
return NULL;
517+
goto error_cleanup;
484518
}
485519

486520
struct aws_byte_cursor request_path = source_header;
@@ -529,15 +563,6 @@ struct aws_http_message *aws_s3_get_source_object_size_message_new(
529563
goto error_cleanup;
530564
}
531565

532-
message = aws_http_message_new_request(allocator);
533-
if (message == NULL) {
534-
goto error_cleanup;
535-
}
536-
537-
if (aws_http_message_set_request_method(message, g_head_method)) {
538-
goto error_cleanup;
539-
}
540-
541566
struct aws_http_header host_header = {
542567
.name = g_host_header_name,
543568
.value = aws_byte_cursor_from_buf(&head_object_host_header),

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ add_net_test_case(test_s3_multipart_copy_large_object_special_char)
302302
add_net_test_case(test_s3_multipart_copy_large_object)
303303
add_net_test_case(test_s3_copy_object_invalid_source_key)
304304
add_net_test_case(test_s3_copy_source_prefixed_by_slash)
305+
add_net_test_case(test_s3_copy_invalid_source_uri)
305306
add_net_test_case(test_s3_copy_source_prefixed_by_slash_multipart)
306307
add_net_test_case(test_s3_put_pause_resume_happy_path)
307308
add_net_test_case(test_s3_put_pause_resume_all_parts_done)

0 commit comments

Comments
 (0)