1 change: 1 addition & 0 deletions docs/aws_s3_setup.md
@@ -51,6 +51,7 @@ Create an IAM Policy called `MedusaStorageStrategy`, with the following definition:
"s3:GetReplicationConfiguration",
"s3:ListMultipartUploadParts",
"s3:PutObject",
"s3:PutObjectAcl",
"s3:GetObject",
"s3:GetObjectTorrent",
"s3:PutObjectRetention",
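For context on the new `s3:PutObjectAcl` action: S3 generally rejects uploads that carry ACL-specific headers (the `x-amz-acl` header that `aws s3 cp --acl` sends) unless the caller also holds `s3:PutObjectAcl`. Below is an illustrative sketch of the relevant policy statement expressed as a Python dict; the Resource ARN is a placeholder and only the actions visible in this hunk are listed.

```python
# Illustrative only: the MedusaStorageStrategy statement fragment with the
# newly added s3:PutObjectAcl action, serialized as it would appear in the
# IAM policy JSON.
import json

statement_fragment = {
    "Effect": "Allow",
    "Action": [
        "s3:GetReplicationConfiguration",
        "s3:ListMultipartUploadParts",
        "s3:PutObject",
        "s3:PutObjectAcl",   # needed once uploads set a canned ACL
        "s3:GetObject",
        "s3:GetObjectTorrent",
        "s3:PutObjectRetention",
    ],
    "Resource": ["arn:aws:s3:::<your-medusa-bucket>/*"],  # placeholder ARN
}
print(json.dumps(statement_fragment, indent=2))
```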
3 changes: 3 additions & 0 deletions medusa-example.ini
@@ -71,6 +71,9 @@ concurrent_transfers = 1
; Size over which S3 uploads will be using the awscli with multi part uploads. Defaults to 100MB.
multi_part_upload_threshold = 104857600

; Canned ACL for uploaded objects on S3. Defaults to private
canned_acl = private

[monitoring]
;monitoring_provider = <Provider used for sending metrics. Currently either of "ffwd" or "local">

4 changes: 3 additions & 1 deletion medusa/config.py
@@ -29,7 +29,8 @@
'StorageConfig',
['bucket_name', 'key_file', 'prefix', 'fqdn', 'host_file_separator', 'storage_provider',
'base_path', 'max_backup_age', 'max_backup_count', 'api_profile', 'transfer_max_bandwidth',
'concurrent_transfers', 'multi_part_upload_threshold', 'host', 'region', 'port', 'secure', 'aws_cli_path']
'concurrent_transfers', 'multi_part_upload_threshold', 'canned_acl', 'host',
'region', 'port', 'secure', 'aws_cli_path']
)

CassandraConfig = collections.namedtuple(
@@ -95,6 +96,7 @@ def load_config(args, config_file):
'aws_cli_path': 'aws',
'fqdn': socket.getfqdn(),
'region': 'default',
'canned_acl': 'private',
}

config['logging'] = {
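A small sketch (not part of the PR) of why the new default preserves existing behavior: `load_config` seeds the `[storage]` section with defaults before the user's `medusa.ini` is read, so an ini file that never mentions `canned_acl` still resolves to `private`. The bucket value below is a placeholder.

```python
# Minimal illustration of the default-seeding pattern used by load_config():
# a missing canned_acl option falls back to the injected 'private' default.
import configparser

config = configparser.ConfigParser(interpolation=None)

# Defaults injected before the user's ini is parsed.
config['storage'] = {
    'canned_acl': 'private',
    'concurrent_transfers': '1',
}

# Simulate a pre-existing medusa.ini that predates the new option.
config.read_string("[storage]\nbucket_name = my_backup_bucket\n")

assert config['storage']['canned_acl'] == 'private'          # default survives
assert config['storage']['bucket_name'] == 'my_backup_bucket'
```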
4 changes: 3 additions & 1 deletion medusa/storage/aws_s3_storage/awscli.py
@@ -27,6 +27,7 @@ class AwsCli(object):
def __init__(self, storage):
self._config = storage.config
self.storage = storage
self.canned_acl = storage.config.canned_acl

@property
def bucket_name(self):
@@ -73,7 +74,8 @@ def cp_upload(self, *, srcs, bucket_name, dest, max_retries=5):
awscli_output = "/tmp/awscli_{0}.output".format(job_id)
objects = []
for src in srcs:
cmd = [self._aws_cli_path, "s3", "cp", str(src), "s3://{}/{}".format(bucket_name, dest)]
cmd = [self._aws_cli_path, "s3", "cp", "--acl", self.canned_acl,
Contributor:
Would this be backwards compatible with the current behavior if set to private by default?
I seem to understand that it would, but want to make sure of it.

Contributor:
Also, we're moving to a consolidated set of classes for all S3-compatible backends in #272. I'm wondering how this would affect storage providers other than AWS.
I'd be in favor of adding this flag to the aws command line only if canned_acl is set to anything other than private (or maybe we could leave it unset by default).
@burmanm, wdyt?

Collaborator:
Yeah, I only added parameters to the command line if they differ from the default.

Contributor Author:
@adejanovski according to the AWS S3 documentation, private is the default ACL (https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl), so keeping it as the default when the option is not set shouldn't change the behavior.

str(src), "s3://{}/{}".format(bucket_name, dest)]
objects.append(self.upload_file(cmd, dest, awscli_output))

return objects
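In line with the review discussion above (add `--acl` only when it differs from S3's default), here is a hedged sketch of that alternative; `build_cp_command` is a hypothetical standalone helper, not part of Medusa, shown so the generated command stays identical to the pre-PR one when `canned_acl` is `private`.

```python
# Sketch of the reviewers' suggestion: omit --acl for the default ACL so the
# command line (and the required IAM permissions) are unchanged in that case.
def build_cp_command(aws_cli_path, src, bucket_name, dest, canned_acl="private"):
    cmd = [aws_cli_path, "s3", "cp"]
    if canned_acl and canned_acl != "private":
        # Only a non-default canned ACL adds the flag (and needs s3:PutObjectAcl).
        cmd += ["--acl", canned_acl]
    cmd += [str(src), "s3://{}/{}".format(bucket_name, dest)]
    return cmd


# build_cp_command("aws", "/tmp/f.db", "bucket", "backups/f.db")
#   -> ['aws', 's3', 'cp', '/tmp/f.db', 's3://bucket/backups/f.db']
# build_cp_command("aws", "/tmp/f.db", "bucket", "backups/f.db", "bucket-owner-full-control")
#   -> ['aws', 's3', 'cp', '--acl', 'bucket-owner-full-control',
#       '/tmp/f.db', 's3://bucket/backups/f.db']
```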
13 changes: 7 additions & 6 deletions medusa/storage/aws_s3_storage/concurrent.py
@@ -64,7 +64,7 @@ def with_storage(self, iterable):


def upload_blobs(
storage, src, dest, bucket, max_workers=None, multi_part_upload_threshold=0
storage, src, dest, bucket, canned_acl, max_workers=None, multi_part_upload_threshold=0
):
"""
Uploads a list of files from local storage concurrently to the remote storage.
@@ -80,14 +80,14 @@
job = StorageJob(
storage,
lambda storage, connection, src_file: __upload_file(
storage, connection, src_file, dest, bucket, multi_part_upload_threshold
storage, connection, src_file, dest, bucket, canned_acl, multi_part_upload_threshold
),
max_workers,
)
return job.execute(list(src))


def __upload_file(storage, connection, src, dest, bucket, multi_part_upload_threshold):
def __upload_file(storage, connection, src, dest, bucket, canned_acl, multi_part_upload_threshold):
"""
This function is called by StorageJob. It may be called concurrently by multiple threads.

@@ -116,15 +116,16 @@ def __upload_file(storage, connection, src, dest, bucket, multi_part_upload_threshold):
obj = _upload_multi_part(storage, connection, src, bucket, full_object_name)
else:
logging.debug("Uploading {} as single part".format(full_object_name))
obj = _upload_single_part(connection, src, bucket, full_object_name)
obj = _upload_single_part(connection, src, bucket, full_object_name, canned_acl)

return medusa.storage.ManifestObject(obj.name, int(obj.size), obj.hash)


@retry(stop_max_attempt_number=MAX_UP_DOWN_LOAD_RETRIES, wait_fixed=5000)
def _upload_single_part(connection, src, bucket, object_name):
def _upload_single_part(connection, src, bucket, object_name, canned_acl):
extra = {'content_type': 'application/octet-stream', 'acl': canned_acl}
obj = connection.upload_object(
str(src), container=bucket, object_name=object_name
str(src), container=bucket, object_name=object_name, extra=extra
)

return obj
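For reference, the `extra={'acl': ...}` argument passed to `connection.upload_object` above is how Apache Libcloud attaches a canned ACL to an S3 upload. A self-contained usage sketch follows; the credentials, region, bucket, and file names are placeholders, not values from Medusa.

```python
# Standalone Libcloud upload with a canned ACL, mirroring _upload_single_part.
from libcloud.storage.providers import get_driver
from libcloud.storage.types import Provider

driver_cls = get_driver(Provider.S3)
driver = driver_cls('ACCESS_KEY_ID', 'SECRET_ACCESS_KEY', region='us-east-1')  # placeholders

container = driver.get_container('my-medusa-bucket')      # placeholder bucket
obj = driver.upload_object(
    '/tmp/example-sstable.db',                             # placeholder local file
    container=container,
    object_name='backups/example-sstable.db',
    extra={'content_type': 'application/octet-stream', 'acl': 'bucket-owner-full-control'},
)
print(obj.name, obj.size, obj.hash)
```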
1 change: 1 addition & 0 deletions medusa/storage/s3_storage.py
@@ -157,6 +157,7 @@ def upload_blobs(self, srcs, dest):
srcs,
dest,
self.bucket,
canned_acl=self.config.canned_acl,
max_workers=self.config.concurrent_transfers,
multi_part_upload_threshold=int(self.config.multi_part_upload_threshold),
)
3 changes: 2 additions & 1 deletion tests/integration/features/steps/integration_steps.py
@@ -426,7 +426,8 @@ def i_am_using_storage_provider_with_grpc_server(context, storage_provider, clie
"multi_part_upload_threshold": 1 * 1024,
"concurrent_transfers": 4,
"prefix": storage_prefix,
"aws_cli_path": "aws"
"aws_cli_path": "aws",
"canned_acl": "private"
}

config["cassandra"] = {