From 19104b9fe6c2f4fea9188a0dccbb01b953d81cc2 Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Wed, 11 Mar 2026 10:28:51 +0100 Subject: [PATCH] push: skip already existing files in the s3 bucket on push When we push data (i.e. sha256-abcd) files to the s3 bucket we currently do this unconditionally. But if the file is already in the bucket this is unnecessary. So try to "touch" it first and only upload if the file is not already there. This saves some time even on a fast connection (and bandwidth of course). Note that we use "touch" here because we want to make sure the object metadata gets updated so that any time-based expiration policies are still honored. --- src/ctl/push.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/ctl/push.py b/src/ctl/push.py index 8efd452..96febb3 100644 --- a/src/ctl/push.py +++ b/src/ctl/push.py @@ -47,14 +47,28 @@ def push_data_s3(self, storage, platform_id): for entry in entries: i_total += 1 + key = f"data/{storage}/{path}/{entry}" + + try: + s3c.copy_object( + Bucket="rpmrepo-storage", + Key=key, + CopySource={"Bucket": "rpmrepo-storage", "Key": key}, + MetadataDirective="COPY", + ) + print(f"[{i_total}/{n_total}] '{key}' (exists, touched)") + continue + except s3c.exceptions.ClientError as e: + if e.response["Error"]["Code"] not in ("404", "NoSuchKey"): + raise - print(f"[{i_total}/{n_total}] 'data/{storage}/{path}/{entry}'") + print(f"[{i_total}/{n_total}] '{key}'") with open(os.path.join(level, entry), "rb") as filp: s3c.upload_fileobj( filp, "rpmrepo-storage", - f"data/{storage}/{path}/{entry}", + key, ) def push_snapshot_s3(self, snapshot_id, snapshot_suffix):