From 19104b9fe6c2f4fea9188a0dccbb01b953d81cc2 Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Wed, 11 Mar 2026 10:28:51 +0100 Subject: [PATCH] push: skip already existing files in the s3 bucket on push When we push data (i.e. sha256-abcd) files to the s3 bucket we currently do this unconditionally. But if the file is already in the bucket this is unnecessary. So try to "touch" it first and only upload if the file is not already there. This saves some time even on a fast connection (and bandwidth of course). Note that we use "touch" here because we want to make sure the object metadata gets updated so that any time-based expiration policies are still honored. --- src/ctl/push.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/ctl/push.py b/src/ctl/push.py index 8efd452..96febb3 100644 --- a/src/ctl/push.py +++ b/src/ctl/push.py @@ -47,14 +47,28 @@ def push_data_s3(self, storage, platform_id): for entry in entries: i_total += 1 + key = f"data/{storage}/{path}/{entry}" + + try: + s3c.copy_object( + Bucket="rpmrepo-storage", + Key=key, + CopySource={"Bucket": "rpmrepo-storage", "Key": key}, + MetadataDirective="COPY", + ) + print(f"[{i_total}/{n_total}] '{key}' (exists, touched)") + continue + except s3c.exceptions.ClientError as e: + if e.response["Error"]["Code"] not in ("404", "NoSuchKey"): + raise - print(f"[{i_total}/{n_total}] 'data/{storage}/{path}/{entry}'") + print(f"[{i_total}/{n_total}] '{key}'") with open(os.path.join(level, entry), "rb") as filp: s3c.upload_fileobj( filp, "rpmrepo-storage", - f"data/{storage}/{path}/{entry}", + key, ) def push_snapshot_s3(self, snapshot_id, snapshot_suffix):