python/yugabyte/download_and_extract_archive.py (34 changes: 30 additions & 4 deletions)
@@ -42,6 +42,8 @@
 EXPECTED_ARCHIVE_EXTENSION = '.tar.gz'
 CHECKSUM_EXTENSION = '.sha256'
 ARTIFACT_URL_SUFFIX = '/zip'
+MAX_DOWNLOAD_ATTEMPTS = 5
+RETRY_DELAY_SEC = 5
 
 
 def remove_ignore_errors(file_path: str) -> None:
@@ -98,11 +100,18 @@ def verify_sha256sum(checksum_file_path: str, data_file_path: str) -> bool:
         raise ValueError("Checksum file path must end with '%s', got: %s" % (
             CHECKSUM_EXTENSION, checksum_file_path))
 
-    # Guard against someone passing in the actual data file instead of the checksum file.
+    # Guard against someone passing in the actual data file instead of the checksum file, or
+    # against the server returning an HTML error page in place of the checksum.
     checksum_file_size = os.stat(checksum_file_path).st_size
     if checksum_file_size > 4096:
-        raise IOError("Checksum file size is too big: %d bytes (file path: %s)" % (
-            checksum_file_size, checksum_file_path))
+        try:
+            with open(checksum_file_path, 'rb') as f:
+                preview = f.read(1024).decode('utf-8', errors='replace')
+        except Exception as ex:
+            preview = "<failed to read file: %s>" % ex
+        raise IOError(
+            "Checksum file size is too big: %d bytes (file path: %s). First 1024 bytes:\n%s" % (
+                checksum_file_size, checksum_file_path, preview))
 
     expected_checksum = read_file_and_strip(checksum_file_path).split()[0]
 
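For context on what follows this hunk: `expected_checksum` is then compared against a digest of the downloaded archive. Below is a minimal sketch of that comparison using only the standard library; `compute_sha256` is a hypothetical name for illustration, and the repo's actual helper may be structured differently:

```python
import hashlib


def compute_sha256(data_file_path: str) -> str:
    # Hypothetical helper: stream the archive through SHA-256 in 1 MiB chunks
    # so a multi-gigabyte download is never held in memory at once.
    digest = hashlib.sha256()
    with open(data_file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest()


# Sketch of the tail of verify_sha256sum (assumed, not shown in the diff):
#     return compute_sha256(data_file_path) == expected_checksum
```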
@@ -122,7 +131,24 @@ def download_url(url: str, dest_path: str, other_curl_flags: List[str] = []) ->
     dest_dir = os.path.dirname(dest_path)
     if not os.path.isdir(dest_dir):
         raise IOError("Destination directory %s does not exist" % dest_dir)
-    run_cmd(['curl', '-LsS', url, '-o', dest_path] + other_curl_flags)
+    # -f: exit 22 on HTTP errors instead of saving the error page as the artifact.
+    # Retry at the Python level so any curl failure is retried, including transient
+    # non-5xx HTTP errors (e.g. 403s on GitHub's signed release-asset redirects)
+    # that curl's --retry skips. --retry-all-errors would cover this but requires
+    # curl >= 7.71.0, which is unavailable on AlmaLinux 8 / RHEL 8 and similar.
+    cmd = ['curl', '-LsSf', url, '-o', dest_path] + other_curl_flags
+    for attempt in range(1, MAX_DOWNLOAD_ATTEMPTS + 1):
+        try:
+            run_cmd(cmd)
+            break
+        except subprocess.CalledProcessError as ex:
+            if attempt == MAX_DOWNLOAD_ATTEMPTS:
+                raise
+            logging.warning(
+                "curl failed downloading %s (attempt %d/%d) with exit code %d, "
+                "retrying in %ds",
+                url, attempt, MAX_DOWNLOAD_ATTEMPTS, ex.returncode, RETRY_DELAY_SEC)
+            time.sleep(RETRY_DELAY_SEC)
     if not os.path.exists(dest_path):
         raise IOError("Failed to download %s: file %s does not exist" % (url, dest_path))
     elapsed_sec = time.time() - start_time_sec
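The retry loop is easy to exercise in isolation by stubbing out the command runner. The following self-contained sketch does that; `fake_run_cmd`, the example URL, and the simulated failure count are hypothetical scaffolding, and only the loop itself mirrors the change:

```python
import logging
import subprocess
import time

logging.basicConfig(level=logging.WARNING)

MAX_DOWNLOAD_ATTEMPTS = 5
RETRY_DELAY_SEC = 5

failures_left = 2  # pretend curl hits a transient 403 twice, then succeeds


def fake_run_cmd(cmd: list) -> None:
    # Stand-in for run_cmd: with -f, curl exits with code 22 on an HTTP error.
    global failures_left
    if failures_left > 0:
        failures_left -= 1
        raise subprocess.CalledProcessError(returncode=22, cmd=cmd)


url = 'https://example.com/a.tar.gz'
cmd = ['curl', '-LsSf', url, '-o', '/tmp/a.tar.gz']
for attempt in range(1, MAX_DOWNLOAD_ATTEMPTS + 1):
    try:
        fake_run_cmd(cmd)
        break
    except subprocess.CalledProcessError as ex:
        if attempt == MAX_DOWNLOAD_ATTEMPTS:
            raise
        logging.warning(
            "curl failed downloading %s (attempt %d/%d) with exit code %d, retrying in %ds",
            url, attempt, MAX_DOWNLOAD_ATTEMPTS, ex.returncode, RETRY_DELAY_SEC)
        time.sleep(RETRY_DELAY_SEC)
```

With two simulated failures this logs two warnings, sleeps twice, and breaks on the third attempt; if all MAX_DOWNLOAD_ATTEMPTS attempts failed, the final CalledProcessError would propagate to the caller, just as in the diff.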