Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 26 additions & 4 deletions python/yugabyte/download_and_extract_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
EXPECTED_ARCHIVE_EXTENSION = '.tar.gz'
CHECKSUM_EXTENSION = '.sha256'
ARTIFACT_URL_SUFFIX = '/zip'
MAX_DOWNLOAD_ATTEMPTS = 5
RETRY_DELAY_SEC = 5


def remove_ignore_errors(file_path: str) -> None:
Expand Down Expand Up @@ -98,11 +100,18 @@ def verify_sha256sum(checksum_file_path: str, data_file_path: str) -> bool:
raise ValueError("Checksum file path must end with '%s', got: %s" % (
CHECKSUM_EXTENSION, checksum_file_path))

# Guard against someone passing in the actual data file instead of the checksum file.
# Guard against someone passing in the actual data file instead of the checksum file, or
# against the server returning an HTML error page in place of the checksum.
checksum_file_size = os.stat(checksum_file_path).st_size
if checksum_file_size > 4096:
raise IOError("Checksum file size is too big: %d bytes (file path: %s)" % (
checksum_file_size, checksum_file_path))
try:
with open(checksum_file_path, 'rb') as f:
preview = f.read(1024).decode('utf-8', errors='replace')
except Exception as ex:
preview = "<failed to read file: %s>" % ex
raise IOError(
"Checksum file size is too big: %d bytes (file path: %s). First 1024 bytes:\n%s" % (
checksum_file_size, checksum_file_path, preview))

expected_checksum = read_file_and_strip(checksum_file_path).split()[0]

Expand All @@ -122,7 +131,20 @@ def download_url(url: str, dest_path: str, other_curl_flags: List[str] = []) ->
dest_dir = os.path.dirname(dest_path)
if not os.path.isdir(dest_dir):
raise IOError("Destination directory %s does not exist" % dest_dir)
run_cmd(['curl', '-LsS', url, '-o', dest_path] + other_curl_flags)
# -f / --fail: don't write the response body for HTTP error responses, so we don't end up
# with an HTML error page on disk masquerading as the requested artifact.
# --retry / --retry-delay: retry transient failures (5xx, connection errors) before giving up.
# Note: curl --retry counts retries after the initial attempt, so we pass
# MAX_DOWNLOAD_ATTEMPTS - 1 to get MAX_DOWNLOAD_ATTEMPTS total attempts.
# --retry-connrefused: also retry on ECONNREFUSED, which curl does not retry by default
# and which is a common transient failure in CI environments.
run_cmd([
'curl', '-LsSf',
'--retry', str(MAX_DOWNLOAD_ATTEMPTS - 1),
'--retry-delay', str(RETRY_DELAY_SEC),
'--retry-connrefused',

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The --retry-connrefused flag was introduced in curl 7.52.0. Some older environments supported by YugabyteDB, such as CentOS 7, typically ship an older version of curl (e.g., 7.29.0) that does not support this flag; such a curl rejects the unrecognized option and exits with an error before attempting any download, so every call to download_url would fail there. Please verify whether the build environment is guaranteed to have a sufficiently recent curl version, or consider making this flag conditional to avoid breaking builds on older platforms.

References
  1. Focus on substantive issues including correctness and portability across supported environments.

url, '-o', dest_path,
] + other_curl_flags)
if not os.path.exists(dest_path):
raise IOError("Failed to download %s: file %s does not exist" % (url, dest_path))
elapsed_sec = time.time() - start_time_sec
Expand Down