Skip to content

Commit 7b29601

Browse files
committed
feat(RELEASE-2460): add disk-image support to artifact helpers
- compress_artifacts: copy disk images as-is; fix the multi-file per-arch scenario (e.g. ISO + QCOW2 sharing one os+arch)
- push_unsigned: pass disk images through without unpacking, handling both files[] and staged.files[] sources
- push_artifacts: inject staged.files[] into files[] for CGW when a disk-image component has no files[] entries
- extract_artifacts: fail early when a disk-image component declares a non-linux OS entry, preventing signing failures
- all helpers: detect disk-image via contentType at both the component level and contentGateway.contentType

Assisted-by: Cursor AI
Signed-off-by: Scott Wickersham <swickers@redhat.com>
1 parent eb74eb6 commit 7b29601

8 files changed

Lines changed: 631 additions & 45 deletions

scripts/python/helpers/compress_artifacts.py

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
* Pulls signed macOS and Windows OCI artifacts from Quay into a ``signed/`` directory.
66
* Restores supplementary files (readme, license, changelog) that were held during signing.
77
* Compresses each file entry into the final deliverable format:
8-
- macOS / Linux → ``.tar.gz`` (from ``os/arch/`` directory)
8+
- macOS / Linux (non-disk-image) → ``.tar.gz`` (from ``os/arch/`` directory)
9+
- Linux disk images (``.qcow2``, ``.iso``) → copied as-is to ``ready_for_distribution/``
910
- Windows → ``.zip`` (from ``os/arch/`` directory, extension corrected from
1011
``.tar.gz``/``.tar``)
1112
* Updates ``SNAPSHOT_JSON`` to reflect corrected Windows filenames in ``files[]``.
@@ -39,6 +40,23 @@
3940

4041
PROG = "compress_artifacts.py"
4142

43+
# Filename suffixes (single and compound) that can only denote a disk image.
# A file whose name ends with one of these is copied unchanged into
# ready_for_distribution rather than being wrapped in a tar archive, even if
# its component does not declare contentType: disk-image.
# NOTE: .tar.gz is deliberately absent. It may be either a regular binary
# archive or a disk image (e.g. GCP images shipped as tarballs), so those
# cases must be marked with contentType: disk-image on the component.
_DISK_IMAGE_SUFFIXES: frozenset[str] = frozenset(
    (".qcow2", ".iso", ".iso.gz", ".raw.gz", ".vhd.gz")
)


def _is_disk_image_file(filename: str) -> bool:
    """Tell whether *filename* ends with an unambiguous disk-image suffix.

    The comparison is case-insensitive: the name is lower-cased before it is
    checked against ``_DISK_IMAGE_SUFFIXES``.
    """
    normalized = filename.lower()
    # str.endswith accepts a tuple and reports True if any member matches.
    return normalized.endswith(tuple(_DISK_IMAGE_SUFFIXES))
58+
59+
4260
QUAY_SECRET_MOUNT = Path(os.environ.get("QUAY_SECRET_MOUNT", "/mnt/quaySecret"))
4361
CONTENT_DIR = Path(os.environ.get("CONTENT_DIR", "/shared/artifacts"))
4462
SHARED_DIR = Path(os.environ.get("SHARED_DIR", "/shared"))
@@ -103,19 +121,25 @@ def _windows_filename(source_filename: str) -> str:
103121

104122

105123
def _compress_file_entry(
106-
entry: dict, array_name: str, component_dir: Path, ready_dir: Path
124+
entry: dict,
125+
array_name: str,
126+
component_dir: Path,
127+
ready_dir: Path,
128+
*,
129+
is_disk_image_component: bool = False,
107130
) -> str:
108131
"""Compress one file entry into ready_dir and return the (possibly normalized) source path.
109132
110133
For macOS and Linux entries the source path is returned unchanged. For Windows entries
111134
the archive is created as a ``.zip`` instead of ``.tar.gz``/``.tar``, and the returned
112135
source path reflects the corrected filename so the snapshot can be updated accordingly.
113136
114-
Raises RuntimeError on failure (missing source, unknown OS, or empty arch directory).
137+
Files are copied directly to ``ready_dir`` (without archiving) when either:
138+
- *is_disk_image_component* is True (set when contentType: disk-image), or
139+
- the filename has an unambiguous disk-image suffix (.qcow2, .iso, .iso.gz,
140+
.raw.gz, .vhd.gz).
115141
116-
Note: all files are currently compressed regardless of type. ISOs should be
117-
passed through as-is rather than wrapped in a tarball — this will need to be
118-
addressed before ISO delivery is supported.
142+
Raises RuntimeError on failure (missing source, unknown OS, or empty arch directory).
119143
"""
120144
source = entry.get("source")
121145
if not source:
@@ -139,12 +163,23 @@ def _compress_file_entry(
139163

140164
# macOS and Linux follow the Unix convention of tar.gz archives; Windows uses zip
141165
# because that is the standard expected by Windows users and Developer Portal tooling.
166+
# Disk images are an exception: they are delivered as-is without any archiving.
142167
if os_name in ("darwin", "linux"):
143168
out_path = ready_dir / source_filename
144-
with tarfile.open(str(out_path), "w:gz") as tf:
145-
for item in sorted(arch_dir.rglob("*")):
146-
if item.is_file():
147-
tf.add(str(item), arcname=str(item.relative_to(arch_dir)))
169+
if is_disk_image_component or _is_disk_image_file(source_filename):
170+
# Use the known filename directly — multiple disk images may share
171+
# the same arch directory, so scanning the whole dir is incorrect.
172+
src_file = arch_dir / source_filename
173+
if not src_file.is_file():
174+
raise RuntimeError(
175+
f"Disk image file '{source_filename}' not found in {arch_dir}"
176+
)
177+
shutil.copy2(str(src_file), str(out_path))
178+
else:
179+
with tarfile.open(str(out_path), "w:gz") as tf:
180+
for item in sorted(arch_dir.rglob("*")):
181+
if item.is_file():
182+
tf.add(str(item), arcname=str(item.relative_to(arch_dir)))
148183
logger.info(" Created (%s): %s", array_name, source_filename)
149184
return source
150185

@@ -174,13 +209,20 @@ def compress_component(component: dict, snapshot: dict) -> dict:
174209
files_entries = list(component.get("files") or [])
175210
staged_entries = list((component.get("staged") or {}).get("files") or [])
176211

212+
is_disk_image = (
213+
component.get("contentType") == "disk-image"
214+
or (component.get("contentGateway") or {}).get("contentType") == "disk-image"
215+
)
216+
177217
normalized_files = []
178218
if files_entries:
179219
logger.info(
180220
" Processing %d files from files[] (Developer Portal):", len(files_entries)
181221
)
182222
for entry in files_entries:
183-
normalized_source = _compress_file_entry(entry, "files", component_dir, ready_dir)
223+
normalized_source = _compress_file_entry(
224+
entry, "files", component_dir, ready_dir, is_disk_image_component=is_disk_image
225+
)
184226
normalized_entry = dict(entry)
185227
# no-op for mac/linux, .zip correction for windows
186228
normalized_entry["source"] = normalized_source
@@ -191,7 +233,13 @@ def compress_component(component: dict, snapshot: dict) -> dict:
191233
" Processing %d files from staged.files[] (Customer Portal):", len(staged_entries)
192234
)
193235
for entry in staged_entries:
194-
_compress_file_entry(entry, "staged.files", component_dir, ready_dir)
236+
_compress_file_entry(
237+
entry,
238+
"staged.files",
239+
component_dir,
240+
ready_dir,
241+
is_disk_image_component=is_disk_image,
242+
)
195243

196244
updated_component = dict(component)
197245
if files_entries:

scripts/python/helpers/extract_artifacts.py

Lines changed: 94 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,46 @@ def _safe_extract_layer(
121121
return found
122122

123123

124+
def _extract_from_oras(
    manifest: dict,
    tmp_dir: Path,
    wanted_files: list[str],
    destination: Path,
    component_name: str,
) -> None:
    """Copy raw ORAS blob layers to destination, matching by filename.

    ORAS artifacts store raw file blobs as layers with an
    ``org.opencontainers.image.title`` annotation containing the filename.
    Each wanted file is matched (by basename) to its blob and copied directly.

    Args:
        manifest: Parsed image manifest; only its ``layers`` list is read.
        tmp_dir: Directory holding the pulled blobs, one file per layer, named
            after the layer digest with the ``sha256:`` prefix removed.
        wanted_files: Paths of the files to deliver; only the basename of each
            path is used for matching and for the output filename.
        destination: Directory that receives the copies. It is not created
            here, so it must already exist.
        component_name: Component name, used in log and error messages only.

    Raises:
        RuntimeError: If no layer carries a title equal to a wanted basename,
            or if the manifest lists such a layer but its blob file is absent
            from *tmp_dir*.
    """
    title_to_blob: dict[str, Path] = {}
    # ``or []`` tolerates a manifest whose "layers" key is present but null.
    for layer in manifest.get("layers") or []:
        title = (layer.get("annotations") or {}).get("org.opencontainers.image.title")
        digest = layer.get("digest", "")
        # Layers without a title or digest cannot be matched to a file; skip them.
        if title and digest:
            title_to_blob[title] = tmp_dir / digest.removeprefix("sha256:")

    logger.info(
        "ORAS artifact detected for '%s'; available blobs: %s",
        component_name,
        list(title_to_blob),
    )

    for wanted in wanted_files:
        basename = Path(wanted).name
        blob = title_to_blob.get(basename)
        if blob is None:
            raise RuntimeError(
                f"ORAS layer with title '{basename}' not found in component '{component_name}'. "
                f"Available titles: {sorted(title_to_blob)}"
            )
        if not blob.is_file():
            # The title exists in the manifest, so listing the available titles
            # would point at the wrong cause; report the missing blob instead.
            raise RuntimeError(
                f"ORAS layer with title '{basename}' not found in component '{component_name}'. "
                f"The manifest references blob '{blob.name}', but that file is missing "
                f"from {tmp_dir}."
            )
        out = destination / basename
        shutil.copy2(str(blob), str(out))
        logger.info("Copied ORAS blob '%s' -> %s", basename, out)
163+
124164
def process_component(component: dict) -> None:
125165
"""Pull and extract one component's artifacts into CONTENT_DIR/<name>/."""
126166
name = component.get("name")
@@ -174,32 +214,42 @@ def process_component(component: dict) -> None:
174214
logger.info("Files to extract from RPA: %s", wanted_files)
175215

176216
manifest = json.loads((tmp_dir / "manifest.json").read_text())
177-
layer_digests = [layer["digest"] for layer in manifest.get("layers", [])]
178-
179-
for digest in layer_digests:
180-
layer_file = tmp_dir / digest.removeprefix("sha256:")
181-
if not layer_file.exists():
182-
continue
183-
with tarfile.open(str(layer_file)) as tf:
184-
for image_path in extract_dirs:
185-
if _safe_extract_layer(tf, image_path, tmp_dir, layer_file.name):
186-
logger.info("Extracting %s/ from %s...", image_path, layer_file.name)
187-
else:
188-
logger.info(
189-
"skipping %s. It doesn't contain the %s dir",
190-
layer_file.name,
191-
image_path,
192-
)
193-
194-
for wanted in wanted_files:
195-
src = tmp_dir / wanted
196-
if src.is_file():
197-
shutil.copy2(str(src), str(destination / src.name))
198-
else:
199-
logger.error("Expected file not found in container: %s", wanted)
200-
raise RuntimeError(
201-
f"File '{wanted}' declared in RPA was not found in any container layer"
202-
)
217+
218+
config_media_type = manifest.get("config", {}).get("mediaType", "")
219+
if config_media_type == "application/vnd.oci.empty.v1+json":
220+
# ORAS artifact: layers are raw file blobs, not tar archives.
221+
# Each layer carries an org.opencontainers.image.title annotation
222+
# that holds the original filename. Copy blobs directly to destination.
223+
_extract_from_oras(manifest, tmp_dir, wanted_files, destination, name)
224+
else:
225+
layer_digests = [layer["digest"] for layer in manifest.get("layers", [])]
226+
227+
for digest in layer_digests:
228+
layer_file = tmp_dir / digest.removeprefix("sha256:")
229+
if not layer_file.exists():
230+
continue
231+
with tarfile.open(str(layer_file)) as tf:
232+
for image_path in extract_dirs:
233+
if _safe_extract_layer(tf, image_path, tmp_dir, layer_file.name):
234+
logger.info(
235+
"Extracting %s/ from %s...", image_path, layer_file.name
236+
)
237+
else:
238+
logger.info(
239+
"skipping %s. It doesn't contain the %s dir",
240+
layer_file.name,
241+
image_path,
242+
)
243+
244+
for wanted in wanted_files:
245+
src = tmp_dir / wanted
246+
if src.is_file():
247+
shutil.copy2(str(src), str(destination / src.name))
248+
else:
249+
logger.error("Expected file not found in container: %s", wanted)
250+
raise RuntimeError(
251+
f"File '{wanted}' declared in RPA was not found in any container layer"
252+
)
203253
finally:
204254
shutil.rmtree(tmp_dir, ignore_errors=True)
205255

@@ -227,6 +277,24 @@ def _matches(entry: dict, os_name: str) -> bool:
227277
return True
228278
return False
229279

280+
is_disk_image_component = (
281+
component.get("contentType") == "disk-image"
282+
or (component.get("contentGateway") or {}).get("contentType") == "disk-image"
283+
)
284+
285+
# Disk images must always be os: linux. Fail immediately if the RPA declares a
286+
# disk-image component with macOS or Windows entries — those would route into
287+
# the signing workflow and produce failures deep in the pipeline.
288+
if is_disk_image_component:
289+
for entry in all_file_entries:
290+
entry_os = entry.get("os", "")
291+
if entry_os in ("darwin", "windows"):
292+
raise RuntimeError(
293+
f"Component '{name}' has contentType: disk-image but entry "
294+
f"'{entry.get('source', '<unknown>')}' has os: {entry_os}. "
295+
f"Disk images must be os: linux. Fix the RPA before releasing."
296+
)
297+
230298
if any(_matches(e, "darwin") for e in all_file_entries):
231299
(component_dir / "has_mac").touch()
232300
logger.info(" - macOS content detected")

scripts/python/helpers/push_artifacts.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,19 @@ def run(exodus_gw_env: str, cgw_hostname: str, cert_expiration_warn_days: int) -
365365
cg = component.get("contentGateway") or {}
366366
cg["contentDir"] = str(component_dir)
367367
component["contentGateway"] = cg
368+
# For disk-image components the RPA uses staged.files[] (not files[]) to
369+
# describe the deliverables, but those files still need to appear in CGW.
370+
# Merge staged.files into files[] so publish_to_cgw_wrapper registers them.
371+
# NOTE: this intentionally mutates the component dict in-place. It is safe
372+
# because the Pulp push and CDN exclusion logic for this component have already
373+
# completed above, and the only remaining consumer is publish_to_cgw_wrapper
374+
# called below via json.dumps(snapshot).
375+
is_disk_image = (
376+
component.get("contentType") == "disk-image"
377+
or cg.get("contentType") == "disk-image"
378+
)
379+
if is_disk_image and not component.get("files"):
380+
component["files"] = (component.get("staged") or {}).get("files", [])
368381

369382
cgw_push = any(bool(c.get("contentGateway")) for c in snapshot.get("components", []))
370383
if cgw_push:

scripts/python/helpers/push_unsigned.py

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,23 @@
4040
SUPPLEMENTARY_NAMES = {"readme", "license", "changelog"}
4141
SUPPLEMENTARY_EXTS = {".md", ".txt"}
4242

43+
# Filename suffixes (single and compound) that can only denote a disk image.
# A file whose name ends with one of these is moved straight into the target
# directory with no tar extraction, even if its component does not declare
# contentType: disk-image.
# NOTE: .tar.gz is deliberately absent. It may be either a regular binary
# archive or a disk image (e.g. GCP images shipped as tarballs), so those
# cases must be marked with contentType: disk-image on the component.
_DISK_IMAGE_SUFFIXES: frozenset[str] = frozenset(
    [".qcow2", ".iso", ".iso.gz", ".raw.gz", ".vhd.gz"]
)


def _is_disk_image_file(filename: str) -> bool:
    """Tell whether *filename* ends with an unambiguous disk-image suffix.

    Matching ignores case: the name is lower-cased first.
    """
    lowered = filename.lower()
    for suffix in _DISK_IMAGE_SUFFIXES:
        if lowered.endswith(suffix):
            return True
    return False
58+
59+
4360
logger = logging.getLogger(__name__)
4461

4562

@@ -80,8 +97,21 @@ def move_supplementary_out(src_root: Path, hold_root: Path) -> None:
8097
logger.info(" Held supplementary file: %s", rel)
8198

8299

83-
def _unpack_file_entries(entries: list[dict], component_dir: Path, unsigned_dir: Path) -> None:
84-
"""Extract each archive from entries into its OS/arch subdirectory under unsigned_dir."""
100+
def _unpack_file_entries(
101+
entries: list[dict],
102+
component_dir: Path,
103+
unsigned_dir: Path,
104+
*,
105+
is_disk_image_component: bool = False,
106+
) -> None:
107+
"""Extract each archive from entries into its OS/arch subdirectory under unsigned_dir.
108+
109+
Files are moved directly (without unpacking) when either:
110+
- *is_disk_image_component* is True (set when contentType: disk-image), or
111+
- the filename has an unambiguous disk-image suffix (.qcow2, .iso, .iso.gz,
112+
.raw.gz, .vhd.gz).
113+
All other files are treated as tar archives and extracted.
114+
"""
85115
for entry in entries:
86116
source = entry.get("source", "")
87117
os_name = entry.get("os", "")
@@ -103,9 +133,12 @@ def _unpack_file_entries(entries: list[dict], component_dir: Path, unsigned_dir:
103133
continue
104134

105135
target_dir.mkdir(parents=True, exist_ok=True)
106-
with tarfile.open(str(archive_path)) as tf:
107-
_safe_extract_archive(tf, target_dir, archive_name)
108-
archive_path.unlink()
136+
if is_disk_image_component or _is_disk_image_file(archive_name):
137+
shutil.move(str(archive_path), str(target_dir / archive_name))
138+
else:
139+
with tarfile.open(str(archive_path)) as tf:
140+
_safe_extract_archive(tf, target_dir, archive_name)
141+
archive_path.unlink()
109142

110143

111144
def _safe_extract_archive(tf: tarfile.TarFile, target_dir: Path, archive_name: str) -> None:
@@ -157,9 +190,21 @@ def run(quay_url: str, pipeline_run_uid: str) -> None:
157190
if has_linux:
158191
(component_dir / "linux").mkdir(parents=True, exist_ok=True)
159192

160-
_unpack_file_entries(component.get("files") or [], component_dir, unsigned_dir)
193+
is_disk_image = (
194+
component.get("contentType") == "disk-image"
195+
or (component.get("contentGateway") or {}).get("contentType") == "disk-image"
196+
)
197+
_unpack_file_entries(
198+
component.get("files") or [],
199+
component_dir,
200+
unsigned_dir,
201+
is_disk_image_component=is_disk_image,
202+
)
161203
_unpack_file_entries(
162-
(component.get("staged") or {}).get("files") or [], component_dir, unsigned_dir
204+
(component.get("staged") or {}).get("files") or [],
205+
component_dir,
206+
unsigned_dir,
207+
is_disk_image_component=is_disk_image,
163208
)
164209

165210
supp_hold = component_dir / "supplementary"

0 commit comments

Comments
 (0)