Skip to content

Commit b761b1d

Browse files
committed
feat(RELEASE-2460): add disk-image support to artifact helpers
- compress_artifacts: copy disk images as-is; fix multi-file per-arch scenario (e.g. ISO + QCOW2 sharing one os+arch)
- push_unsigned: pass disk images through without unpacking, handling both files[] and staged.files[] sources
- push_artifacts: inject staged.files[] into files[] for CGW when disk-image component has no files[] entries
- extract_artifacts: fail early when disk-image component declares a non-linux OS entry, preventing signing failures
- all helpers: detect disk-image via contentType at both component level and contentGateway.contentType

Assisted-by: Cursor AI
Signed-off-by: Scott Wickersham <swickers@redhat.com>
1 parent eb74eb6 commit b761b1d

10 files changed

Lines changed: 749 additions & 46 deletions

scripts/python/helpers/compress_artifacts.py

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
* Pulls signed macOS and Windows OCI artifacts from Quay into a ``signed/`` directory.
66
* Restores supplementary files (readme, license, changelog) that were held during signing.
77
* Compresses each file entry into the final deliverable format:
8-
- macOS / Linux → ``.tar.gz`` (from ``os/arch/`` directory)
8+
- macOS / Linux (non-disk-image) → ``.tar.gz`` (from ``os/arch/`` directory)
9+
- Linux disk images (``.qcow2``, ``.iso``) → copied as-is to ``ready_for_distribution/``
910
- Windows → ``.zip`` (from ``os/arch/`` directory, extension corrected from
1011
``.tar.gz``/``.tar``)
1112
* Updates ``SNAPSHOT_JSON`` to reflect corrected Windows filenames in ``files[]``.
@@ -35,10 +36,12 @@
3536
import zipfile
3637
from pathlib import Path
3738

39+
import disk_image_utils
3840
import oras_utils
3941

4042
PROG = "compress_artifacts.py"
4143

44+
4245
QUAY_SECRET_MOUNT = Path(os.environ.get("QUAY_SECRET_MOUNT", "/mnt/quaySecret"))
4346
CONTENT_DIR = Path(os.environ.get("CONTENT_DIR", "/shared/artifacts"))
4447
SHARED_DIR = Path(os.environ.get("SHARED_DIR", "/shared"))
@@ -103,19 +106,25 @@ def _windows_filename(source_filename: str) -> str:
103106

104107

105108
def _compress_file_entry(
106-
entry: dict, array_name: str, component_dir: Path, ready_dir: Path
109+
entry: dict,
110+
array_name: str,
111+
component_dir: Path,
112+
ready_dir: Path,
113+
*,
114+
is_disk_image_component: bool = False,
107115
) -> str:
108116
"""Compress one file entry into ready_dir and return the (possibly normalized) source path.
109117
110118
For macOS and Linux entries the source path is returned unchanged. For Windows entries
111119
the archive is created as a ``.zip`` instead of ``.tar.gz``/``.tar``, and the returned
112120
source path reflects the corrected filename so the snapshot can be updated accordingly.
113121
114-
Raises RuntimeError on failure (missing source, unknown OS, or empty arch directory).
122+
Files are copied directly to ``ready_dir`` (without archiving) when either:
123+
- *is_disk_image_component* is True (set when contentType: disk-image), or
124+
- the filename has an unambiguous disk-image suffix (.qcow2, .iso, .iso.gz,
125+
.raw.gz, .vhd.gz).
115126
116-
Note: all files are currently compressed regardless of type. ISOs should be
117-
passed through as-is rather than wrapped in a tarball — this will need to be
118-
addressed before ISO delivery is supported.
127+
Raises RuntimeError on failure (missing source, unknown OS, or empty arch directory).
119128
"""
120129
source = entry.get("source")
121130
if not source:
@@ -139,12 +148,23 @@ def _compress_file_entry(
139148

140149
# macOS and Linux follow the Unix convention of tar.gz archives; Windows uses zip
141150
# because that is the standard expected by Windows users and Developer Portal tooling.
151+
# Disk images are an exception: they are delivered as-is without any archiving.
142152
if os_name in ("darwin", "linux"):
143153
out_path = ready_dir / source_filename
144-
with tarfile.open(str(out_path), "w:gz") as tf:
145-
for item in sorted(arch_dir.rglob("*")):
146-
if item.is_file():
147-
tf.add(str(item), arcname=str(item.relative_to(arch_dir)))
154+
if is_disk_image_component or disk_image_utils.is_disk_image_file(source_filename):
155+
# Use the known filename directly — multiple disk images may share
156+
# the same arch directory, so scanning the whole dir is incorrect.
157+
src_file = arch_dir / source_filename
158+
if not src_file.is_file():
159+
raise RuntimeError(
160+
f"Disk image file '{source_filename}' not found in {arch_dir}"
161+
)
162+
shutil.copy2(str(src_file), str(out_path))
163+
else:
164+
with tarfile.open(str(out_path), "w:gz") as tf:
165+
for item in sorted(arch_dir.rglob("*")):
166+
if item.is_file():
167+
tf.add(str(item), arcname=str(item.relative_to(arch_dir)))
148168
logger.info(" Created (%s): %s", array_name, source_filename)
149169
return source
150170

@@ -174,13 +194,17 @@ def compress_component(component: dict, snapshot: dict) -> dict:
174194
files_entries = list(component.get("files") or [])
175195
staged_entries = list((component.get("staged") or {}).get("files") or [])
176196

197+
is_disk_image = disk_image_utils.is_disk_image_component(component)
198+
177199
normalized_files = []
178200
if files_entries:
179201
logger.info(
180202
" Processing %d files from files[] (Developer Portal):", len(files_entries)
181203
)
182204
for entry in files_entries:
183-
normalized_source = _compress_file_entry(entry, "files", component_dir, ready_dir)
205+
normalized_source = _compress_file_entry(
206+
entry, "files", component_dir, ready_dir, is_disk_image_component=is_disk_image
207+
)
184208
normalized_entry = dict(entry)
185209
# no-op for mac/linux, .zip correction for windows
186210
normalized_entry["source"] = normalized_source
@@ -191,7 +215,13 @@ def compress_component(component: dict, snapshot: dict) -> dict:
191215
" Processing %d files from staged.files[] (Customer Portal):", len(staged_entries)
192216
)
193217
for entry in staged_entries:
194-
_compress_file_entry(entry, "staged.files", component_dir, ready_dir)
218+
_compress_file_entry(
219+
entry,
220+
"staged.files",
221+
component_dir,
222+
ready_dir,
223+
is_disk_image_component=is_disk_image,
224+
)
195225

196226
updated_component = dict(component)
197227
if files_entries:
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""Shared helpers for identifying disk-image files and components."""
2+
3+
from __future__ import annotations
4+
5+
# File suffixes (single and compound) that can only denote a disk image.
# A file carrying one of them is treated as a raw binary blob instead of a tar
# archive, even if its component is not marked contentType: disk-image.
# NOTE: .tar.gz is deliberately absent — a tarball may be either a binary
# archive or a disk image (e.g. GCP images packaged as tarballs). Mark the
# component with contentType: disk-image to cover those cases.
DISK_IMAGE_SUFFIXES: frozenset[str] = frozenset(
    {".qcow2", ".iso", ".iso.gz", ".raw.gz", ".vhd.gz"}
)


def is_disk_image_file(filename: str) -> bool:
    """Tell whether *filename* ends with one of ``DISK_IMAGE_SUFFIXES``.

    The comparison is case-insensitive: the name is lowercased before the
    suffix test.
    """
    # str.endswith accepts a tuple of candidates, so one call covers them all.
    return filename.lower().endswith(tuple(DISK_IMAGE_SUFFIXES))
20+
21+
22+
def is_disk_image_component(component: dict) -> bool:
    """Report whether *component* is declared as a disk-image release.

    The ``contentType: disk-image`` marker is honored in two places: directly
    on the component, or inside its ``contentGateway`` mapping.
    """
    if component.get("contentType") == "disk-image":
        return True
    # A missing or null contentGateway is treated as an empty mapping.
    gateway = component.get("contentGateway") or {}
    return gateway.get("contentType") == "disk-image"

scripts/python/helpers/extract_artifacts.py

Lines changed: 106 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from pathlib import Path
3535

3636
import authentication
37+
import disk_image_utils
3738

3839
PROG = "extract_artifacts.py"
3940

@@ -121,6 +122,47 @@ def _safe_extract_layer(
121122
return found
122123

123124

125+
def _extract_from_oras(
126+
manifest: dict,
127+
tmp_dir: Path,
128+
wanted_files: list[str],
129+
destination: Path,
130+
component_name: str,
131+
) -> None:
132+
"""Copy raw ORAS blob layers to destination, matching by filename.
133+
134+
ORAS artifacts store raw file blobs as layers with an
135+
``org.opencontainers.image.title`` annotation containing the filename.
136+
We match each wanted file (by basename) to its blob and copy it directly.
137+
"""
138+
title_to_blob: dict[str, Path] = {}
139+
for layer in manifest.get("layers", []):
140+
title = (layer.get("annotations") or {}).get("org.opencontainers.image.title")
141+
digest = layer.get("digest", "")
142+
if title and digest:
143+
blob_path = tmp_dir / digest.removeprefix("sha256:")
144+
title_to_blob[title] = blob_path
145+
146+
logger.info(
147+
"ORAS artifact detected for '%s'; available blobs: %s",
148+
component_name,
149+
list(title_to_blob),
150+
)
151+
152+
for wanted in wanted_files:
153+
basename = Path(wanted).name
154+
blob = title_to_blob.get(basename)
155+
if blob is None or not blob.is_file():
156+
available = sorted(title_to_blob)
157+
raise RuntimeError(
158+
f"ORAS layer with title '{basename}' not found in component "
159+
f"'{component_name}'. Available titles: {available}"
160+
)
161+
out = destination / basename
162+
shutil.copy2(str(blob), str(out))
163+
logger.info("Copied ORAS blob '%s' -> %s", basename, out)
164+
165+
124166
def process_component(component: dict) -> None:
125167
"""Pull and extract one component's artifacts into CONTENT_DIR/<name>/."""
126168
name = component.get("name")
@@ -174,32 +216,42 @@ def process_component(component: dict) -> None:
174216
logger.info("Files to extract from RPA: %s", wanted_files)
175217

176218
manifest = json.loads((tmp_dir / "manifest.json").read_text())
177-
layer_digests = [layer["digest"] for layer in manifest.get("layers", [])]
178-
179-
for digest in layer_digests:
180-
layer_file = tmp_dir / digest.removeprefix("sha256:")
181-
if not layer_file.exists():
182-
continue
183-
with tarfile.open(str(layer_file)) as tf:
184-
for image_path in extract_dirs:
185-
if _safe_extract_layer(tf, image_path, tmp_dir, layer_file.name):
186-
logger.info("Extracting %s/ from %s...", image_path, layer_file.name)
187-
else:
188-
logger.info(
189-
"skipping %s. It doesn't contain the %s dir",
190-
layer_file.name,
191-
image_path,
192-
)
193-
194-
for wanted in wanted_files:
195-
src = tmp_dir / wanted
196-
if src.is_file():
197-
shutil.copy2(str(src), str(destination / src.name))
198-
else:
199-
logger.error("Expected file not found in container: %s", wanted)
200-
raise RuntimeError(
201-
f"File '{wanted}' declared in RPA was not found in any container layer"
202-
)
219+
220+
config_media_type = manifest.get("config", {}).get("mediaType", "")
221+
if config_media_type == "application/vnd.oci.empty.v1+json":
222+
# ORAS artifact: layers are raw file blobs, not tar archives.
223+
# Each layer carries an org.opencontainers.image.title annotation
224+
# that holds the original filename. Copy blobs directly to destination.
225+
_extract_from_oras(manifest, tmp_dir, wanted_files, destination, name)
226+
else:
227+
layer_digests = [layer["digest"] for layer in manifest.get("layers", [])]
228+
229+
for digest in layer_digests:
230+
layer_file = tmp_dir / digest.removeprefix("sha256:")
231+
if not layer_file.exists():
232+
continue
233+
with tarfile.open(str(layer_file)) as tf:
234+
for image_path in extract_dirs:
235+
if _safe_extract_layer(tf, image_path, tmp_dir, layer_file.name):
236+
logger.info(
237+
"Extracting %s/ from %s...", image_path, layer_file.name
238+
)
239+
else:
240+
logger.info(
241+
"skipping %s. It doesn't contain the %s dir",
242+
layer_file.name,
243+
image_path,
244+
)
245+
246+
for wanted in wanted_files:
247+
src = tmp_dir / wanted
248+
if src.is_file():
249+
shutil.copy2(str(src), str(destination / src.name))
250+
else:
251+
logger.error("Expected file not found in container: %s", wanted)
252+
raise RuntimeError(
253+
f"File '{wanted}' declared in RPA was not found in any container layer"
254+
)
203255
finally:
204256
shutil.rmtree(tmp_dir, ignore_errors=True)
205257

@@ -240,14 +292,41 @@ def _matches(entry: dict, os_name: str) -> bool:
240292
logger.info(" - Linux content detected")
241293

242294

295+
def _validate_disk_image_components(components: list[dict]) -> None:
296+
"""Fail fast if any disk-image component has non-linux file entries.
297+
298+
Disk images must always target os: linux. Detecting this before pulling
299+
images avoids wasting time on downloads only to fail deep in the pipeline.
300+
"""
301+
for component in components:
302+
if not disk_image_utils.is_disk_image_component(component):
303+
continue
304+
name = component.get("name", "<unknown>")
305+
all_file_entries = list(component.get("files") or []) + list(
306+
(component.get("staged") or {}).get("files") or []
307+
)
308+
for entry in all_file_entries:
309+
entry_os = entry.get("os", "")
310+
if entry_os in ("darwin", "windows"):
311+
raise RuntimeError(
312+
f"Component '{name}' has contentType: disk-image but entry "
313+
f"'{entry.get('source', '<unknown>')}' has os: {entry_os}. "
314+
f"Disk images must be os: linux. Fix the RPA before releasing."
315+
)
316+
317+
243318
def run(concurrent_limit: int) -> None:
244319
"""Extract artifacts from all snapshot components and write OS flag files."""
245320
snapshot = json.loads(os.environ["SNAPSHOT_JSON"])
246321

322+
components = snapshot.get("components", [])
323+
324+
# Validate disk-image component constraints before doing any image pulls.
325+
_validate_disk_image_components(components)
326+
247327
_setup_docker_config()
248328
CONTENT_DIR.mkdir(parents=True, exist_ok=True)
249329

250-
components = snapshot.get("components", [])
251330
errors: list[str] = []
252331

253332
with ThreadPoolExecutor(max_workers=concurrent_limit) as executor:

scripts/python/helpers/push_artifacts.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import tempfile
4747
from pathlib import Path
4848

49+
import disk_image_utils
4950
import publish_to_cgw_wrapper
5051
import pulp_push_wrapper
5152
import yaml # type: ignore
@@ -365,6 +366,18 @@ def run(exodus_gw_env: str, cgw_hostname: str, cert_expiration_warn_days: int) -
365366
cg = component.get("contentGateway") or {}
366367
cg["contentDir"] = str(component_dir)
367368
component["contentGateway"] = cg
369+
# Disk-image components that target both CDN and CGW describe their
370+
# deliverables in staged.files[] (consumed by the CDN/Customer Portal
371+
# flow) but also need those files listed in files[] for CGW registration.
372+
# If files[] is already populated the team provided it directly (e.g. a
373+
# CGW-only release), so we leave it untouched.
374+
# NOTE: this intentionally mutates the component dict in-place. It is safe
375+
# because the Pulp push and CDN exclusion logic for this component have already
376+
# completed above, and the only remaining consumer is publish_to_cgw_wrapper
377+
# called below via json.dumps(snapshot).
378+
is_disk_image = disk_image_utils.is_disk_image_component(component)
379+
if is_disk_image and not component.get("files"):
380+
component["files"] = (component.get("staged") or {}).get("files", [])
368381

369382
cgw_push = any(bool(c.get("contentGateway")) for c in snapshot.get("components", []))
370383
if cgw_push:

0 commit comments

Comments
 (0)