konflux-ci
diff --git a/scripts/python/helpers/compress_artifacts.py b/scripts/python/helpers/compress_artifacts.py
Lines changed: 42 additions & 12 deletions
diff --git a/scripts/python/helpers/disk_image_utils.py b/scripts/python/helpers/disk_image_utils.py
Lines changed: 31 additions & 0 deletions
diff --git a/scripts/python/helpers/extract_artifacts.py b/scripts/python/helpers/extract_artifacts.py
Lines changed: 106 additions & 27 deletions
diff --git a/scripts/python/helpers/push_artifacts.py b/scripts/python/helpers/push_artifacts.py
Lines changed: 13 additions & 0 deletions
@@ -5,7 +5,8 @@
 * Pulls signed macOS and Windows OCI artifacts from Quay into a ``signed/`` directory.
 * Restores supplementary files (readme, license, changelog) that were held during signing.
 * Compresses each file entry into the final deliverable format:
-  - macOS / Linux → ``.tar.gz`` (from ``os/arch/`` directory)
+  - macOS / Linux (non-disk-image) → ``.tar.gz`` (from ``os/arch/`` directory)
+  - Linux disk images (``contentType: disk-image`` components, or files with an unambiguous
+    suffix such as ``.qcow2``/``.iso``) → copied as-is to ``ready_for_distribution/``
   - Windows → ``.zip`` (from ``os/arch/`` directory, extension corrected from
     ``.tar.gz``/``.tar``)
 * Updates ``SNAPSHOT_JSON`` to reflect corrected Windows filenames in ``files[]``.
@@ -35,10 +36,12 @@
 import zipfile
 from pathlib import Path
 
+import disk_image_utils
 import oras_utils
 
 PROG = "compress_artifacts.py"
 
+
 QUAY_SECRET_MOUNT = Path(os.environ.get("QUAY_SECRET_MOUNT", "/mnt/quaySecret"))
 CONTENT_DIR = Path(os.environ.get("CONTENT_DIR", "/shared/artifacts"))
 SHARED_DIR = Path(os.environ.get("SHARED_DIR", "/shared"))
@@ -103,19 +106,25 @@ def _windows_filename(source_filename: str) -> str:
 
 
 def _compress_file_entry(
-    entry: dict, array_name: str, component_dir: Path, ready_dir: Path
+    entry: dict,
+    array_name: str,
+    component_dir: Path,
+    ready_dir: Path,
+    *,
+    is_disk_image_component: bool = False,
 ) -> str:
     """Compress one file entry into ready_dir and return the (possibly normalized) source path.
 
     For macOS and Linux entries the source path is returned unchanged. For Windows entries
     the archive is created as a ``.zip`` instead of ``.tar.gz``/``.tar``, and the returned
     source path reflects the corrected filename so the snapshot can be updated accordingly.
 
-    Raises RuntimeError on failure (missing source, unknown OS, or empty arch directory).
+    Files are copied directly to ``ready_dir`` (without archiving) when either:
+    - *is_disk_image_component* is True (set when contentType: disk-image), or
+    - the filename has an unambiguous disk-image suffix (.qcow2, .iso, .iso.gz,
+      .raw.gz, .vhd.gz).
 
-    Note: all files are currently compressed regardless of type. ISOs should be
-    passed through as-is rather than wrapped in a tarball — this will need to be
-    addressed before ISO delivery is supported.
+    Raises RuntimeError on failure (missing source, unknown OS, or empty arch directory).
     """
     source = entry.get("source")
     if not source:
@@ -139,12 +148,23 @@ def _compress_file_entry(
 
     # macOS and Linux follow the Unix convention of tar.gz archives; Windows uses zip
     # because that is the standard expected by Windows users and Developer Portal tooling.
+    # Disk images are an exception: they are delivered as-is without any archiving.
     if os_name in ("darwin", "linux"):
         out_path = ready_dir / source_filename
-        with tarfile.open(str(out_path), "w:gz") as tf:
-            for item in sorted(arch_dir.rglob("*")):
-                if item.is_file():
-                    tf.add(str(item), arcname=str(item.relative_to(arch_dir)))
+        if is_disk_image_component or disk_image_utils.is_disk_image_file(source_filename):
+            # Use the known filename directly — multiple disk images may share
+            # the same arch directory, so scanning the whole dir is incorrect.
+            src_file = arch_dir / source_filename
+            if not src_file.is_file():
+                raise RuntimeError(
+                    f"Disk image file '{source_filename}' not found in {arch_dir}"
+                )
+            shutil.copy2(str(src_file), str(out_path))
+        else:
+            with tarfile.open(str(out_path), "w:gz") as tf:
+                for item in sorted(arch_dir.rglob("*")):
+                    if item.is_file():
+                        tf.add(str(item), arcname=str(item.relative_to(arch_dir)))
         logger.info("  Created (%s): %s", array_name, source_filename)
         return source
 
@@ -174,13 +194,17 @@ def compress_component(component: dict, snapshot: dict) -> dict:
     files_entries = list(component.get("files") or [])
     staged_entries = list((component.get("staged") or {}).get("files") or [])
 
+    is_disk_image = disk_image_utils.is_disk_image_component(component)
+
     normalized_files = []
     if files_entries:
         logger.info(
             "  Processing %d files from files[] (Developer Portal):", len(files_entries)
         )
         for entry in files_entries:
-            normalized_source = _compress_file_entry(entry, "files", component_dir, ready_dir)
+            normalized_source = _compress_file_entry(
+                entry, "files", component_dir, ready_dir, is_disk_image_component=is_disk_image
+            )
             normalized_entry = dict(entry)
             # no-op for mac/linux, .zip correction for windows
             normalized_entry["source"] = normalized_source
@@ -191,7 +215,13 @@ def compress_component(component: dict, snapshot: dict) -> dict:
             "  Processing %d files from staged.files[] (Customer Portal):", len(staged_entries)
         )
         for entry in staged_entries:
-            _compress_file_entry(entry, "staged.files", component_dir, ready_dir)
+            _compress_file_entry(
+                entry,
+                "staged.files",
+                component_dir,
+                ready_dir,
+                is_disk_image_component=is_disk_image,
+            )
 
     updated_component = dict(component)
     if files_entries:
 
@@ -0,0 +1,31 @@
+"""Shared helpers for identifying disk-image files and components."""
+
+from __future__ import annotations
+
+# Unambiguous disk-image file suffixes (simple and compound). Files matching
+# these are handled as raw binary blobs rather than tar archives, even when the
+# component does not carry contentType: disk-image.
+# Matching is done by is_disk_image_file, which lowercases the filename but not
+# the suffix, so every entry here must itself be lowercase to ever match.
+# NOTE: .tar.gz is intentionally excluded — it is ambiguous between binary
+# archives and disk images (e.g. GCP images packaged as tarballs). Use
+# contentType: disk-image on the component to handle those cases.
+DISK_IMAGE_SUFFIXES: frozenset[str] = frozenset(
+    {".qcow2", ".iso", ".iso.gz", ".raw.gz", ".vhd.gz"}
+)
+
+
+def is_disk_image_file(filename: str) -> bool:
+    """Report whether *filename* ends with one of ``DISK_IMAGE_SUFFIXES``.
+
+    The filename is lowercased before the comparison, so ``IMAGE.QCOW2`` and
+    ``image.qcow2`` are treated alike.
+    """
+    # str.endswith accepts a tuple of candidates, so a single call tests every suffix.
+    return filename.lower().endswith(tuple(DISK_IMAGE_SUFFIXES))
+
+
+def is_disk_image_component(component: dict) -> bool:
+    """Report whether *component* is declared as a disk-image release.
+
+    The declaration is ``contentType: disk-image``, looked up first on the
+    component itself and then under its ``contentGateway`` mapping. A missing
+    or empty ``contentGateway`` is treated as an empty mapping.
+    """
+    if component.get("contentType") == "disk-image":
+        return True
+    gateway = component.get("contentGateway") or {}
+    return gateway.get("contentType") == "disk-image"
@@ -34,6 +34,7 @@
 from pathlib import Path
 
 import authentication
+import disk_image_utils
 
 PROG = "extract_artifacts.py"
 
@@ -121,6 +122,47 @@ def _safe_extract_layer(
     return found
 
 
+def _extract_from_oras(
+    manifest: dict,
+    tmp_dir: Path,
+    wanted_files: list[str],
+    destination: Path,
+    component_name: str,
+) -> None:
+    """Copy raw ORAS blob layers to destination, matching by filename.
+
+    ORAS artifacts store raw file blobs as layers with an
+    ``org.opencontainers.image.title`` annotation containing the filename.
+    Each wanted file is reduced to its basename and matched against the layer
+    titles: an exact title match is preferred, falling back to the first layer
+    whose title's final path component equals the basename (titles may carry
+    directory components). The blob for a layer is read from
+    ``tmp_dir/<digest>`` with any ``sha256:`` prefix stripped from the digest.
+
+    Raises RuntimeError if no layer carries a matching title, or if the
+    matching layer's blob file is not present in ``tmp_dir``.
+    """
+    title_to_blob: dict[str, Path] = {}
+    basename_to_blob: dict[str, Path] = {}
+    # ``or []`` also covers a manifest whose "layers" key is present but null.
+    for layer in manifest.get("layers") or []:
+        title = (layer.get("annotations") or {}).get("org.opencontainers.image.title")
+        digest = layer.get("digest", "")
+        if title and digest:
+            blob_path = tmp_dir / digest.removeprefix("sha256:")
+            title_to_blob[title] = blob_path
+            # Fallback index for titles that include a directory part; the first
+            # layer seen for a given basename wins so the result is deterministic.
+            basename_to_blob.setdefault(Path(title).name, blob_path)
+
+    logger.info(
+        "ORAS artifact detected for '%s'; available blobs: %s",
+        component_name,
+        list(title_to_blob),
+    )
+
+    for wanted in wanted_files:
+        basename = Path(wanted).name
+        # Exact title match takes precedence over the basename fallback.
+        blob = title_to_blob.get(basename, basename_to_blob.get(basename))
+        if blob is None:
+            available = sorted(title_to_blob)
+            raise RuntimeError(
+                f"ORAS layer with title '{basename}' not found in component "
+                f"'{component_name}'. Available titles: {available}"
+            )
+        if not blob.is_file():
+            # The manifest lists the layer but its blob was never written to
+            # tmp_dir; report that explicitly instead of "title not found".
+            raise RuntimeError(
+                f"ORAS blob '{blob.name}' for '{basename}' in component "
+                f"'{component_name}' is missing from {tmp_dir}"
+            )
+        out = destination / basename
+        shutil.copy2(str(blob), str(out))
+        logger.info("Copied ORAS blob '%s' -> %s", basename, out)
+
+
 def process_component(component: dict) -> None:
     """Pull and extract one component's artifacts into CONTENT_DIR/<name>/."""
     name = component.get("name")
@@ -174,32 +216,42 @@ def process_component(component: dict) -> None:
         logger.info("Files to extract from RPA: %s", wanted_files)
 
         manifest = json.loads((tmp_dir / "manifest.json").read_text())
-        layer_digests = [layer["digest"] for layer in manifest.get("layers", [])]
-
-        for digest in layer_digests:
-            layer_file = tmp_dir / digest.removeprefix("sha256:")
-            if not layer_file.exists():
-                continue
-            with tarfile.open(str(layer_file)) as tf:
-                for image_path in extract_dirs:
-                    if _safe_extract_layer(tf, image_path, tmp_dir, layer_file.name):
-                        logger.info("Extracting %s/ from %s...", image_path, layer_file.name)
-                    else:
-                        logger.info(
-                            "skipping %s. It doesn't contain the %s dir",
-                            layer_file.name,
-                            image_path,
-                        )
-
-        for wanted in wanted_files:
-            src = tmp_dir / wanted
-            if src.is_file():
-                shutil.copy2(str(src), str(destination / src.name))
-            else:
-                logger.error("Expected file not found in container: %s", wanted)
-                raise RuntimeError(
-                    f"File '{wanted}' declared in RPA was not found in any container layer"
-                )
+
+        config_media_type = manifest.get("config", {}).get("mediaType", "")
+        if config_media_type == "application/vnd.oci.empty.v1+json":
+            # ORAS artifact: layers are raw file blobs, not tar archives.
+            # Each layer carries an org.opencontainers.image.title annotation
+            # that holds the original filename.  Copy blobs directly to destination.
+            _extract_from_oras(manifest, tmp_dir, wanted_files, destination, name)
+        else:
+            layer_digests = [layer["digest"] for layer in manifest.get("layers", [])]
+
+            for digest in layer_digests:
+                layer_file = tmp_dir / digest.removeprefix("sha256:")
+                if not layer_file.exists():
+                    continue
+                with tarfile.open(str(layer_file)) as tf:
+                    for image_path in extract_dirs:
+                        if _safe_extract_layer(tf, image_path, tmp_dir, layer_file.name):
+                            logger.info(
+                                "Extracting %s/ from %s...", image_path, layer_file.name
+                            )
+                        else:
+                            logger.info(
+                                "skipping %s. It doesn't contain the %s dir",
+                                layer_file.name,
+                                image_path,
+                            )
+
+            for wanted in wanted_files:
+                src = tmp_dir / wanted
+                if src.is_file():
+                    shutil.copy2(str(src), str(destination / src.name))
+                else:
+                    logger.error("Expected file not found in container: %s", wanted)
+                    raise RuntimeError(
+                        f"File '{wanted}' declared in RPA was not found in any container layer"
+                    )
     finally:
         shutil.rmtree(tmp_dir, ignore_errors=True)
 
@@ -240,14 +292,41 @@ def _matches(entry: dict, os_name: str) -> bool:
             logger.info("  - Linux content detected")
 
 
+def _validate_disk_image_components(components: list[dict]) -> None:
+    """Reject disk-image components that declare darwin or windows file entries.
+
+    Disk images must target os: linux. Both ``files[]`` and ``staged.files[]``
+    of every disk-image component are inspected; components that are not
+    disk-image releases are ignored. Checking this up front avoids spending
+    time on downloads that would only fail later in the pipeline.
+
+    Raises RuntimeError naming the first offending component and entry.
+    """
+    rejected_os = ("darwin", "windows")
+    disk_image_components = (
+        candidate
+        for candidate in components
+        if disk_image_utils.is_disk_image_component(candidate)
+    )
+    for component in disk_image_components:
+        name = component.get("name", "<unknown>")
+        staged = component.get("staged") or {}
+        entries = [*(component.get("files") or []), *(staged.get("files") or [])]
+        for entry in entries:
+            entry_os = entry.get("os", "")
+            if entry_os not in rejected_os:
+                continue
+            raise RuntimeError(
+                f"Component '{name}' has contentType: disk-image but entry "
+                f"'{entry.get('source', '<unknown>')}' has os: {entry_os}. "
+                f"Disk images must be os: linux. Fix the RPA before releasing."
+            )
+
+
 def run(concurrent_limit: int) -> None:
     """Extract artifacts from all snapshot components and write OS flag files."""
     snapshot = json.loads(os.environ["SNAPSHOT_JSON"])
 
+    components = snapshot.get("components", [])
+
+    # Validate disk-image component constraints before doing any image pulls.
+    _validate_disk_image_components(components)
+
     _setup_docker_config()
     CONTENT_DIR.mkdir(parents=True, exist_ok=True)
 
-    components = snapshot.get("components", [])
     errors: list[str] = []
 
     with ThreadPoolExecutor(max_workers=concurrent_limit) as executor:
 
@@ -46,6 +46,7 @@
 import tempfile
 from pathlib import Path
 
+import disk_image_utils
 import publish_to_cgw_wrapper
 import pulp_push_wrapper
 import yaml  # type: ignore
@@ -365,6 +366,18 @@ def run(exodus_gw_env: str, cgw_hostname: str, cert_expiration_warn_days: int) -
             cg = component.get("contentGateway") or {}
             cg["contentDir"] = str(component_dir)
             component["contentGateway"] = cg
+            # Disk-image components that target both CDN and CGW describe their
+            # deliverables in staged.files[] (consumed by the CDN/Customer Portal
+            # flow) but also need those files listed in files[] for CGW registration.
+            # If files[] is already populated the team provided it directly (e.g. a
+            # CGW-only release), so we leave it untouched.
+            # NOTE: this intentionally mutates the component dict in-place. It is safe
+            # because the Pulp push and CDN exclusion logic for this component have already
+            # completed above, and the only remaining consumer is publish_to_cgw_wrapper
+            # called below via json.dumps(snapshot).
+            is_disk_image = disk_image_utils.is_disk_image_component(component)
+            if is_disk_image and not component.get("files"):
+                component["files"] = (component.get("staged") or {}).get("files", [])
 
     cgw_push = any(bool(c.get("contentGateway")) for c in snapshot.get("components", []))
     if cgw_push: