From c6cc26fe6141adf0803bd2e0db45de7895ef34aa Mon Sep 17 00:00:00 2001
From: git-hyagi <45576767+git-hyagi@users.noreply.github.com>
Date: Mon, 23 Sep 2024 14:30:47 -0300
Subject: [PATCH] Add compressed layers size field to manifest model

closes: #1767
---
 CHANGES/1767.feature                           |  1 +
 .../container-repair-manifest-metadatas.py     | 16 ++++++++++------
 .../0044_add_manifest_compressed_image_size.py | 18 ++++++++++++++++++
 pulp_container/app/models.py                   | 16 ++++++++++++++++
 pulp_container/app/registry.py                 |  3 ++-
 pulp_container/app/serializers.py              |  6 ++++++
 pulp_container/app/tasks/builder.py            |  5 ++++-
 pulp_container/app/tasks/sync_stages.py        |  3 +++
 .../tests/functional/api/test_build_images.py  |  1 +
 .../functional/api/test_pull_through_cache.py  |  1 +
 .../tests/functional/api/test_push_content.py  |  1 +
 .../tests/functional/api/test_sync.py          |  1 +
 12 files changed, 64 insertions(+), 8 deletions(-)
 create mode 100644 pulp_container/app/migrations/0044_add_manifest_compressed_image_size.py

diff --git a/CHANGES/1767.feature b/CHANGES/1767.feature
index ead8ebcc2..70a254327 100644
--- a/CHANGES/1767.feature
+++ b/CHANGES/1767.feature
@@ -2,3 +2,4 @@ The Manifest model has been enhanced with a new:
  * `architecture` field, which specifies the CPU architecture for which the binaries in the image
    are designed to run.
  * `os` field, which specifies the operating system which the image is built to run on.
+ * `compressed_layers_size` field, which specifies the sum of the sizes of all compressed layers.
diff --git a/pulp_container/app/management/commands/container-repair-manifest-metadatas.py b/pulp_container/app/management/commands/container-repair-manifest-metadatas.py
index 5c5173ed2..8b5a940f9 100644
--- a/pulp_container/app/management/commands/container-repair-manifest-metadatas.py
+++ b/pulp_container/app/management/commands/container-repair-manifest-metadatas.py
@@ -8,6 +8,7 @@
 from django.conf import settings
 from django.core.exceptions import ObjectDoesNotExist
 from django.core.management import BaseCommand
+from django.db.models import Q
 
 from pulpcore.plugin.cache import SyncContentCache
 
@@ -16,8 +17,8 @@
 
 class Command(BaseCommand):
     """
-    A management command to handle the initialization of empty architecture and os fields for
-    container images.
+    A management command to handle the initialization of empty architecture, os, and
+    compressed_layers_size fields for container images.
 
     This command retrieves a list of manifests that have a null architecture field and populates
     them with the appropriate architecture definitions sourced from the corresponding
@@ -29,7 +30,9 @@ class Command(BaseCommand):
     def handle(self, *args, **options):
         manifests_updated_count = 0
 
-        manifests_v1 = Manifest.objects.filter(architecture__isnull=True)
+        manifests_v1 = Manifest.objects.filter(
+            Q(architecture__isnull=True) | Q(compressed_layers_size__isnull=True)
+        )
         manifests_updated_count += self.update_manifests(manifests_v1)
 
         self.stdout.write(
@@ -50,11 +53,12 @@ def update_manifests(self, manifests_qs):
             # suppress non-existing/already migrated artifacts and corrupted JSON files
             with suppress(ObjectDoesNotExist, JSONDecodeError):
                 manifest_data = json.loads(manifest.data)
-                manifest.init_metadata(manifest_data)
+                manifest.init_architecture_and_os(manifest_data)
+                manifest.init_compressed_layers_size(manifest_data)
                 manifests_to_update.append(manifest)
 
             if len(manifests_to_update) > 1000:
-                fields_to_update = ["architecture", "os"]
+                fields_to_update = ["architecture", "compressed_layers_size", "os"]
                 manifests_qs.model.objects.bulk_update(
                     manifests_to_update,
                     fields_to_update,
@@ -63,7 +67,7 @@ def update_manifests(self, manifests_qs):
                 manifests_to_update.clear()
 
         if manifests_to_update:
-            fields_to_update = ["architecture", "os"]
+            fields_to_update = ["architecture", "compressed_layers_size", "os"]
             manifests_qs.model.objects.bulk_update(
                 manifests_to_update,
                 fields_to_update,
diff --git a/pulp_container/app/migrations/0044_add_manifest_compressed_image_size.py b/pulp_container/app/migrations/0044_add_manifest_compressed_image_size.py
new file mode 100644
index 000000000..58300a5bb
--- /dev/null
+++ b/pulp_container/app/migrations/0044_add_manifest_compressed_image_size.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.16 on 2024-09-24 11:48
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('container', '0043_add_manifest_os'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='manifest',
+            name='compressed_layers_size',
+            field=models.TextField(null=True),
+        ),
+    ]
diff --git a/pulp_container/app/models.py b/pulp_container/app/models.py
index ee0457cb9..3c1a85c76 100644
--- a/pulp_container/app/models.py
+++ b/pulp_container/app/models.py
@@ -80,6 +80,8 @@ class Manifest(Content):
         architecture (models.TextField): CPU architecture for which the binaries in the image are
             designed to run.
         os (models.TextField): Operating System which the image is built to run on.
+        compressed_layers_size (models.TextField): Sum of the sizes, in bytes, of all compressed
+            layers.
 
     Relations:
         blobs (models.ManyToManyField): Many-to-many relationship with Blob.
@@ -108,6 +110,7 @@ class Manifest(Content):
     labels = models.JSONField(default=dict)
     architecture = models.TextField(null=True)
     os = models.TextField(null=True)
+    compressed_layers_size = models.TextField(null=True)
 
     is_bootable = models.BooleanField(default=False)
     is_flatpak = models.BooleanField(default=False)
@@ -130,6 +133,7 @@ def init_metadata(self, manifest_data=None):
         has_labels = self.init_labels()
         has_image_nature = self.init_image_nature()
         self.init_architecture_and_os(manifest_data)
+        self.init_compressed_layers_size(manifest_data)
         return has_annotations or has_labels or has_image_nature
 
     def init_annotations(self, manifest_data=None):
@@ -195,6 +199,18 @@ def init_architecture_and_os(self, manifest_data):
         self.architecture = config_blob.get("architecture", None)
         self.os = config_blob.get("os", None)
 
+    def init_compressed_layers_size(self, manifest_data):
+        """Store the sum of the compressed layer sizes declared in the manifest."""
+        # manifestv2 schema1 has only blobSum definition for each layer
+        if manifest_data.get("fsLayers", None):
+            self.compressed_layers_size = 0
+            return
+
+        # no "layers" key (e.g. a manifest list) or a layer without "size" counts as 0 bytes
+        layers = manifest_data.get("layers") or []
+        sizes = (layer.get("size") or 0 for layer in layers)
+        self.compressed_layers_size = sum(sizes)
+
     def is_bootable_image(self):
         if (
             self.annotations.get("containers.bootc") == "1"
diff --git a/pulp_container/app/registry.py b/pulp_container/app/registry.py
index 3c287e389..0136f1c7f 100644
--- a/pulp_container/app/registry.py
+++ b/pulp_container/app/registry.py
@@ -455,11 +455,12 @@ async def init_pending_content(self, digest, manifest_data, media_type, raw_text
             data=raw_text_data,
         )
 
-        # if media_type of schema1 configure only manifest architecture and os
+        # if media_type of schema1 configure only architecture, os and compressed layers size
         if media_type in (MEDIA_TYPE.MANIFEST_V2, MEDIA_TYPE.MANIFEST_OCI):
             await sync_to_async(manifest.init_metadata)(manifest_data=manifest_data)
         else:
             await sync_to_async(manifest.init_architecture_and_os)(manifest_data=manifest_data)
+            await sync_to_async(manifest.init_compressed_layers_size)(manifest_data=manifest_data)
 
         try:
             await manifest.asave()
diff --git a/pulp_container/app/serializers.py b/pulp_container/app/serializers.py
index e07bb9be7..c99d7984d 100644
--- a/pulp_container/app/serializers.py
+++ b/pulp_container/app/serializers.py
@@ -113,6 +113,11 @@ class ManifestSerializer(NoArtifactContentSerializer):
         required=False,
         default=None,
     )
+    compressed_layers_size = serializers.CharField(
+        help_text="Specifies the sum of the sizes, in bytes, of all compressed layers",
+        required=False,
+        default=None,
+    )
 
     class Meta:
         fields = NoArtifactContentSerializer.Meta.fields + (
@@ -128,6 +133,7 @@ class Meta:
             "is_flatpak",
             "architecture",
             "os",
+            "compressed_layers_size",
         )
         model = models.Manifest
 
diff --git a/pulp_container/app/tasks/builder.py b/pulp_container/app/tasks/builder.py
index cee800c57..27beb53c2 100644
--- a/pulp_container/app/tasks/builder.py
+++ b/pulp_container/app/tasks/builder.py
@@ -87,11 +87,14 @@ def add_image_from_directory_to_repository(path, repository, tag):
         config_blob_dict, _ = get_content_data(blob_artifact)
         manifest.architecture = config_blob_dict.get("architecture", None)
         manifest.os = config_blob_dict.get("os", None)
-        manifest.save()
 
         pks_to_add = []
+        compressed_size = 0
         for layer in manifest_json["layers"]:
+            compressed_size += layer.get("size")
             pks_to_add.append(get_or_create_blob(layer, manifest, path).pk)
+        manifest.compressed_layers_size = compressed_size
+        manifest.save()
         pks_to_add.extend([manifest.pk, tag.pk, config_blob.pk])
         new_repo_version.add_content(Content.objects.filter(pk__in=pks_to_add))
 
diff --git a/pulp_container/app/tasks/sync_stages.py b/pulp_container/app/tasks/sync_stages.py
index f59cb0cef..7892b2dce 100644
--- a/pulp_container/app/tasks/sync_stages.py
+++ b/pulp_container/app/tasks/sync_stages.py
@@ -332,12 +332,15 @@ async def handle_blobs(self, manifest_dc, content_data):
         Handle blobs.
         """
         manifest_dc.extra_data["blob_dcs"] = []
+        compressed_size = 0
         for layer in content_data.get("layers") or content_data.get("fsLayers"):
             if not self._include_layer(layer):
                 continue
+            compressed_size += layer.get("size", 0)
             blob_dc = self.create_blob(layer)
             manifest_dc.extra_data["blob_dcs"].append(blob_dc)
             await self.put(blob_dc)
+        manifest_dc.content.compressed_layers_size = compressed_size
         layer = content_data.get("config", None)
         if layer:
             blob_dc = self.create_blob(layer, deferred_download=False)
diff --git a/pulp_container/tests/functional/api/test_build_images.py b/pulp_container/tests/functional/api/test_build_images.py
index 2b281377c..003586c84 100644
--- a/pulp_container/tests/functional/api/test_build_images.py
+++ b/pulp_container/tests/functional/api/test_build_images.py
@@ -67,3 +67,4 @@ def test_build_image(
     manifest = manifest.to_dict()["results"][0]
     assert manifest["architecture"] == "amd64"
     assert manifest["os"] == "linux"
+    assert int(manifest["compressed_layers_size"]) > 0
diff --git a/pulp_container/tests/functional/api/test_pull_through_cache.py b/pulp_container/tests/functional/api/test_pull_through_cache.py
index 73dccceea..9ed957905 100644
--- a/pulp_container/tests/functional/api/test_pull_through_cache.py
+++ b/pulp_container/tests/functional/api/test_pull_through_cache.py
@@ -65,6 +65,7 @@ def _pull_and_verify(images, pull_through_distribution):
             manifest = manifest.to_dict()["results"][0]
             assert manifest["architecture"] == "amd64"
             assert manifest["os"] == "linux"
+            assert int(manifest["compressed_layers_size"]) > 0
 
             path, tag = local_image_path.split(":")
             tags_to_verify.append(tag)
diff --git a/pulp_container/tests/functional/api/test_push_content.py b/pulp_container/tests/functional/api/test_push_content.py
index d2a26db52..d6b2570be 100644
--- a/pulp_container/tests/functional/api/test_push_content.py
+++ b/pulp_container/tests/functional/api/test_push_content.py
@@ -56,6 +56,7 @@ def test_push_using_registry_client_admin(
     manifest = manifest.to_dict()["results"][0]
     assert manifest["architecture"] == "amd64"
     assert manifest["os"] == "linux"
+    assert int(manifest["compressed_layers_size"]) > 0
 
     # ensure that same content can be pushed twice without permission errors
     local_registry.tag_and_push(image_path, local_url)
diff --git a/pulp_container/tests/functional/api/test_sync.py b/pulp_container/tests/functional/api/test_sync.py
index 761d0a83b..b04a4e02e 100644
--- a/pulp_container/tests/functional/api/test_sync.py
+++ b/pulp_container/tests/functional/api/test_sync.py
@@ -72,6 +72,7 @@ def test_basic_sync(
     manifest = manifest.to_dict()["results"][0]
     assert manifest["architecture"] == "amd64"
     assert manifest["os"] == "linux"
+    assert int(manifest["compressed_layers_size"]) > 0
 
 
 @pytest.mark.parallel