Skip to content

Commit 3e75f74

Browse files
committed
feat!(ISV-6786): consume new data structure for generating image SBOMs
1 parent 6b19c10 commit 3e75f74

22 files changed

Lines changed: 286 additions & 478 deletions

docs/sboms/oci_image.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ mobster --verbose generate oci-image \
4444
- `--base-image-digest-file` -- points to a file with digests for images used in Dockerfile.
4545
If omitted, the references will be fetched via `oras`. The expected format of the file is
4646
`<registry>/<repository>:<tag> <registry>/<repository>:<tag>@sha256:<digest>`
47-
- `--dockerfile-target` -- if a build target was used for multi-stage build, use this argument to specify the build target
4847
- `--additional-base-images` -- optionally add references to other build images outside the parsed Dockerfile.
4948
Expects the format `<registry>/<repository>:<tag>@sha256:<digest value>`
5049
- `--contextualize` -- Allows SBOM contextualization (see [Contextual SBOM](#contextual-sbom))

poetry.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ dependencies = [
3636
"aiofiles (>=24.1.0,<25.0.0)",
3737
"httpx (>=0.28.1,<0.29.0)",
3838
"aioboto3 (>=15.2.0,<15.3.0)",
39+
"pyyaml (>=6.0.3,<7.0.0)",
3940
]
4041

4142
[project.urls]

src/mobster/cli.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -130,31 +130,9 @@ def validated_additional_reference(value: str) -> str:
130130
help="Image digest for the OCI image in the format sha256:<digest>",
131131
)
132132
oci_image_parser.add_argument(
133-
"--parsed-dockerfile-path",
133+
"--metadata-path",
134134
type=Path,
135-
help="Path to the parsed Dockerfile file",
136-
)
137-
oci_image_parser.add_argument(
138-
"--base-image-digest-file",
139-
type=Path,
140-
help="Path to the file containing references "
141-
"to images in the Dockerfile and their digests. "
142-
"Expected format: "
143-
"`<registry>/<repository>:<tag> <registry>/<repository>:<tag>@sha256:<digest>`",
144-
)
145-
oci_image_parser.add_argument(
146-
"--dockerfile-target",
147-
type=str,
148-
help="The name of the build target from the Dockerfile",
149-
default=None,
150-
)
151-
oci_image_parser.add_argument(
152-
"--additional-base-image",
153-
type=validated_additional_reference,
154-
action="append",
155-
default=[],
156-
help="Base (builder) image to add, can be specified multiple times. "
157-
"Expects the format <registry>/<repository>:<tag>@sha256:<digest value>",
135+
help="Path to a metadata file generated by sbomgen.",
158136
)
159137
oci_image_parser.add_argument(
160138
"--arch",

src/mobster/cmd/generate/oci_image/__init__.py

Lines changed: 50 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,20 @@
1010
from typing import Any
1111

1212
from cyclonedx.exception import CycloneDxException
13+
from mobster.cmd.generate.oci_image.sbomgen import SBOMMetadata
1314
from spdx_tools.spdx.jsonschema.document_converter import DocumentConverter
1415
from spdx_tools.spdx.model.document import Document
1516
from spdx_tools.spdx.validation.document_validator import validate_full_spdx_document
1617
from spdx_tools.spdx.writer.write_utils import convert
18+
import yaml
1719

1820
import mobster.utils
1921
from mobster import syft
2022
from mobster.cmd.generate.base import GenerateCommandWithOutputTypeSelector
2123
from mobster.cmd.generate.oci_image.add_image import extend_sbom_with_image_reference
2224
from mobster.cmd.generate.oci_image.base_images_dockerfile import (
23-
extend_sbom_with_base_images_from_dockerfile,
24-
get_base_images_refs_from_dockerfile,
25+
extend_sbom_with_base_images,
2526
get_digest_for_image_ref,
26-
get_image_objects_from_file,
2727
)
2828
from mobster.cmd.generate.oci_image.contextual_sbom.builder import (
2929
BuilderContextualizationError,
@@ -99,6 +99,15 @@ async def _load_and_filter_hermeto_sbom(self) -> dict[str, Any]:
9999
arch = self.cli_args.arch or mobster.utils.identify_arch()
100100
return filter_hermeto_sbom_by_arch(hermeto_sbom, arch)
101101

102+
def _load_metadata(self) -> None:
103+
"""
104+
Load a metadata file from the --metadata-path argument into
105+
self._metadata.
106+
"""
107+
with open(self.cli_args.metadata_path) as metadata_file:
108+
raw_metadata = yaml.load(metadata_file, yaml.Loader)
109+
self._metadata = SBOMMetadata.from_dict(raw_metadata)
110+
102111
async def _handle_bom_inputs(
103112
self,
104113
) -> dict[str, Any]:
@@ -113,13 +122,19 @@ async def _handle_bom_inputs(
113122
self.cli_args.from_hermeto is None
114123
and self.cli_args.from_syft is None
115124
and self.cli_args.image_pullspec is None
125+
and self.cli_args.metadata_path is None
116126
):
117127
raise ArgumentError(
118128
None,
119-
"At least one of --from-syft, --from-hermeto or --image-pullspec"
120-
" must be provided",
129+
"At least one of --from-syft, --from-hermeto, --image-pullspec, "
130+
"or --metadata-path must be provided",
121131
)
122132

133+
if self.cli_args.metadata_path is not None:
134+
self._load_metadata()
135+
# if we don't have an sbom provided to us, use syft to generate it
136+
if self.cli_args.from_syft is None and self.cli_args.from_hermeto is None:
137+
return await syft.scan_image(self._metadata.image.pullspec)
123138
if self.cli_args.from_syft is not None:
124139
# Merging Syft & Hermeto SBOMs
125140
if len(self.cli_args.from_syft) > 1 or self.cli_args.from_hermeto:
@@ -228,7 +243,7 @@ async def _assess_and_dispatch_contextual_workflow(
228243
(non-modified) SBOM is further processed by mobster.
229244
Args:
230245
component_sbom_doc: The component SBOM created for this image.
231-
base_images_refs: List of references from the parsed Dockerfile.
246+
base_images_refs: List of references from the build.
232247
image_arch: CPU architecture of this image.
233248
234249
Returns:
@@ -264,11 +279,12 @@ async def execute(self) -> Any:
264279
"""
265280
LOGGER.debug("Generating SBOM document for OCI image")
266281

282+
# Get/merge the raw SBOM
267283
merged_sbom_dict = await self._handle_bom_inputs()
268284
sbom: Document | CycloneDX1BomWrapper
269-
image_arch = identify_arch()
285+
image_arch = self.cli_args.arch or mobster.utils.identify_arch()
270286

271-
# Parsing into objects
287+
# Parse into objects
272288
if merged_sbom_dict.get("bomFormat") == "CycloneDX":
273289
if self.cli_args.contextualize:
274290
raise ArgumentError(
@@ -280,9 +296,32 @@ async def execute(self) -> Any:
280296
else:
281297
raise ValueError("Unknown SBOM Format!")
282298

283-
# Extending with image reference
284-
if self.cli_args.image_pullspec:
285-
image_arch = self.cli_args.arch or mobster.utils.identify_arch()
299+
base_images_refs = []
300+
base_images_map: dict[str, Image] = {}
301+
302+
# Extend with image reference
303+
if self.cli_args.metadata_path:
304+
image = Image.from_image_index_url_and_digest(
305+
self._metadata.image.pullspec,
306+
self._metadata.image.digest,
307+
arch=image_arch,
308+
)
309+
await extend_sbom_with_image_reference(sbom, image, False)
310+
for base_image_data in self._metadata.base_images:
311+
base_image = Image.from_image_index_url_and_digest(
312+
base_image_data.pullspec,
313+
base_image_data.digest,
314+
)
315+
base_images_refs.append(base_image_data.pullspec)
316+
base_images_map[base_image_data.pullspec] = base_image
317+
await extend_sbom_with_base_images(sbom, base_images_refs, base_images_map)
318+
for extra_image_data in self._metadata.extra_images:
319+
extra_image = Image.from_image_index_url_and_digest(
320+
extra_image_data.pullspec,
321+
extra_image_data.digest,
322+
)
323+
await extend_sbom_with_image_reference(sbom, extra_image, True)
324+
elif self.cli_args.image_pullspec:
286325
if not self.cli_args.image_digest:
287326
LOGGER.info(
288327
"Provided pullspec but not digest."
@@ -308,37 +347,6 @@ async def execute(self) -> Any:
308347
"Provided image digest but no pullspec. The digest value is ignored."
309348
)
310349

311-
base_images_refs = []
312-
base_images_map: dict[str, Image] = {}
313-
314-
# Extending with base images references from a dockerfile
315-
if self.cli_args.parsed_dockerfile_path:
316-
with open(
317-
self.cli_args.parsed_dockerfile_path, encoding="utf-8"
318-
) as parsed_dockerfile_io:
319-
parsed_dockerfile = json.load(parsed_dockerfile_io)
320-
321-
base_images_refs = await get_base_images_refs_from_dockerfile(
322-
parsed_dockerfile, self.cli_args.dockerfile_target
323-
)
324-
325-
if self.cli_args.base_image_digest_file:
326-
LOGGER.debug(
327-
"Supplied pre-parsed image digest file, will operate offline."
328-
)
329-
base_images_map = await get_image_objects_from_file(
330-
self.cli_args.base_image_digest_file
331-
)
332-
await extend_sbom_with_base_images_from_dockerfile(
333-
sbom, base_images_refs, base_images_map
334-
)
335-
336-
# Extending with additional base images
337-
for image_ref in self.cli_args.additional_base_image:
338-
image_object = Image.from_oci_artifact_reference(image_ref)
339-
await extend_sbom_with_image_reference(
340-
sbom, image_object, is_builder_image=True
341-
)
342350
with log_elapsed("Contextual workflow", logging.INFO):
343351
contextual_sbom = await self._assess_and_dispatch_contextual_workflow(
344352
sbom, base_images_refs, base_images_map, image_arch

src/mobster/cmd/generate/oci_image/base_images_dockerfile.py

Lines changed: 4 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Module for augmenting the oci-image SBOM with information from a parsed Dockerfile"""
1+
"""Module for augmenting the oci-image SBOM with information from various sources"""
22

33
import json
44
import logging
@@ -27,74 +27,6 @@
2727
LOGGER = logging.getLogger(__name__)
2828

2929

30-
async def get_base_images_refs_from_dockerfile(
31-
parsed_dockerfile: dict[str, Any], target_stage: str | None = None
32-
) -> list[str | None]:
33-
"""
34-
Reads the base images from provided parsed dockerfile, does not include
35-
stages after the target of the build. So the last image returned is
36-
the parent image used.
37-
38-
Args:
39-
parsed_dockerfile (dict[str, Any]): Contents of the parsed dockerfile
40-
target_stage (str): The target stage for the build
41-
Returns:
42-
list[str | None]: List of base images used during build as extracted
43-
from the dockerfile in the order they were used.
44-
`FROM SCRATCH` is identified as `None`.
45-
46-
Example:
47-
If the Dockerfile looks like
48-
FROM registry.access.redhat.com/ubi8/ubi:latest as builder
49-
...
50-
FROM builder
51-
...
52-
53-
Then the relevant part of parsed_dockerfile look like
54-
{
55-
"Stages": [
56-
{
57-
"BaseName": "registry.access.redhat.com/ubi8/ubi:latest",
58-
"As": "builder",
59-
"From": {"Image": "registry.access.redhat.com/ubi8/ubi:latest"},
60-
},
61-
{
62-
"BaseName": "builder",
63-
"From": {"Stage": {"Named": "builder", "Index": 0}},
64-
},
65-
]
66-
},
67-
"""
68-
base_images_pullspecs: list[str | None] = []
69-
for stage in parsed_dockerfile.get("Stages", []):
70-
is_actually_image = True
71-
72-
from_field = stage.get("From", {})
73-
# Ignore scratch image as well as
74-
# references to previous stages
75-
if "Stage" in from_field:
76-
is_actually_image = False
77-
if from_field.get("Scratch"):
78-
# It is an empty image
79-
base_images_pullspecs.append(None)
80-
is_actually_image = False
81-
base_name: str = stage.get("BaseName")
82-
if is_actually_image and base_name and not base_name.startswith("oci-archive:"):
83-
# flatpak archives are not real base images. So we skip them
84-
base_images_pullspecs.append(base_name.strip("'\""))
85-
86-
# Don't include images after the target used for build
87-
alias = stage.get("As")
88-
if target_stage and alias and alias == target_stage:
89-
# The `AS` keyword of this stage matches the target
90-
break
91-
if target_stage and not alias and base_name == target_stage:
92-
# This stage does not use the `AS` keyword,
93-
# the pull-spec matches the target
94-
break
95-
return base_images_pullspecs
96-
97-
9830
async def get_digest_for_image_ref(image_ref: str, arch: Any = None) -> str | None:
9931
"""
10032
Fetches the digest of a pullspec using oras.
@@ -142,27 +74,6 @@ def get_base_images_digests_lines(base_images_digests: Path) -> list[str]:
14274
return list(input_file_stream)
14375

14476

145-
async def get_image_objects_from_file(base_images_digests: Path) -> dict[str, Image]:
146-
"""
147-
Parses the base image digest file into a dictionary of
148-
image references present in a Dockerfile and Image
149-
objects.
150-
Args:
151-
base_images_digests (Path): File containing the digests of images.
152-
expects the format <image_ref> <name>:<tag>@sha256:<digest>
153-
154-
Returns:
155-
dict[str, Image]: Mapping of the references to Image objects
156-
"""
157-
base_images_mapping = {}
158-
for line in get_base_images_digests_lines(base_images_digests):
159-
line = line.strip()
160-
image_ref, image_full_reference = re.split(r"\s+", line)
161-
image_obj = Image.from_oci_artifact_reference(image_full_reference.strip("'\""))
162-
base_images_mapping[image_ref.strip("'\"")] = image_obj
163-
return base_images_mapping
164-
165-
16677
async def get_objects_for_base_images(
16778
base_images_refs: list[str | None],
16879
) -> dict[str, Image]:
@@ -221,8 +132,8 @@ async def _get_images_and_their_annotations(
221132
if not image_obj:
222133
LOGGER.warning(
223134
"Cannot get information about base image "
224-
"%s mentioned in the Dockerfile! THIS MEANS "
225-
"THE PRODUCED SBOM WILL BE INCOMPLETE!",
135+
"%s! THIS MEANS THE PRODUCED SBOM WILL BE "
136+
"INCOMPLETE!",
226137
image_ref,
227138
)
228139
continue
@@ -409,7 +320,7 @@ async def _extend_cdx_with_base_images(
409320
)
410321

411322

412-
async def extend_sbom_with_base_images_from_dockerfile(
323+
async def extend_sbom_with_base_images(
413324
sbom: CycloneDX1BomWrapper | Document,
414325
base_images_refs: list[str | None],
415326
base_images_objects: dict[str, Image] | None = None,

0 commit comments

Comments
 (0)