From ef623273108b4054942852913b940347c020f5ca Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 14 May 2025 09:37:18 -0400 Subject: [PATCH 01/25] Init Signed-off-by: Scott R. Shinn --- sbom_generator.py | 249 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 249 insertions(+) create mode 100644 sbom_generator.py diff --git a/sbom_generator.py b/sbom_generator.py new file mode 100644 index 000000000..6c4821b37 --- /dev/null +++ b/sbom_generator.py @@ -0,0 +1,249 @@ +import os +import json +import subprocess +from mockbuild.trace_decorator import traceLog +import hashlib +import re + +requires_api_version = "1.1" # Ensure compatibility with mock API + +# Plugin entry point +@traceLog() +def init(plugins, conf, buildroot): + # Ensure configuration exists for the plugin + if "sbom_generator_opts" not in conf: + conf["sbom_generator_opts"] = {} + SBOMGenerator(plugins, conf["sbom_generator_opts"], buildroot) + +class SBOMGenerator(object): + """Generates SBOM for the built packages.""" + # pylint: disable=too-few-public-methods + @traceLog() + def __init__(self, plugins, conf, buildroot): + + self.buildroot = buildroot + self.state = buildroot.state + self.rootdir = buildroot.rootdir + self.builddir = buildroot.builddir + self.conf = conf + self.sbom_enabled = self.conf.get('generate_sbom', True) + self.sbom_done = False + plugins.add_hook("prebuild", self._listSPECSDirectory) + plugins.add_hook("postbuild", self._generateSBOMPostBuildHook) + + @traceLog() + def _listSPECSDirectory(self): + """Lists the contents of the SPECS directory before building.""" + + print("DEBUG: Listing contents of SPECS directory before building:") +# print(f"Buildroot values:") +# for key, value in vars(self.buildroot).items(): +# print(f" {key}: {value}") + print(f"DEBUG: builddir is {self.buildroot.builddir}") + print(f"DEBUG: rootdir is {self.rootdir}") + print(f"DEBUG: resultsdir is {self.buildroot.resultdir}") + + # specs_dir is rootdir + builddir + SPECS + 
#specs_dir = os.path.join(self.buildroot.rootdir, self.buildroot.builddir, "rpmbuild/SPECS") + specs_dir = os.path.join(self.rootdir, "/foo/", "/bar/") + print(f"DEBUG: spec dir is {specs_dir}") + + try: + if os.path.exists(specs_dir): + specs_files = os.listdir(specs_dir) + print(f"Contents of SPECS directory: {specs_files}") + else: + print("SPECS directory does not exist.") + except Exception as e: + print(f"Failed to list contents of SPECS directory: {e}") + + @traceLog() + def _generateSBOMPostBuildHook(self): + if self.sbom_done or not self.sbom_enabled: + return + + out_file = os.path.join(self.buildroot.resultdir, 'sbom.spdx.json') + state_text = "Generating SBOM for built packages v0.8" + self.state.start(state_text) + + try: + build_dir = self.buildroot.resultdir + rpm_files = [f for f in os.listdir(build_dir) if f.endswith('.rpm')] + src_rpm_files = [f for f in os.listdir(build_dir) if f.endswith('.src.rpm')] + spec_file = next((f for f in os.listdir(build_dir) if f.endswith('.spec')), None) + + if not rpm_files and not src_rpm_files and not spec_file: + print("No RPM, source RPM, or spec file found for SBOM generation.") + return + + sbom = { + "SPDXVersion": "SPDX-2.3", + "DataLicense": "CC0-1.0", + "SPDXID": "SPDXRef-DOCUMENT", + "name": "mock-build", + "creator": "Mock-SBOM-Plugin", + "created": self.get_iso_timestamp(), + "packages": [], + "source_package": {} + } + + # Process spec file for sources and patches + if spec_file: + spec_path = os.path.join(build_dir, spec_file) + source_files = self.parse_spec_file(spec_path) + sbom["source_package"]["source_files"] = source_files + + build_environment = self.get_build_environment_packages() + + # Process binary RPMs + for rpm_file in rpm_files: + rpm_path = os.path.join(build_dir, rpm_file) + package_data = self.get_rpm_metadata(rpm_path) + if package_data: + sbom_package = { + "name": package_data.get("name"), + "version": package_data.get("version"), + "release": package_data.get("release"), + 
"license": package_data.get("license"), + "vendor": package_data.get("vendor"), + "url": package_data.get("url"), + "packager": package_data.get("packager"), + "files": [], + "dependencies": [], + "gpg_signature": None, + } + sbom["packages"].append(sbom_package) + + sbom["build_environment"] = build_environment + + with open(out_file, "w") as f: + json.dump(sbom, f, indent=4) + + print(f"SBOM successfully written to: {out_file}") + except Exception as e: + print(f"An error occurred during SBOM generation: {e}") + finally: + self.sbom_done = True + self.state.finish(state_text) + + def parse_spec_file(self, spec_path): + """Parses a spec file to extract source and patch files.""" + + # print that we're in this function + print("Parsing spec file") + # does spec file exist? if not print what it was looking for + if not os.path.isfile(spec_path): + print(f"Spec file not found: {spec_path}") + return [] + sources = [] + try: + with open(spec_path, 'r') as spec: + for line in spec: + line = line.strip() + # Match lines like Source0: or Patch1: + match = re.match(r'^(Source|Patch)[0-9]*:\s*(.+)$', line) + if match: + sources.append(match.group(2)) + print(f"Extracted source and patch files from spec: {sources}") + except Exception as e: + print(f"Failed to parse spec file {spec_path}: {e}") + return sources + + def get_iso_timestamp(self): + """Returns the current time in ISO 8601 format.""" + from datetime import datetime + return datetime.utcnow().isoformat() + "Z" + + def get_build_environment_packages(self): + """Returns the list of packages installed in the build environment.""" + try: + cmd = "rpm -qa --qf '%{NAME} %{VERSION}-%{RELEASE}.%{ARCH} %{SIGPGP}\n'" + output, _ = self.buildroot.doOutChroot(cmd, returnOutput=True, shell=True) + packages = [] + for line in output.splitlines(): + parts = line.split() + if len(parts) >= 3: + packages.append({ + "name": parts[0], + "version": parts[1], + "digital_signature": parts[2] if len(parts) > 2 else None + }) + 
#print(f"Build environment packages: {packages}") + return packages + except Exception as e: + print(f"Failed to retrieve build environment packages: {e}") + return [] + + def get_rpm_metadata(self, rpm_path): + """Extracts metadata from an RPM file.""" + if not os.path.isfile(rpm_path): + print(f"RPM file not found: {rpm_path}") + return {} + + cmd = ["rpm", "-qp", rpm_path, "--queryformat", + "\{\"name\": \"%{NAME}\", \"version\": \"%{VERSION}\", \"release\": \"%{RELEASE}\", \"arch\": \"%{ARCH}\", \"summary\": \"%{SUMMARY}\", \"license\": \"%{LICENSE}\", \"vendor\": \"%{VENDOR}\", \"url\": \"%{URL}\", \"packager\": \"%{PACKAGER}\"\}"] + try: + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + if not result.stdout.strip(): + print(f"No output from RPM command for {rpm_path}") + print(f"Command run: {cmd}") + return {} + + print(f"RPM command output: {result.stdout}") + return json.loads(result.stdout) + except subprocess.CalledProcessError as e: + print(f"RPM command failed for {rpm_path}: {e.stderr}") + return {} + except json.JSONDecodeError as e: + print(f"Failed to parse RPM metadata output: {result.stdout}") + return {} + + def get_rpm_file_list(self, rpm_path): + """Extracts the list of files from an RPM file.""" + cmd = ["rpm", "-qpl", rpm_path] + try: + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + files = result.stdout.splitlines() + print(f"Files in RPM {rpm_path}: {files}") + return files + except subprocess.CalledProcessError as e: + print(f"Failed to get file list for {rpm_path}: {e.stderr}") + return [] + + def get_rpm_dependencies(self, rpm_path): + """Extracts the list of dependencies from an RPM file.""" + cmd = ["rpm", "-qpR", rpm_path] + try: + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + dependencies = result.stdout.splitlines() + print(f"Dependencies for RPM {rpm_path}: 
{dependencies}") + return dependencies + except subprocess.CalledProcessError as e: + print(f"Failed to get dependencies for {rpm_path}: {e.stderr}") + return [] + + def get_rpm_signature(self, rpm_path): + """Extracts the GPG signature of an RPM file.""" + cmd = ["rpm", "-qpi", rpm_path] + try: + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + for line in result.stdout.splitlines(): + if line.startswith("Signature"): + print(f"GPG Signature for {rpm_path}: {line}") + return line + return None + except subprocess.CalledProcessError as e: + print(f"Failed to get GPG signature for {rpm_path}: {e.stderr}") + return None + + def hash_file(self, file_path): + """Calculates the SHA256 hash of a file.""" + sha256 = hashlib.sha256() + try: + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + sha256.update(chunk) + return sha256.hexdigest() + except Exception as e: + print(f"Failed to hash file {file_path}: {e}") + return None From 51ef8c3f9e17e1bffce4a998a6ffc05004f6b11a Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 14 May 2025 10:31:24 -0400 Subject: [PATCH 02/25] Adding SBOM module Signed-off-by: Scott R. Shinn --- docs/Plugin-SBOM.md | 109 +++++++++ mock/py/mockbuild/config.py | 6 +- mock/py/mockbuild/plugins/sbom_generator.py | 249 ++++++++++++++++++++ 3 files changed, 363 insertions(+), 1 deletion(-) create mode 100644 docs/Plugin-SBOM.md create mode 100644 mock/py/mockbuild/plugins/sbom_generator.py diff --git a/docs/Plugin-SBOM.md b/docs/Plugin-SBOM.md new file mode 100644 index 000000000..ef0a580d3 --- /dev/null +++ b/docs/Plugin-SBOM.md @@ -0,0 +1,109 @@ +--- +layout: default +title: Plugin SBOM Generator +--- + +This plugin generates a Software Bill of Materials (SBOM) in SPDX format for packages built with Mock. The SBOM provides detailed information about the build environment, source files, and resulting packages. 
+ +## Features + +* Generates SBOM in SPDX 2.3 format +* Captures information about: + * Source files and patches from spec files + * Binary RPM metadata + * Build environment packages + * Dependencies + * File lists + * GPG signatures +* Outputs SBOM as JSON file in the build results directory + +## Configuration + +The plugin is disabled by default. To enable it, add this to your configuration: + +```python +config_opts['plugin_conf']['sbom_generator_enable'] = True +config_opts['plugin_conf']['sbom_generator_opts'] = { + 'generate_sbom': True +} +``` + +You can also enable it for a single build using the command line: + + mock --enable-plugin=sbom_generator --rebuild package.src.rpm + +## Output + +The plugin generates a file named `sbom.spdx.json` in the build results directory (typically `/var/lib/mock/fedora-42-x86_64/result/`). The SBOM includes: + +* SPDX version and metadata +* Source package information + * Source files + * Patch files +* Binary package information + * Package name, version, and release + * License information + * Vendor and URL + * Packager information + * File lists + * Dependencies + * GPG signatures +* Build environment information + * List of installed packages + * Package versions + * Digital signatures + +## Example SBOM Structure + +```json +{ + "SPDXVersion": "SPDX-2.3", + "DataLicense": "CC0-1.0", + "SPDXID": "SPDXRef-DOCUMENT", + "name": "mock-build", + "creator": "Mock-SBOM-Plugin", + "created": "2024-01-19T15:20:00Z", + "packages": [ + { + "name": "package-name", + "version": "1.0", + "release": "1.fc42", + "license": "MIT", + "vendor": "Fedora Project", + "url": "https://example.com", + "packager": "Fedora Project", + "files": [], + "dependencies": [], + "gpg_signature": null + } + ], + "source_package": { + "source_files": [ + "source0.tar.gz", + "patch1.patch" + ] + }, + "build_environment": [ + { + "name": "package-name", + "version": "1.0-1.fc42", + "digital_signature": "RSA/SHA256, ..." 
+ } + ] +} +``` + +## Requirements + +* Python 3.x +* RPM tools for package metadata extraction +* Access to build environment for package information + +## Notes + +* The plugin runs in the `postbuild` hook, after the build completes +* SBOM generation is skipped if no RPM, source RPM, or spec file is found +* The plugin is designed to work with both source and binary RPM builds +* Build environment information is collected using `rpm -qa` command + +Available since version 6.1. \ No newline at end of file diff --git a/mock/py/mockbuild/config.py b/mock/py/mockbuild/config.py index 0296b855f..1d27c09a4 100644 --- a/mock/py/mockbuild/config.py +++ b/mock/py/mockbuild/config.py @@ -33,7 +33,7 @@ 'lvm_root', 'compress_logs', 'sign', 'pm_request', 'hw_info', 'procenv', 'showrc', 'rpkg_preprocessor', 'rpmautospec', 'buildroot_lock', 'export_buildroot_image', - 'unbreq', 'expand_spec'] + 'unbreq', 'expand_spec', 'sbom_generator'] def nspawn_supported(): """Detect some situations where the systemd-nspawn chroot code won't work""" @@ -264,6 +264,10 @@ def setup_default_config_opts(): 'expand_spec_opts': { 'rpmspec_opts': [], }, + 'sbom_generator_enable': False, + 'sbom_generator_opts': { + 'generate_sbom': True, + }, } config_opts['environment'] = { diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py new file mode 100644 index 000000000..6c4821b37 --- /dev/null +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -0,0 +1,249 @@ +import os +import json +import subprocess +from mockbuild.trace_decorator import traceLog +import hashlib +import re + +requires_api_version = "1.1" # Ensure compatibility with mock API + +# Plugin entry point +@traceLog() +def init(plugins, conf, buildroot): + # Ensure configuration exists for the plugin + if "sbom_generator_opts" not in conf: + conf["sbom_generator_opts"] = {} + SBOMGenerator(plugins, conf["sbom_generator_opts"], buildroot) + +class SBOMGenerator(object): + """Generates SBOM for 
the built packages.""" + # pylint: disable=too-few-public-methods + @traceLog() + def __init__(self, plugins, conf, buildroot): + + self.buildroot = buildroot + self.state = buildroot.state + self.rootdir = buildroot.rootdir + self.builddir = buildroot.builddir + self.conf = conf + self.sbom_enabled = self.conf.get('generate_sbom', True) + self.sbom_done = False + plugins.add_hook("prebuild", self._listSPECSDirectory) + plugins.add_hook("postbuild", self._generateSBOMPostBuildHook) + + @traceLog() + def _listSPECSDirectory(self): + """Lists the contents of the SPECS directory before building.""" + + print("DEBUG: Listing contents of SPECS directory before building:") +# print(f"Buildroot values:") +# for key, value in vars(self.buildroot).items(): +# print(f" {key}: {value}") + print(f"DEBUG: builddir is {self.buildroot.builddir}") + print(f"DEBUG: rootdir is {self.rootdir}") + print(f"DEBUG: resultsdir is {self.buildroot.resultdir}") + + # specs_dir is rootdir + builddir + SPECS + #specs_dir = os.path.join(self.buildroot.rootdir, self.buildroot.builddir, "rpmbuild/SPECS") + specs_dir = os.path.join(self.rootdir, "/foo/", "/bar/") + print(f"DEBUG: spec dir is {specs_dir}") + + try: + if os.path.exists(specs_dir): + specs_files = os.listdir(specs_dir) + print(f"Contents of SPECS directory: {specs_files}") + else: + print("SPECS directory does not exist.") + except Exception as e: + print(f"Failed to list contents of SPECS directory: {e}") + + @traceLog() + def _generateSBOMPostBuildHook(self): + if self.sbom_done or not self.sbom_enabled: + return + + out_file = os.path.join(self.buildroot.resultdir, 'sbom.spdx.json') + state_text = "Generating SBOM for built packages v0.8" + self.state.start(state_text) + + try: + build_dir = self.buildroot.resultdir + rpm_files = [f for f in os.listdir(build_dir) if f.endswith('.rpm')] + src_rpm_files = [f for f in os.listdir(build_dir) if f.endswith('.src.rpm')] + spec_file = next((f for f in os.listdir(build_dir) if 
f.endswith('.spec')), None) + + if not rpm_files and not src_rpm_files and not spec_file: + print("No RPM, source RPM, or spec file found for SBOM generation.") + return + + sbom = { + "SPDXVersion": "SPDX-2.3", + "DataLicense": "CC0-1.0", + "SPDXID": "SPDXRef-DOCUMENT", + "name": "mock-build", + "creator": "Mock-SBOM-Plugin", + "created": self.get_iso_timestamp(), + "packages": [], + "source_package": {} + } + + # Process spec file for sources and patches + if spec_file: + spec_path = os.path.join(build_dir, spec_file) + source_files = self.parse_spec_file(spec_path) + sbom["source_package"]["source_files"] = source_files + + build_environment = self.get_build_environment_packages() + + # Process binary RPMs + for rpm_file in rpm_files: + rpm_path = os.path.join(build_dir, rpm_file) + package_data = self.get_rpm_metadata(rpm_path) + if package_data: + sbom_package = { + "name": package_data.get("name"), + "version": package_data.get("version"), + "release": package_data.get("release"), + "license": package_data.get("license"), + "vendor": package_data.get("vendor"), + "url": package_data.get("url"), + "packager": package_data.get("packager"), + "files": [], + "dependencies": [], + "gpg_signature": None, + } + sbom["packages"].append(sbom_package) + + sbom["build_environment"] = build_environment + + with open(out_file, "w") as f: + json.dump(sbom, f, indent=4) + + print(f"SBOM successfully written to: {out_file}") + except Exception as e: + print(f"An error occurred during SBOM generation: {e}") + finally: + self.sbom_done = True + self.state.finish(state_text) + + def parse_spec_file(self, spec_path): + """Parses a spec file to extract source and patch files.""" + + # print that we're in this function + print("Parsing spec file") + # does spec file exist? 
if not print what it was looking for + if not os.path.isfile(spec_path): + print(f"Spec file not found: {spec_path}") + return [] + sources = [] + try: + with open(spec_path, 'r') as spec: + for line in spec: + line = line.strip() + # Match lines like Source0: or Patch1: + match = re.match(r'^(Source|Patch)[0-9]*:\s*(.+)$', line) + if match: + sources.append(match.group(2)) + print(f"Extracted source and patch files from spec: {sources}") + except Exception as e: + print(f"Failed to parse spec file {spec_path}: {e}") + return sources + + def get_iso_timestamp(self): + """Returns the current time in ISO 8601 format.""" + from datetime import datetime + return datetime.utcnow().isoformat() + "Z" + + def get_build_environment_packages(self): + """Returns the list of packages installed in the build environment.""" + try: + cmd = "rpm -qa --qf '%{NAME} %{VERSION}-%{RELEASE}.%{ARCH} %{SIGPGP}\n'" + output, _ = self.buildroot.doOutChroot(cmd, returnOutput=True, shell=True) + packages = [] + for line in output.splitlines(): + parts = line.split() + if len(parts) >= 3: + packages.append({ + "name": parts[0], + "version": parts[1], + "digital_signature": parts[2] if len(parts) > 2 else None + }) + #print(f"Build environment packages: {packages}") + return packages + except Exception as e: + print(f"Failed to retrieve build environment packages: {e}") + return [] + + def get_rpm_metadata(self, rpm_path): + """Extracts metadata from an RPM file.""" + if not os.path.isfile(rpm_path): + print(f"RPM file not found: {rpm_path}") + return {} + + cmd = ["rpm", "-qp", rpm_path, "--queryformat", + "\{\"name\": \"%{NAME}\", \"version\": \"%{VERSION}\", \"release\": \"%{RELEASE}\", \"arch\": \"%{ARCH}\", \"summary\": \"%{SUMMARY}\", \"license\": \"%{LICENSE}\", \"vendor\": \"%{VENDOR}\", \"url\": \"%{URL}\", \"packager\": \"%{PACKAGER}\"\}"] + try: + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + if not result.stdout.strip(): + 
print(f"No output from RPM command for {rpm_path}") + print(f"Command run: {cmd}") + return {} + + print(f"RPM command output: {result.stdout}") + return json.loads(result.stdout) + except subprocess.CalledProcessError as e: + print(f"RPM command failed for {rpm_path}: {e.stderr}") + return {} + except json.JSONDecodeError as e: + print(f"Failed to parse RPM metadata output: {result.stdout}") + return {} + + def get_rpm_file_list(self, rpm_path): + """Extracts the list of files from an RPM file.""" + cmd = ["rpm", "-qpl", rpm_path] + try: + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + files = result.stdout.splitlines() + print(f"Files in RPM {rpm_path}: {files}") + return files + except subprocess.CalledProcessError as e: + print(f"Failed to get file list for {rpm_path}: {e.stderr}") + return [] + + def get_rpm_dependencies(self, rpm_path): + """Extracts the list of dependencies from an RPM file.""" + cmd = ["rpm", "-qpR", rpm_path] + try: + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + dependencies = result.stdout.splitlines() + print(f"Dependencies for RPM {rpm_path}: {dependencies}") + return dependencies + except subprocess.CalledProcessError as e: + print(f"Failed to get dependencies for {rpm_path}: {e.stderr}") + return [] + + def get_rpm_signature(self, rpm_path): + """Extracts the GPG signature of an RPM file.""" + cmd = ["rpm", "-qpi", rpm_path] + try: + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + for line in result.stdout.splitlines(): + if line.startswith("Signature"): + print(f"GPG Signature for {rpm_path}: {line}") + return line + return None + except subprocess.CalledProcessError as e: + print(f"Failed to get GPG signature for {rpm_path}: {e.stderr}") + return None + + def hash_file(self, file_path): + """Calculates the SHA256 hash of a file.""" + sha256 = hashlib.sha256() + try: + 
with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + sha256.update(chunk) + return sha256.hexdigest() + except Exception as e: + print(f"Failed to hash file {file_path}: {e}") + return None From 0e6adf45ba031181bb734d34154b017b36e86ba4 Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 31 Dec 2025 11:11:27 -0500 Subject: [PATCH 03/25] Update SBOM generator plugin to v1.2.5 This update includes: - CycloneDX 1.5 schema compliance - Build hardening metadata capture (FORTIFY, PIE, RELRO, etc.) - Improved dependency tracking and PURL/CPE support - Updated documentation for CycloneDX support - New default configuration options in config.py Signed-off-by: Scott R. Shinn --- docs/Plugin-SBOM.md | 329 +++- mock/py/mockbuild/config.py | 6 + mock/py/mockbuild/plugins/sbom_generator.py | 1920 ++++++++++++++++++- 3 files changed, 2117 insertions(+), 138 deletions(-) diff --git a/docs/Plugin-SBOM.md b/docs/Plugin-SBOM.md index ef0a580d3..32da21453 100644 --- a/docs/Plugin-SBOM.md +++ b/docs/Plugin-SBOM.md @@ -3,23 +3,100 @@ layout: default title: Plugin SBOM Generator --- -This plugin generates a Software Bill of Materials (SBOM) in SPDX format for packages built with Mock. The SBOM provides detailed information about the build environment, source files, and resulting packages. +This plugin generates a Software Bill of Materials (SBOM) in CycloneDX format for packages built with Mock. The SBOM provides detailed information about the build environment, source files, and resulting packages, optimized for security use cases. 
## Features -* Generates SBOM in SPDX 2.3 format +* Generates SBOM in CycloneDX 1.5 format (JSON) * Captures information about: * Source files and patches from spec files - * Binary RPM metadata - * Build environment packages - * Dependencies - * File lists - * GPG signatures + * Binary RPM metadata with PURL and CPE identifiers + * Complete build toolchain packages + * Runtime dependencies + * File hashes (SHA-256) + * GPG signatures with detailed metadata * Outputs SBOM as JSON file in the build results directory +* Compatible with security scanners (Grype, Trivy, Snyk) + +## Usage + +### Basic Usage + +The simplest way to use the SBOM generator is to enable it for a single build: + +```bash +# Build a package and generate SBOM +mock --enable-plugin=sbom_generator --rebuild package.src.rpm + +# Or build from an existing SRPM +mock --enable-plugin=sbom_generator --rebuild ~/rpmbuild/SRPMS/package-1.0-1.fc42.src.rpm + +# Specify a chroot configuration +mock --enable-plugin=sbom_generator --rebuild package.src.rpm -r rocky-9-x86_64 +``` + +After the build completes, the SBOM will be available in the build results directory: + +```bash +# Find the SBOM file +ls /var/lib/mock/*/result/sbom.cyclonedx.json + +# View the SBOM +cat /var/lib/mock/rocky-9-x86_64/result/sbom.cyclonedx.json | jq . 
+ +# Get build results directory path +mock --resultdir package.src.rpm +``` + +### Viewing and Analyzing the SBOM + +The generated SBOM can be analyzed using various tools: + +```bash +# View basic SBOM information +jq '.metadata.component' sbom.cyclonedx.json +jq '.components | length' sbom.cyclonedx.json +jq '.dependencies | length' sbom.cyclonedx.json + +# List all built packages +jq '.components[] | select(.type == "library") | {name, version, purl}' sbom.cyclonedx.json + +# List source files used in the build +jq '.components[] | select(.properties[]?.name == "mock:source:type") | {name, hashes}' sbom.cyclonedx.json + +# View runtime dependencies for a specific package +jq '.dependencies[] | select(.ref | contains("httpd"))' sbom.cyclonedx.json +``` + +### Using with Security Scanners + +The SBOM can be directly used with security vulnerability scanners: + +```bash +# Scan with Grype +grype sbom:./sbom.cyclonedx.json + +# Scan with Trivy +trivy sbom sbom.cyclonedx.json + +# Export to other formats if needed +syft convert sbom.cyclonedx.json -o spdx-json > sbom.spdx.json +``` ## Configuration -The plugin is disabled by default. To enable it, add this to your configuration: +### Enabling the Plugin + +The plugin is disabled by default. 
You can enable it in several ways: + +**Option 1: Command line (recommended for one-off builds)** +```bash +mock --enable-plugin=sbom_generator --rebuild package.src.rpm +``` + +**Option 2: Configuration file (for persistent enablement)** + +Add to your Mock configuration file (e.g., `/etc/mock/fedora-rawhide-x86_64.cfg`): ```python config_opts['plugin_conf']['sbom_generator_enable'] = True @@ -28,71 +105,197 @@ config_opts['plugin_conf']['sbom_generator_opts'] = { } ``` -You can also enable it for a single build using the command line: +**Option 3: User configuration** + +Add to `~/.config/mock/mock.cfg`: + +```python +config_opts['plugin_conf']['sbom_generator_enable'] = True +``` + +### Configuration Options + +The plugin supports several configuration options to control SBOM generation: + +```python +config_opts['plugin_conf']['sbom_generator_opts'] = { + 'generate_sbom': True, # Enable SBOM generation (default: True) + 'include_file_components': True, # Include file-level components (default: True) + 'include_file_dependencies': False, # Include file-to-package dependencies (default: False) + 'include_debug_files': False, # Include debug files in file components (default: False) + 'include_man_pages': True, # Include man pages in file components (default: True) + 'include_toolchain_dependencies': False, # Include build toolchain in dependencies (default: False) +} +``` + +**Configuration Options Explained:** - mock --enable-plugin=sbom_generator --rebuild package.src.rpm +- `include_file_components`: When enabled, creates individual file components for each file in built packages, including hashes, permissions, and ownership information. +- `include_file_dependencies`: When enabled, creates dependency relationships showing which files belong to which packages. +- `include_debug_files`: When enabled, debug files (`.debug`, files in `/usr/lib/debug`) are included in file components; when disabled (the default), they are filtered out. +- `include_man_pages`: When enabled (the default), man pages are included in file components; when disabled, they are filtered out. 
+- `include_toolchain_dependencies`: Adds build toolchain packages to the dependencies array (useful for complete build provenance, but can make dependency graphs very large). ## Output -The plugin generates a file named `sbom.spdx.json` in the build results directory (typically `/var/lib/mock/fedora-42-x86_64/result/`). The SBOM includes: - -* SPDX version and metadata -* Source package information - * Source files - * Patch files -* Binary package information - * Package name, version, and release - * License information - * Vendor and URL - * Packager information - * File lists - * Dependencies - * GPG signatures -* Build environment information - * List of installed packages - * Package versions - * Digital signatures +The plugin generates a file named `sbom.cyclonedx.json` in the build results directory (typically `/var/lib/mock/fedora-42-x86_64/result/`). The SBOM includes: + +* CycloneDX document metadata + * Build timestamp + * Tool information (Mock SBOM Generator) + * Mock-specific build properties (host, distribution, chroot, config) + * RPM header metadata surfaced at the document level (buildhost, buildtime, source RPM, group, epoch, distribution, manufacture/vendor) +* Components array containing: + * Built packages (type: "library" or "application") + * Package name, version, and PURL + * CPE identifiers for vulnerability matching + * License information plus RPM summary as description + * RPM file SHA-256 hash + * Vendor, packager, buildhost, buildtime, source RPM, group, epoch, distribution metadata + * Upstream/project URLs and source RPM links via `externalReferences` + * GPG signature details + * Note: Source tarballs and patches are represented as separate file components in the components array with their own BOM refs for traceability + * Build toolchain packages (type: "library") + * All packages installed in the build environment + * Signature information + * Marked with `mock:role: "build-toolchain"` property + * Source files (type: "file") 
+ * Source and patch files from spec + * SHA-256 hashes + * Signature information if available +* Dependencies array + * Runtime dependencies for built packages (libraries/RPMs the package depends on) + * Dependency relationships modeled using bom-refs + * Note: Source code relationships are represented in component properties and the components array, not in the dependencies section (source code is a build input, not a runtime dependency) ## Example SBOM Structure ```json { - "SPDXVersion": "SPDX-2.3", - "DataLicense": "CC0-1.0", - "SPDXID": "SPDXRef-DOCUMENT", - "name": "mock-build", - "creator": "Mock-SBOM-Plugin", - "created": "2024-01-19T15:20:00Z", - "packages": [ - { - "name": "package-name", - "version": "1.0", - "release": "1.fc42", - "license": "MIT", - "vendor": "Fedora Project", - "url": "https://example.com", - "packager": "Fedora Project", - "files": [], - "dependencies": [], - "gpg_signature": null - } + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": "urn:uuid:...", + "version": 1, + "metadata": { + "timestamp": "2024-01-19T15:20:00Z", + "tools": [ + { + "vendor": "Mock", + "name": "mock-sbom-generator", + "version": "1.0" + } + ], + "properties": [ + { "name": "mock:build:host", "value": "build.example.com" }, + { "name": "mock:build:distribution", "value": "Fedora 42" }, + { "name": "mock:build:chroot", "value": "/var/lib/mock/fedora-42-x86_64/root" }, + { "name": "mock:rpm:buildhost", "value": "builder.fedora.example.org" }, + { "name": "mock:rpm:buildtime", "value": "2024-01-19T15:15:00+00:00" }, + { "name": "mock:rpm:sourcerpm", "value": "package-name-1.0-1.fc42.src.rpm" }, + { "name": "mock:rpm:group", "value": "System Environment/Libraries" }, + { "name": "mock:rpm:epoch", "value": "1" } ], - "source_package": { - "source_files": [ - "source0.tar.gz", - "patch1.patch" - ] + "manufacture": { + "name": "Fedora Project" }, - "build_environment": [ + "component": { + "type": "application", + "name": "package-name", + 
"version": "1.0-1.fc42", + "bom-ref": "build-output:package-name", + "description": "Package summary (build output containing 3 package(s))", + "licenses": [ + { + "license": { + "id": "MIT" + } + } + ], + "externalReferences": [ + { "type": "distribution", "url": "package-name-1.0-1.fc42.src.rpm" }, + { "type": "website", "url": "https://example.com/package-name" } + ] + } + }, + "components": [ + { + "type": "library", + "bom-ref": "pkg:rpm/fedora/package-name@1.0-1.fc42?arch=x86_64", + "name": "package-name", + "version": "1.0-1.fc42", + "purl": "pkg:rpm/fedora/package-name@1.0-1.fc42?arch=x86_64", + "externalReferences": [ + { + "type": "cpe23Type", + "url": "cpe:2.3:a:fedora:package-name:1.0:*:*:*:*:*:*:*" + }, + { + "type": "website", + "url": "https://src.fedoraproject.org/rpms/package-name" + }, { - "name": "package-name", - "version": "1.0-1.fc42", - "digital_signature": "RSA/SHA256, ..." + "type": "distribution", + "url": "package-name-1.0-1.fc42.src.rpm" } - ] + ], + "licenses": [ + { + "license": { + "id": "MIT" + } + } + ], + "hashes": [ + { + "alg": "SHA-256", + "content": "..." 
+ } + ], + "properties": [ + { + "name": "mock:rpm:vendor", + "value": "Fedora Project" + }, + { + "name": "mock:rpm:buildhost", + "value": "builder.fedora.example.org" + }, + { + "name": "mock:rpm:buildtime", + "value": "2024-01-19T15:15:00+00:00" + }, + { + "name": "mock:rpm:sourcerpm", + "value": "package-name-1.0-1.fc42.src.rpm" + }, + { + "name": "mock:signature:type", + "value": "GPG" + } + ] + } + ], + "dependencies": [ + { + "ref": "pkg:rpm/fedora/package-name@1.0-1.fc42?arch=x86_64", + "dependsOn": [ + "pkg:rpm/fedora/glibc@2.38-1.fc42" + ] + } + ] } ``` +## Security Tool Compatibility + +The generated CycloneDX SBOM is compatible with popular security scanners: + +* **Grype**: `grype sbom:./package-name-1.0-1.fc42.sbom` +* **Trivy**: `trivy sbom package-name-1.0-1.fc42.sbom` +* **Snyk**: Supports CycloneDX format for vulnerability scanning + +The SBOM includes PURL (Package URL) and CPE identifiers for accurate vulnerability matching. + ## Requirements * Python 3.x @@ -105,5 +308,17 @@ The plugin generates a file named `sbom.spdx.json` in the build results director * SBOM generation is skipped if no RPM, source RPM, or spec file is found * The plugin is designed to work with both source and binary RPM builds * Build environment information is collected using `rpm -qa` command +* All build toolchain packages are captured, providing complete build provenance +* PURL format: `pkg:rpm/{distro}/{package}@{version}?arch={arch}` +* Mock-specific metadata is stored in component and metadata properties with `mock:` prefix + +## Competitive Advantages + +This SBOM generator leverages Mock's unique build environment visibility: + +* **Complete Build Toolchain**: Captures every package installed in the build chroot, not just declared dependencies +* **Build-Time Provenance**: Records the exact build environment, including tool versions and signatures +* **RPM-Native Intelligence**: Deep integration with RPM metadata, spec files, and package signatures +* **Reproducible Build Context**: Complete 
build environment fingerprinting for reproducibility verification Available since version 6.1. \ No newline at end of file diff --git a/mock/py/mockbuild/config.py b/mock/py/mockbuild/config.py index 1d27c09a4..a50c620cc 100644 --- a/mock/py/mockbuild/config.py +++ b/mock/py/mockbuild/config.py @@ -267,6 +267,12 @@ def setup_default_config_opts(): 'sbom_generator_enable': False, 'sbom_generator_opts': { 'generate_sbom': True, + 'include_file_components': True, + 'include_file_dependencies': False, + 'include_debug_files': False, + 'include_man_pages': True, + 'include_source_dependencies': True, + 'include_toolchain_dependencies': False, }, } diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 6c4821b37..8027e4650 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -1,9 +1,17 @@ +# Copyright (C) 2025, Atomicorp, Inc. +# SPDX-License-Identifier: GPL-2.0-only + import os import json import subprocess from mockbuild.trace_decorator import traceLog import hashlib import re +import socket +import uuid +import tempfile +import shutil +import shlex requires_api_version = "1.1" # Ensure compatibility with mock API @@ -28,6 +36,15 @@ def __init__(self, plugins, conf, buildroot): self.conf = conf self.sbom_enabled = self.conf.get('generate_sbom', True) self.sbom_done = False + + # Configuration options for file-level dependencies and filtering + self.include_file_dependencies = self.conf.get('include_file_dependencies', False) + self.include_file_components = self.conf.get('include_file_components', True) + self.include_debug_files = self.conf.get('include_debug_files', False) + self.include_man_pages = self.conf.get('include_man_pages', True) + self.include_source_dependencies = self.conf.get('include_source_dependencies', True) + self.include_toolchain_dependencies = self.conf.get('include_toolchain_dependencies', False) + plugins.add_hook("prebuild", 
self._listSPECSDirectory) plugins.add_hook("postbuild", self._generateSBOMPostBuildHook) @@ -36,16 +53,13 @@ def _listSPECSDirectory(self): """Lists the contents of the SPECS directory before building.""" print("DEBUG: Listing contents of SPECS directory before building:") -# print(f"Buildroot values:") -# for key, value in vars(self.buildroot).items(): -# print(f" {key}: {value}") print(f"DEBUG: builddir is {self.buildroot.builddir}") print(f"DEBUG: rootdir is {self.rootdir}") print(f"DEBUG: resultsdir is {self.buildroot.resultdir}") - # specs_dir is rootdir + builddir + SPECS - #specs_dir = os.path.join(self.buildroot.rootdir, self.buildroot.builddir, "rpmbuild/SPECS") - specs_dir = os.path.join(self.rootdir, "/foo/", "/bar/") + # Look for spec files under the chroot: builddir is presumably a path inside the chroot (the post-build hook resolves it under rootdir), so strip its leading "/" or os.path.join would discard rootdir + build_dir = self.buildroot.builddir + specs_dir = os.path.join(self.rootdir, build_dir.lstrip("/"), "SPECS") print(f"DEBUG: spec dir is {specs_dir}") try: @@ -57,145 +71,1806 @@ def _listSPECSDirectory(self): except Exception as e: print(f"Failed to list contents of SPECS directory: {e}") + def _create_cyclonedx_document(self): + """Creates the base CycloneDX document structure.""" + return { + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": f"urn:uuid:{uuid.uuid4()}", + "version": 1, + "metadata": self._create_metadata(), + "components": [], + "dependencies": [] + } + + def _create_metadata(self): + """Creates CycloneDX metadata object with Mock-specific build information (UTC timestamp, tool, host, distribution, chroot, config and hardening properties).""" + from datetime import datetime, timezone + + metadata = { + "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "tools": [ + { + "vendor": "Mock", + "name": "mock-sbom-generator", + "version": "1.2.5" + } + ], + "lifecycles": [ + { + "phase": "build" + } + ], + "licenses": [ + { + "license": { + "id": "CC0-1.0" + } + } + ], + "properties": [] + } + + # Add Mock-specific build metadata as properties + properties = metadata["properties"] + + # Add SBOM completeness declaration + properties.append({ + "name": "sbom:completeness", +
"value": "complete" + }) + + properties.append({ + "name": "mock:build:host", + "value": socket.gethostname() + }) + + distro = self.get_distribution() + if distro: + properties.append({ + "name": "mock:build:distribution", + "value": distro + }) + + # Add chroot information if available + if hasattr(self.buildroot, 'rootdir') and self.buildroot.rootdir: + properties.append({ + "name": "mock:build:chroot", + "value": self.buildroot.rootdir + }) + + # Add Mock config if available + if hasattr(self.buildroot, 'config') and self.buildroot.config: + config_name = self.buildroot.config.get('config_path', 'unknown') + properties.append({ + "name": "mock:build:config", + "value": config_name + }) + + hardening_props = self._collect_build_hardening_properties() + if hardening_props: + properties.extend(hardening_props) + + return metadata + + def _evaluate_rpm_macro(self, macro): + """Evaluate an RPM macro inside the buildroot (falling back to host); returns the stripped expansion, or "" when evaluation fails.""" + cmd = ["rpm", "--eval", macro] + # Prefer evaluating inside the chroot to capture build-specific settings + if hasattr(self.buildroot, "doChroot"): + try: + output, _ = self.buildroot.doChroot( + cmd, + shell=False, + returnOutput=True, + printOutput=False, + ) + if output: + return output.strip() + except Exception as exc: # pylint: disable=broad-except + print(f"Warning: failed to eval macro {macro} in chroot: {exc}") + try: + result = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + text=True, + ) + return result.stdout.strip() + except (subprocess.CalledProcessError, OSError) as exc: + print(f"Warning: failed to eval macro {macro}: {exc}") + return "" + + def _read_file_from_chroot(self, relative_path): + """ + Read a file from inside the buildroot. + Returns the file content as a string or empty string on failure. 
+ """ + chroot_path = os.path.join(self.buildroot.rootdir, relative_path.lstrip("/")) + try: + with open(chroot_path, "r", encoding="utf-8", errors="ignore") as handle: + return handle.read().strip() + except (OSError, IOError): + pass + if hasattr(self.buildroot, "doChroot"): + try: + output, _ = self.buildroot.doChroot( + ["cat", relative_path], + shell=False, + returnOutput=True, + printOutput=False, + ) + return output.strip() + except Exception: # pylint: disable=broad-except + return "" + return "" + + def _collect_build_hardening_properties(self): + """ + Capture key compiler/linker macro settings that influence hardening + (FORTIFY, PIE, RELRO, LTO, etc.) and expose them as SBOM properties. + """ + macro_queries = { + "build:hardening:optflags": "%{?optflags}", + "build:hardening:hardening_cflags": "%{?_hardening_cflags}", + "build:hardening:global_cflags": "%{?__global_cflags}", + "build:hardening:global_ldflags": "%{?__global_ldflags}", + "build:hardening:build_ldflags": "%{?build_ldflags}", + } + + properties = [] + macro_values = {} + for prop_name, macro in macro_queries.items(): + value = self._evaluate_rpm_macro(macro) + macro_values[prop_name] = value + if value: + properties.append({ + "name": prop_name, + "value": value + }) + + cflags_combined = " ".join( + filter( + None, + [ + macro_values.get("build:hardening:optflags"), + macro_values.get("build:hardening:hardening_cflags"), + macro_values.get("build:hardening:global_cflags"), + ], + ) + ).lower() + ldflags_combined = " ".join( + filter( + None, + [ + macro_values.get("build:hardening:global_ldflags"), + macro_values.get("build:hardening:build_ldflags"), + ], + ) + ).lower() + flag_union = f"{cflags_combined} {ldflags_combined}" + + def _contains_flag(flag): + return flag in flag_union if flag_union else False + + feature_map = { + "build:hardening:fortify_enabled": any( + token in flag_union + for token in ["-d_fortify_source", "_fortify_source="] + ), + "build:hardening:pie_enabled": any( + 
token in flag_union for token in ["-fpie", "-pie"] + ), + "build:hardening:relro_enabled": any( + token in flag_union + for token in ["-z relro", "-z now", "-wl,-z,relro", "-wl,-z,now"] + ), + "build:hardening:lto_enabled": _contains_flag("-flto"), + } + for name, enabled in feature_map.items(): + properties.append({ + "name": name, + "value": "true" if enabled else "false" + }) + + fips_value = self._read_file_from_chroot("/proc/sys/crypto/fips_enabled") + if fips_value != "": + properties.append({ + "name": "build:hardening:fips_enabled", + "value": "true" if fips_value.strip() == "1" else "false" + }) + + return properties + + def _generate_purl(self, package_name, version, distro=None, arch=None): + """Generate Package URL (PURL) for RPM package: pkg:rpm/{distro}/{name}@{version}[?arch={arch}].""" + if not distro: + distro = self.detect_chroot_distribution() or "fedora" + + # Normalise package name for PURL: lowercase, and percent-encode anything outside [A-Za-z0-9._-] (e.g. "+" in "gcc-c++") instead of collapsing it to "-", which would name a different package + clean_name = re.sub(r'[^a-zA-Z0-9._-]', lambda m: "".join(f"%{b:02X}" for b in m.group().encode("utf-8")), package_name.lower()) + + purl = f"pkg:rpm/{distro}/{clean_name}@{version}" + if arch: + purl += f"?arch={arch}" + return purl + + def _generate_bom_ref(self, package_name, version, component_type="package"): + """Generate a unique bom-ref identifier for a component.""" + # Use PURL as bom-ref for consistency + distro = self.detect_chroot_distribution() or "fedora" + return self._generate_purl(package_name, version, distro) + @traceLog() def _generateSBOMPostBuildHook(self): if self.sbom_done or not self.sbom_enabled: return - out_file = os.path.join(self.buildroot.resultdir, 'sbom.spdx.json') - state_text = "Generating SBOM for built packages v0.8" + state_text = "Generating CycloneDX SBOM for built packages v1.0" self.state.start(state_text) try: build_dir = self.buildroot.resultdir - rpm_files = [f for f in os.listdir(build_dir) if f.endswith('.rpm')] + # Filter out source RPMs from binary RPM processing + rpm_files = [f for f in os.listdir(build_dir) if f.endswith('.rpm') and not f.endswith('.src.rpm')] src_rpm_files = [f 
for f in os.listdir(build_dir) if f.endswith('.src.rpm')] - spec_file = next((f for f in os.listdir(build_dir) if f.endswith('.spec')), None) + + # Look for spec file in the build directory (during build process) + build_build_dir = os.path.join(self.buildroot.rootdir, "builddir/build") + spec_file = None + if os.path.exists(build_build_dir): + # Look for spec file in the build directory + for root, dirs, files in os.walk(build_build_dir): + for file in files: + if file.endswith('.spec'): + spec_file = os.path.join(root, file) + break + if spec_file: + break if not rpm_files and not src_rpm_files and not spec_file: print("No RPM, source RPM, or spec file found for SBOM generation.") return - sbom = { - "SPDXVersion": "SPDX-2.3", - "DataLicense": "CC0-1.0", - "SPDXID": "SPDXRef-DOCUMENT", - "name": "mock-build", - "creator": "Mock-SBOM-Plugin", - "created": self.get_iso_timestamp(), - "packages": [], - "source_package": {} - } + # Create CycloneDX document + bom = self._create_cyclonedx_document() + + # Get build toolchain packages + build_toolchain_packages = self.get_build_toolchain_packages() + + # Process source files from spec file + source_files = [] + source_component_entries = [] + build_subject_name = None + build_subject_version = None + build_subject_release = None - # Process spec file for sources and patches if spec_file: - spec_path = os.path.join(build_dir, spec_file) - source_files = self.parse_spec_file(spec_path) - sbom["source_package"]["source_files"] = source_files + build_subject_name = os.path.splitext(os.path.basename(spec_file))[0] + parsed_sources = self.parse_spec_file(spec_file) + if parsed_sources: + source_files = parsed_sources + + srpm_metadata = None + if src_rpm_files: + srpm_path = os.path.join(build_dir, src_rpm_files[0]) + srpm_metadata = self.get_rpm_metadata(srpm_path) + if srpm_metadata: + if not build_subject_name: + build_subject_name = srpm_metadata.get("name") + if not build_subject_version: + build_subject_version = 
srpm_metadata.get("version") + if not build_subject_release: + build_subject_release = srpm_metadata.get("release") + + # Construct output filename using package name-version-release format + # These should always be available in a proper mock build + if not build_subject_name or not build_subject_version or not build_subject_release: + print(f"WARNING: Missing package metadata - name: {build_subject_name}, version: {build_subject_version}, release: {build_subject_release}") + print("Cannot generate SBOM with proper filename - build metadata incomplete") + return + + sbom_filename = f"{build_subject_name}-{build_subject_version}-{build_subject_release}.sbom" + out_file = os.path.join(self.buildroot.resultdir, sbom_filename) - build_environment = self.get_build_environment_packages() + if not source_files and src_rpm_files: + # Extract from source RPM if available + src_rpm_path = os.path.join(build_dir, src_rpm_files[0]) + source_files = self.extract_source_files_from_srpm(src_rpm_path) - # Process binary RPMs + # Convert source files to CycloneDX components + for source_file in source_files: + component = self._create_source_file_component(source_file) + if component: + bom["components"].append(component) + filename = source_file.get("filename") + source_component_entries.append({ + "filename": filename, + "bom_ref": component.get("bom-ref"), + "type": "patch" if self._is_patch_file(filename) else "source" + }) + + # Convert build toolchain packages to components + distro = self.detect_chroot_distribution() or "fedora" + toolchain_bom_refs = [] + for toolchain_pkg in build_toolchain_packages: + component = self._create_toolchain_component(toolchain_pkg, distro) + if component: + bom_ref = component.get("bom-ref") + if bom_ref: + toolchain_bom_refs.append(bom_ref) + bom["components"].append(component) + + # Process binary RPMs and convert to components + built_package_bom_refs = [] + component_map = {} # Map package names to bom-refs for dependency resolution + 
primary_rpm_metadata = None # Store metadata from primary package for metadata enhancement + + # Build component map from toolchain packages + for toolchain_pkg in build_toolchain_packages: + pkg_name = toolchain_pkg.get("name") + pkg_version = toolchain_pkg.get("version") + if pkg_name and pkg_version: + purl = self._generate_purl(pkg_name, pkg_version, distro) + component_map[pkg_name.lower()] = purl + for rpm_file in rpm_files: rpm_path = os.path.join(build_dir, rpm_file) - package_data = self.get_rpm_metadata(rpm_path) - if package_data: - sbom_package = { - "name": package_data.get("name"), - "version": package_data.get("version"), - "release": package_data.get("release"), - "license": package_data.get("license"), - "vendor": package_data.get("vendor"), - "url": package_data.get("url"), - "packager": package_data.get("packager"), - "files": [], - "dependencies": [], - "gpg_signature": None, + component = self._create_built_package_component(rpm_path, distro, source_component_entries) + if component: + bom_ref = component.get("bom-ref") + package_name = component.get("name") + package_version = component.get("version") + if bom_ref: + built_package_bom_refs.append(bom_ref) + # Add to component map for dependency resolution + if package_name: + component_map[package_name.lower()] = bom_ref + bom["components"].append(component) + + # Store metadata from primary package (prefer main package matching build subject) + if not primary_rpm_metadata: + # Prefer the main package over debuginfo packages + if not package_name or 'debuginfo' not in package_name.lower(): + primary_rpm_metadata = self.get_rpm_metadata(rpm_path) + else: + # If we have metadata, check if we should replace it with a better match + current_name = primary_rpm_metadata.get('name', '').lower() + is_current_debuginfo = 'debuginfo' in current_name + is_current_main = build_subject_name and current_name == build_subject_name.lower() + + # Replace if: current is debuginfo and new is not, OR new matches 
build subject name + should_replace = False + if is_current_debuginfo and package_name and 'debuginfo' not in package_name.lower(): + should_replace = True + elif build_subject_name and package_name and package_name.lower() == build_subject_name.lower(): + # New package matches build subject name - always prefer it + should_replace = True + + if should_replace: + primary_rpm_metadata = self.get_rpm_metadata(rpm_path) + + # Create file components for files within this package + if package_name and package_version and self.include_file_components: + file_components = self._create_file_components(rpm_path, package_name, package_version) + for file_comp in file_components: + bom["components"].append(file_comp) + # Create dependency: file depends on package (only if configured) + if self._should_include_file_dependency(file_comp.get("name", "")): + file_dep = { + "ref": file_comp["bom-ref"], + "dependsOn": [bom_ref] + } + bom["dependencies"].append(file_dep) + + # Create dependency entry for runtime dependencies (libraries/RPMs) + dependencies = self.get_rpm_dependencies(rpm_path) + runtime_dependency = self._create_dependency(bom_ref, dependencies, component_map, distro) + + # Build dependsOn array with runtime dependencies and optionally toolchain + all_depends_on = [] + + # Add runtime dependencies (libraries/RPMs this package depends on) + if runtime_dependency and runtime_dependency.get("dependsOn"): + for dep_ref in runtime_dependency.get("dependsOn", []): + if dep_ref not in all_depends_on: + all_depends_on.append(dep_ref) + + # Add toolchain dependencies if configured (build-time dependencies) + if self.include_toolchain_dependencies and toolchain_bom_refs: + for toolchain_ref in toolchain_bom_refs: + if toolchain_ref not in all_depends_on: + all_depends_on.append(toolchain_ref) + + # Deduplicate final dependsOn array, preserving insertion order so the written SBOM is identical across runs (set() order varies with string hash randomisation) + all_depends_on = list(dict.fromkeys(all_depends_on)) + + # Create dependency entry if we have any dependencies + if all_depends_on: + combined_dep = { + 
"ref": bom_ref, + "dependsOn": all_depends_on + } + bom["dependencies"].append(combined_dep) + elif runtime_dependency: + # Fall back to just runtime dependencies if no other deps + bom["dependencies"].append(runtime_dependency) + + # Note: Source code relationships are represented in component properties + # (mock:source:files, mock:source:refs, mock:patch:files, mock:patch:refs) + # rather than in dependencies, as source code is a build input, not a runtime dependency + + # Add RPM-specific metadata to metadata.properties + if primary_rpm_metadata: + rpm_props = bom["metadata"]["properties"] + + # Add buildhost if available + buildhost = primary_rpm_metadata.get("buildhost") + if buildhost and buildhost != "(none)": + rpm_props.append({ + "name": "mock:rpm:buildhost", + "value": buildhost + }) + + # Add buildtime if available, formatted the same way as the per-component mock:rpm:buildtime property (ISO 8601 when the value is an epoch) + buildtime = self._format_epoch_timestamp(primary_rpm_metadata.get("buildtime")) + if buildtime and buildtime != "(none)": + rpm_props.append({ + "name": "mock:rpm:buildtime", + "value": buildtime + }) + + # Add source RPM if available + sourcerpm = primary_rpm_metadata.get("sourcerpm") + if sourcerpm and sourcerpm != "(none)": + rpm_props.append({ + "name": "mock:rpm:sourcerpm", + "value": sourcerpm + }) + + # Add group if available + group = primary_rpm_metadata.get("group") + if group and group != "(none)": + rpm_props.append({ + "name": "mock:rpm:group", + "value": group + }) + + # Add epoch if available and not empty + epoch = primary_rpm_metadata.get("epoch") + if epoch and epoch != "(none)" and epoch.strip(): + rpm_props.append({ + "name": "mock:rpm:epoch", + "value": epoch + }) + + # Add distribution if available + distribution = primary_rpm_metadata.get("distribution") + if distribution and distribution != "(none)": + rpm_props.append({ + "name": "mock:rpm:distribution", + "value": distribution + }) + + # Add manufacture field if vendor is available + vendor = primary_rpm_metadata.get("vendor") + if vendor and vendor != "(none)": + 
bom["metadata"]["manufacture"] = { + "name": vendor + } + # Also add as authors (sbomqs expects this) + bom["metadata"]["authors"] = [ + { + "name": vendor + } + ] + + # Add supplier (from Packager field) + packager = primary_rpm_metadata.get("packager") + if packager and packager != "(none)": + bom["metadata"]["supplier"] = { + "name": packager } - sbom["packages"].append(sbom_package) + + # Add metadata.component representing what this SBOM is about + # Use the primary built package(s) or create an aggregate component + if built_package_bom_refs: + # For single package builds, use that package + # For multi-package builds, use the first/main package or create aggregate + if len(built_package_bom_refs) == 1: + # Single package: use it as the component + primary_ref = built_package_bom_refs[0] + primary_component = next((c for c in bom["components"] if c.get("bom-ref") == primary_ref), None) + if primary_component: + component_obj = { + "type": primary_component.get("type", "application"), + "name": primary_component.get("name"), + "version": primary_component.get("version"), + "bom-ref": primary_ref, + "purl": primary_component.get("purl") + } + + # Add description if available + if primary_component.get("description"): + component_obj["description"] = primary_component.get("description") + elif primary_rpm_metadata: + summary = primary_rpm_metadata.get("summary") + if summary and summary != "(none)": + component_obj["description"] = summary + + # Add externalReferences + external_refs = [] + if primary_rpm_metadata: + # Add source RPM reference + sourcerpm = primary_rpm_metadata.get("sourcerpm") + if sourcerpm and sourcerpm != "(none)": + external_refs.append({ + "type": "distribution", + "url": sourcerpm + }) + # Add project URL + url = primary_rpm_metadata.get("url") + if url and url != "(none)": + external_refs.append({ + "type": "website", + "url": url + }) + if external_refs: + component_obj["externalReferences"] = external_refs + + # Add license information 
+ if primary_component.get("licenses"): + component_obj["licenses"] = primary_component.get("licenses") + elif primary_rpm_metadata: + license_str = primary_rpm_metadata.get("license") + if license_str and license_str != "(none)": + component_obj["licenses"] = [ + { + "license": { + "id": license_str + } + } + ] + + bom["metadata"]["component"] = component_obj + else: + # Multi-package build: create aggregate component that represents the full build output + first_pkg = next((c for c in bom["components"] if c.get("bom-ref") == built_package_bom_refs[0]), None) + if first_pkg: + aggregate_name = build_subject_name or first_pkg.get("name", "unknown") + aggregate_version = None + if build_subject_version and build_subject_release: + aggregate_version = f"{build_subject_version}-{build_subject_release}" + elif primary_rpm_metadata: + meta_version = primary_rpm_metadata.get("version") + meta_release = primary_rpm_metadata.get("release") + if meta_version and meta_release: + aggregate_version = f"{meta_version}-{meta_release}" + if not aggregate_version: + aggregate_version = first_pkg.get("version", "unknown") - sbom["build_environment"] = build_environment + # Build description - prefer summary from RPM, fall back to generic description + description = f"Build output containing {len(built_package_bom_refs)} package(s)" + if primary_rpm_metadata: + summary = primary_rpm_metadata.get("summary") + if summary and summary != "(none)": + description = f"{summary} (build output containing {len(built_package_bom_refs)} package(s))" + + component_obj = { + "type": "application", + "name": aggregate_name, + "version": aggregate_version, + "bom-ref": f"build-output:{aggregate_name}", + "description": description + } + + if aggregate_name and aggregate_version: + component_obj["purl"] = self._generate_purl(aggregate_name, aggregate_version, distro) + elif first_pkg.get("purl"): + component_obj["purl"] = first_pkg.get("purl") + + # Add externalReferences + external_refs = [] + if 
primary_rpm_metadata: + # Add source RPM reference + sourcerpm = primary_rpm_metadata.get("sourcerpm") + if sourcerpm and sourcerpm != "(none)": + external_refs.append({ + "type": "distribution", + "url": sourcerpm + }) + # Add project URL + url = primary_rpm_metadata.get("url") + if url and url != "(none)": + external_refs.append({ + "type": "website", + "url": url + }) + if external_refs: + component_obj["externalReferences"] = external_refs + + # Add license information + if first_pkg.get("licenses"): + component_obj["licenses"] = first_pkg.get("licenses") + elif primary_rpm_metadata: + license_str = primary_rpm_metadata.get("license") + if license_str and license_str != "(none)": + component_obj["licenses"] = [ + { + "license": { + "id": license_str + } + } + ] + + bom["metadata"]["component"] = component_obj + # Add dependency entries for all components that don't have them + # CycloneDX requires every component to have a dependency entry + # Use dictionary to ensure uniqueness (no duplicates) + dependencies_dict = {} + + # Start with existing dependencies (from built packages) + for dep in bom["dependencies"]: + ref = dep.get("ref") + if ref: + dependencies_dict[ref] = dep + + # Add entries for source file components (no dependencies) + for source_entry in source_component_entries: + bom_ref = source_entry.get("bom_ref") + if bom_ref and bom_ref not in dependencies_dict: + dependencies_dict[bom_ref] = { + "ref": bom_ref, + "dependsOn": [] + } + + # Add entries for toolchain components (no dependencies tracked for now) + for toolchain_pkg in build_toolchain_packages: + pkg_name = toolchain_pkg.get("name") + pkg_version = toolchain_pkg.get("version") + if pkg_name and pkg_version: + purl = self._generate_purl(pkg_name, pkg_version, distro) + if purl and purl not in dependencies_dict: + dependencies_dict[purl] = { + "ref": purl, + "dependsOn": [] + } + + # Add entries for any remaining components + # (in case we have components that weren't explicitly handled) + 
for component in bom["components"]: + comp_bom_ref = component.get("bom-ref") + if comp_bom_ref and comp_bom_ref not in dependencies_dict: + dependencies_dict[comp_bom_ref] = { + "ref": comp_bom_ref, + "dependsOn": [] + } + + # Replace dependencies array with deduplicated list + bom["dependencies"] = list(dependencies_dict.values()) + + # Write CycloneDX BOM with open(out_file, "w") as f: - json.dump(sbom, f, indent=4) + json.dump(bom, f, indent=2) - print(f"SBOM successfully written to: {out_file}") + print(f"CycloneDX SBOM successfully written to: {out_file}") except Exception as e: print(f"An error occurred during SBOM generation: {e}") + import traceback + traceback.print_exc() finally: self.sbom_done = True self.state.finish(state_text) - def parse_spec_file(self, spec_path): - """Parses a spec file to extract source and patch files.""" + def _create_built_package_component(self, rpm_path, distro, source_components=None): + """Creates a CycloneDX component for a built RPM package.""" + package_data = self.get_rpm_metadata(rpm_path) + if not package_data: + return None + + package_name = package_data.get("name") + version = package_data.get("version") + release = package_data.get("release") + arch = package_data.get("arch") + + # Combine version and release + full_version = f"{version}-{release}" if release else version + + # Generate PURL and bom-ref + purl = self._generate_purl(package_name, full_version, distro, arch) + bom_ref = purl + + # Determine component type (application vs library) + # Most RPMs are libraries, but we could check for executables + component_type = "library" + + component = { + "type": component_type, + "bom-ref": bom_ref, + "name": package_name, + "version": full_version, + "purl": purl + } + + # Add external references (CPE) + cpe = self.generate_cpe(package_name, version) + if cpe: + component["externalReferences"] = [ + { + "type": "other", + "comment": "CPE 2.3", + "url": cpe + } + ] + + # Add hash of RPM file + rpm_hash = 
self.hash_file(rpm_path) + if rpm_hash: + component["hashes"] = [ + { + "alg": "SHA-256", + "content": rpm_hash + } + ] + + # Add license information + license_str = package_data.get("license") + if license_str and license_str != "(none)": + component["licenses"] = [ + { + "license": { + "id": license_str + } + } + ] + + # Add supplier information (from Packager field) + packager = package_data.get("packager") + if packager and packager != "(none)": + component["supplier"] = { + "name": packager + } + + # Add properties for RPM metadata + properties = [] + + vendor = package_data.get("vendor") + if vendor and vendor != "(none)": + properties.append({ + "name": "mock:rpm:vendor", + "value": vendor + }) + + packager = package_data.get("packager") + if packager and packager != "(none)": + properties.append({ + "name": "mock:rpm:packager", + "value": packager + }) + + buildhost = package_data.get("buildhost") + if buildhost and buildhost != "(none)": + properties.append({ + "name": "mock:rpm:buildhost", + "value": buildhost + }) + + buildtime_iso = self._format_epoch_timestamp(package_data.get("buildtime")) + if buildtime_iso: + properties.append({ + "name": "mock:rpm:buildtime", + "value": buildtime_iso + }) + + sourcerpm = package_data.get("sourcerpm") + if sourcerpm and sourcerpm != "(none)": + properties.append({ + "name": "mock:rpm:sourcerpm", + "value": sourcerpm + }) + + group = package_data.get("group") + if group and group != "(none)": + properties.append({ + "name": "mock:rpm:group", + "value": group + }) + + epoch_val = package_data.get("epoch") + if epoch_val and epoch_val != "(none)": + properties.append({ + "name": "mock:rpm:epoch", + "value": epoch_val + }) + + distribution = package_data.get("distribution") + if distribution and distribution != "(none)": + properties.append({ + "name": "mock:rpm:distribution", + "value": distribution + }) + + url = package_data.get("url") + if url and url != "(none)": + component["externalReferences"] = 
component.get("externalReferences", []) + component["externalReferences"].append({ + "type": "website", + "url": url + }) + + summary = package_data.get("summary") + if summary and summary != "(none)": + component["description"] = summary + + # Add GPG signature information if available + signature = self.get_rpm_signature(rpm_path) + if signature: + # Parse signature info + sig_props = self._parse_signature_to_properties(signature) + properties.extend(sig_props) + + # Note: Source/patch file relationships are represented in component properties + # (mock:source:files, mock:source:refs, mock:patch:files, mock:patch:refs) + # but are removed from individual package components to reduce noise. + # Source code relationships are still available in the components array. + + if properties: + component["properties"] = properties + + # Add external reference for source RPM if available + if sourcerpm and sourcerpm != "(none)": + component["externalReferences"] = component.get("externalReferences", []) + component["externalReferences"].append({ + "type": "distribution", + "url": sourcerpm + }) + + return component + + def _create_toolchain_component(self, toolchain_pkg, distro): + """Creates a CycloneDX component for a build toolchain package.""" + package_name = toolchain_pkg.get("name") + version = toolchain_pkg.get("version") + + if not package_name or not version: + return None + + # Generate PURL and bom-ref + purl = self._generate_purl(package_name, version, distro) + bom_ref = purl + + component = { + "type": "library", + "bom-ref": bom_ref, + "name": package_name, + "version": version, + "purl": purl + } + + # Add checksum if available + checksum = toolchain_pkg.get("checksum") + if checksum and checksum != "error" and not checksum.startswith("error"): + # Determine algorithm based on hash length + if len(checksum) == 64: + alg = "SHA-256" + elif len(checksum) == 40: + alg = "SHA-1" + else: + alg = "SHA-256" # Default assumption + + component["hashes"] = [ + { + 
"alg": alg, + "content": checksum + } + ] + + # Add CPE + cpe = toolchain_pkg.get("cpe") + if cpe: + component["externalReferences"] = [ + { + "type": "other", + "comment": "CPE 2.3", + "url": cpe + } + ] + + # Add license + license_str = toolchain_pkg.get("licenseDeclared") + if license_str and license_str != "(none)": + component["licenses"] = [ + { + "license": { + "id": license_str + } + } + ] + + # Add properties + properties = [] + + # Mark as build toolchain + properties.append({ + "name": "mock:role", + "value": "build-toolchain" + }) + + # Add signature information + signature_info = toolchain_pkg.get("digital_signature", {}) + if signature_info: + sig_props = self._signature_info_to_properties(signature_info) + properties.extend(sig_props) + + # Add build date if available + build_date = signature_info.get("build_date") + if build_date: + properties.append({ + "name": "mock:build:date", + "value": build_date + }) + + if properties: + component["properties"] = properties + + return component + + def _create_source_file_component(self, source_file): + """Creates a CycloneDX component for a source file.""" + filename = source_file.get("filename") + if not filename: + return None + + # Generate bom-ref from filename and hash + sha256 = source_file.get("sha256") + if sha256: + bom_ref = f"file:{filename}#{sha256[:16]}" + else: + bom_ref = f"file:{filename}" + + component = { + "type": "file", + "bom-ref": bom_ref, + "name": filename + } + + # Add hash + if sha256: + component["hashes"] = [ + { + "alg": "SHA-256", + "content": sha256 + } + ] + + # Add properties + properties = [] + + source_type = "patch" if self._is_patch_file(filename) else "source" + + properties.append({ + "name": "mock:source:type", + "value": source_type + }) + + # Add signature information if available + signature = source_file.get("digital_signature") + if signature: + properties.append({ + "name": "mock:signature:info", + "value": signature + }) + + if properties: + 
component["properties"] = properties + + return component + + def _is_patch_file(self, filename): + """Returns True if the filename looks like a patch file.""" + if not filename: + return False + lower_name = filename.lower() + return lower_name.startswith("patch") or lower_name.endswith(".patch") or lower_name.endswith(".diff") + + def _format_epoch_timestamp(self, epoch_value): + """Convert epoch timestamp string to ISO8601 if possible.""" + if not epoch_value or epoch_value in ("(none)", "None"): + return None + try: + epoch_int = int(epoch_value) + if epoch_int <= 0: + return None + from datetime import datetime, timezone + return datetime.fromtimestamp(epoch_int, tz=timezone.utc).isoformat() + except Exception: + return epoch_value + + def _append_source_properties(self, properties, source_entries): + """Append source and patch references to component properties.""" + if not source_entries: + return + source_names = set() + patch_names = set() + source_refs = set() + patch_refs = set() + for entry in source_entries: + filename = entry.get("filename") + bom_ref = entry.get("bom_ref") + entry_type = entry.get("type", "source") + if entry_type == "patch": + if filename: + patch_names.add(filename) + if bom_ref: + patch_refs.add(bom_ref) + else: + if filename: + source_names.add(filename) + if bom_ref: + source_refs.add(bom_ref) + if source_names: + properties.append({ + "name": "mock:source:files", + "value": ",".join(sorted(source_names)) + }) + if source_refs: + properties.append({ + "name": "mock:source:refs", + "value": ",".join(sorted(source_refs)) + }) + if patch_names: + properties.append({ + "name": "mock:patch:files", + "value": ",".join(sorted(patch_names)) + }) + if patch_refs: + properties.append({ + "name": "mock:patch:refs", + "value": ",".join(sorted(patch_refs)) + }) + + def _generate_file_bom_ref(self, package_name, package_version, file_path): + """Generates a bom-ref for a file component within a package. 
+ + Format: file:package-name@version:/absolute/path/to/file + """ + # Normalize file path (ensure it starts with /) + if not file_path.startswith('/'): + file_path = '/' + file_path + + return f"file:{package_name}@{package_version}:{file_path}" + + def _should_include_file_dependency(self, file_path): + """Determine if a file should have a dependency entry.""" + if not self.include_file_dependencies: + return False + + # Filter out debug files if configured + if not self.include_debug_files: + if '/usr/lib/debug/' in file_path or file_path.endswith('.debug'): + return False + + # Filter out man pages if configured + if not self.include_man_pages: + if '/usr/share/man/' in file_path or (file_path.endswith('.gz') and '/man' in file_path): + return False + + return True + + def _create_file_components(self, rpm_path, package_name, package_version): + """Creates file components for all files in an RPM package.""" + if not self.include_file_components: + return [] + + file_list = self.get_rpm_file_list(rpm_path) + file_info = self.get_rpm_file_info(rpm_path) + + file_components = [] + for file_path in file_list: + if not file_path or not file_path.strip(): + continue + + # Filter files based on configuration + if not self.include_debug_files: + if '/usr/lib/debug/' in file_path or file_path.endswith('.debug'): + continue + + file_data = file_info.get(file_path, {}) + file_hash = file_data.get("sha256") + + bom_ref = self._generate_file_bom_ref(package_name, package_version, file_path) + component = { + "type": "file", + "bom-ref": bom_ref, + "name": file_path + } + + # Add hash if available + if file_hash: + component["hashes"] = [ + { + "alg": "SHA-256", + "content": file_hash + } + ] + + # Add properties for file metadata + properties = [] + if file_data.get("permissions"): + properties.append({ + "name": "mock:file:permissions", + "value": file_data["permissions"] + }) + if file_data.get("owner"): + properties.append({ + "name": "mock:file:owner", + "value": 
file_data["owner"] + }) + if file_data.get("group"): + properties.append({ + "name": "mock:file:group", + "value": file_data["group"] + }) + + if properties: + component["properties"] = properties + + file_components.append(component) + + return file_components + + def _get_source_file_bom_refs(self, package_name, source_files): + """Gets bom-refs for source files that were used to build a package. + + Returns list of bom-refs for source tar.gz and patch files. + """ + source_bom_refs = [] + + for source_file in source_files: + filename = source_file.get("filename", "") + if not filename: + continue + + # Include source tar.gz files + if filename.endswith('.tar.gz') or filename.endswith('.tar.bz2') or filename.endswith('.tar.xz'): + sha256 = source_file.get("sha256") + if sha256: + bom_ref = f"file:{filename}#{sha256[:16]}" + else: + bom_ref = f"file:{filename}" + source_bom_refs.append(bom_ref) + + # Include patch files (matching pattern like 00xx*.patch) + elif filename.endswith('.patch') or '.patch' in filename.lower(): + sha256 = source_file.get("sha256") + if sha256: + bom_ref = f"file:{filename}#{sha256[:16]}" + else: + bom_ref = f"file:{filename}" + source_bom_refs.append(bom_ref) + + return source_bom_refs + + def _create_dependency(self, bom_ref, dependencies, component_map, distro): + """Creates a CycloneDX dependency entry.""" + if not bom_ref: + return None + + # Convert dependency strings to bom-refs + depends_on = [] + for dep in dependencies: + # Parse RPM dependency format (e.g., "libc.so.6()(64bit)", "package >= 1.0") + dep_bom_ref = self._dependency_to_bom_ref(dep, component_map, distro) + if dep_bom_ref: + depends_on.append(dep_bom_ref) + + # Deduplicate dependsOn array + depends_on = list(set(depends_on)) + + if not depends_on: + return None + + return { + "ref": bom_ref, + "dependsOn": depends_on + } + + def _dependency_to_bom_ref(self, dependency_string, component_map, distro): + """Converts an RPM dependency string to a bom-ref (PURL).""" + 
if not dependency_string: + return None + + # RPM dependencies can be complex: "package >= version", "libc.so.6()(64bit)", etc. + # Try to extract package name + dep = dependency_string.split()[0] if dependency_string else "" + + # Remove version constraints (>=, <=, =, etc.) + dep = re.sub(r'\s*[><=!]+\s*.*$', '', dep) + + # Remove parentheses content (e.g., "libc.so.6()(64bit)" -> "libc.so.6") + dep = re.sub(r'\(.*?\)', '', dep) + dep = dep.strip() + + if not dep or dep.startswith('/'): + return None + + # Try to match against known components (case-insensitive) + dep_lower = dep.lower() + if dep_lower in component_map: + return component_map[dep_lower] + + # If no match found, try to find by package name pattern + # Some dependencies are library names, try to find matching package + for pkg_name, bom_ref in component_map.items(): + # Check if dependency might match this package + # (e.g., "libc.so.6" might come from "glibc" package) + if dep_lower in pkg_name or pkg_name in dep_lower: + return bom_ref + + # If still no match, return None (don't create invalid references) + return None + + def _parse_signature_to_properties(self, signature_string): + """Parses RPM signature string into CycloneDX properties.""" + properties = [] + if not signature_string or signature_string == "(none)": + return properties + + # Parse signature line like: "RSA/SHA256, Fri 08 Nov 2024 03:56:24 AM EST, Key ID c8ac4916105ef944" + properties.append({ + "name": "mock:signature:type", + "value": "GPG" + }) + + if "RSA/SHA256" in signature_string: + properties.append({ + "name": "mock:signature:algorithm", + "value": "RSA/SHA256" + }) + elif "DSA/SHA1" in signature_string: + properties.append({ + "name": "mock:signature:algorithm", + "value": "DSA/SHA1" + }) + elif "ECDSA/SHA256" in signature_string: + properties.append({ + "name": "mock:signature:algorithm", + "value": "ECDSA/SHA256" + }) + elif "Ed25519/SHA256" in signature_string: + properties.append({ + "name": 
"mock:signature:algorithm", + "value": "Ed25519/SHA256" + }) + + # Extract key ID + key_id_match = re.search(r'Key ID ([0-9a-fA-F]+)', signature_string) + if key_id_match: + properties.append({ + "name": "mock:signature:key", + "value": key_id_match.group(1) + }) + + # Extract date + date_match = re.search(r'([A-Za-z]{3} [A-Za-z]{3}\s+\d{1,2} \d{2}:\d{2}:\d{2} \d{4})', signature_string) + if date_match: + properties.append({ + "name": "mock:signature:date", + "value": date_match.group(1) + }) + + properties.append({ + "name": "mock:signature:raw", + "value": signature_string + }) + + return properties - # print that we're in this function + def _signature_info_to_properties(self, signature_info): + """Converts signature info dict to CycloneDX properties.""" + properties = [] + + sig_type = signature_info.get("signature_type", "unsigned") + properties.append({ + "name": "mock:signature:type", + "value": sig_type + }) + + if sig_type != "unsigned" and sig_type != "unknown": + algorithm = signature_info.get("signature_algorithm") + if algorithm: + properties.append({ + "name": "mock:signature:algorithm", + "value": algorithm + }) + + key_id = signature_info.get("signature_key") + if key_id: + properties.append({ + "name": "mock:signature:key", + "value": key_id + }) + + sig_date = signature_info.get("signature_date") + if sig_date: + properties.append({ + "name": "mock:signature:date", + "value": sig_date + }) + + sig_valid = signature_info.get("signature_valid", False) + properties.append({ + "name": "mock:signature:valid", + "value": str(sig_valid) + }) + + return properties + + def parse_spec_file(self, spec_path): + """Parses a spec file to extract source and patch files with their hashes and signatures.""" print("Parsing spec file") - # does spec file exist? 
if not print what it was looking for if not os.path.isfile(spec_path): print(f"Spec file not found: {spec_path}") return [] + sources = [] try: - with open(spec_path, 'r') as spec: - for line in spec: - line = line.strip() - # Match lines like Source0: or Patch1: - match = re.match(r'^(Source|Patch)[0-9]*:\s*(.+)$', line) - if match: - sources.append(match.group(2)) + chroot_spec_path = self._convert_to_chroot_path(spec_path) + # Use rpmspec --parse inside the build chroot to ensure macro expansion matches the build + cmd = ["rpmspec", "--parse", chroot_spec_path] + result, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + + for line in (result or "").splitlines(): + line = line.strip() + # Match lines like Source0: or Patch1: + match = re.match(r'^(Source|Patch)[0-9]*:\s*(.+)$', line) + if match: + source_file = match.group(2) + # Extract hash if present (format: filename#hash) + if '#' in source_file: + filename, hash_value = source_file.split('#', 1) + else: + filename = source_file + hash_value = None + + # Extract actual filename from URL if it's a URL + if filename.startswith('http'): + # Extract filename from URL (last part after /) + actual_filename = filename.split('/')[-1] + else: + actual_filename = filename + + # Try to find the actual file and calculate its hash + build_dir = os.path.dirname(spec_path) + # SOURCES directory is at the same level as SPECS, not inside SPECS + sources_dir = os.path.join(os.path.dirname(build_dir), "SOURCES") + file_path = os.path.join(sources_dir, actual_filename) + + actual_hash = None + if os.path.isfile(file_path): + actual_hash = self.hash_file(file_path) + print(f"Found source file {actual_filename} at {file_path}, hash: {actual_hash}") + elif hash_value: + actual_hash = hash_value + print(f"Using hash from spec file for {actual_filename}: {hash_value}") + else: + print(f"Source file {actual_filename} not found at {file_path}") + + # Check for digital signature (GPG signature) 
+ signature = self.get_file_signature(file_path) if os.path.isfile(file_path) else None + + sources.append({ + "filename": actual_filename, + "sha256": actual_hash, + "digital_signature": signature + }) + print(f"Extracted source and patch files from spec: {sources}") except Exception as e: print(f"Failed to parse spec file {spec_path}: {e}") return sources + def get_file_signature(self, file_path): + """Attempts to detect if a file has a digital signature.""" + try: + # Check for .asc signature file + asc_file = file_path + ".asc" + if os.path.isfile(asc_file): + return "GPG signature file exists: " + os.path.basename(asc_file) + + # Check for .sig signature file + sig_file = file_path + ".sig" + if os.path.isfile(sig_file): + return "GPG signature file exists: " + os.path.basename(sig_file) + + # Check if the file itself is a signature + if file_path.endswith('.asc') or file_path.endswith('.sig'): + return "File is a signature file" + + return None + except Exception as e: + print(f"Failed to check signature for {file_path}: {e}") + return None + def get_iso_timestamp(self): """Returns the current time in ISO 8601 format.""" from datetime import datetime return datetime.utcnow().isoformat() + "Z" - def get_build_environment_packages(self): - """Returns the list of packages installed in the build environment.""" + def get_distribution(self): + """Returns the distribution name and version from /etc/os-release.""" try: - cmd = "rpm -qa --qf '%{NAME} %{VERSION}-%{RELEASE}.%{ARCH} %{SIGPGP}\n'" - output, _ = self.buildroot.doOutChroot(cmd, returnOutput=True, shell=True) + distro = None + version = None + if os.path.exists("/etc/os-release"): + with open("/etc/os-release") as f: + for line in f: + if line.startswith("NAME="): + distro = line.strip().split("=", 1)[1].strip('"') + elif line.startswith("VERSION_ID="): + version = line.strip().split("=", 1)[1].strip('"') + if distro and version: + return f"{distro} {version}" + elif distro: + return distro + else: + return 
"Unknown" + except Exception as e: + return f"Unknown ({e})" + + + def generate_cpe(self, package_name, package_version, vendor=None): + """Generates a CPE identifier for a package.""" + # CPE format: cpe:2.3:a:{vendor}:{product}:{version}:*:*:*:*:*:*:*:* + + # Default vendor if not provided + if not vendor or vendor == "(none)": + vendor = "fedora" + + # Clean up vendor name for CPE + vendor = re.sub(r'[^a-zA-Z0-9._-]', '_', vendor.lower()) + + # Clean up package name for CPE + product = re.sub(r'[^a-zA-Z0-9._-]', '_', package_name.lower()) + + # Clean up version for CPE (remove release part if present) + version = package_version + if '-' in version: + version = version.split('-')[0] # Remove release part + + # Handle special cases for common packages + if package_name == "glibc": + vendor = "gnu" + product = "glibc" + elif package_name == "openssl": + vendor = "openssl" + product = "openssl" + elif package_name == "gcc": + vendor = "gnu" + product = "gcc" + elif package_name == "make": + vendor = "gnu" + product = "make" + elif package_name == "gettext": + vendor = "gnu" + product = "gettext" + + # Generate CPE + cpe = f"cpe:2.3:a:{vendor}:{product}:{version}:*:*:*:*:*:*:*:*" + return cpe + + def detect_chroot_distribution(self): + """Detects the distribution name inside the chroot by reading /etc/os-release.""" + try: + # Use buildroot's doChroot to cat /etc/os-release + cmd = ["cat", "/etc/os-release"] + output, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) + distro = None + if output: + for line in output.splitlines(): + if line.startswith("ID="): + distro = line.strip().split("=", 1)[1].strip('"').lower() + break + if distro: + return distro + else: + return "unknown" + except Exception as e: + print(f"Failed to detect chroot distribution: {e}") + return "unknown" + + def get_build_toolchain_packages(self): + """Returns the list of packages installed in the build toolchain with detailed signature information.""" + try: + 
# First get basic package info + query = "%{NAME}|%{VERSION}-%{RELEASE}.%{ARCH}|%{LICENSE}|%{BUILDTIME}\n" + cmd = ["rpm", "-qa", "--qf", query] + output, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) packages = [] + cpe_vendor_default = self.detect_chroot_distribution() or "unknown" + import re + import datetime + for line in output.splitlines(): - parts = line.split() - if len(parts) >= 3: - packages.append({ - "name": parts[0], - "version": parts[1], - "digital_signature": parts[2] if len(parts) > 2 else None - }) - #print(f"Build environment packages: {packages}") + parts = line.split("|", 3) + if len(parts) < 3: + continue + package_name = parts[0].strip() + package_version = parts[1].strip() + package_license = parts[2].strip() + build_time = parts[3].strip() if len(parts) > 3 else None + + # Skip GPG keys and other non-package entries + if package_name.startswith('gpg-pubkey') or package_name == '(none)' or not package_name: + continue + + # Get detailed signature info for this package + digital_signature = self.get_package_signature_from_chroot(package_name) + + # Build date + if build_time and build_time.isdigit(): + try: + dt = datetime.datetime.utcfromtimestamp(int(build_time)) + digital_signature["build_date"] = dt.isoformat() + "Z" + except Exception: + digital_signature["build_date"] = None + + cpe = self.generate_cpe(package_name, package_version, vendor=cpe_vendor_default) + + # Get package checksum (SHA-256 of the RPM file) + package_checksum = self.get_package_checksum_from_chroot(package_name) + + packages.append({ + "name": package_name, + "version": package_version, + "licenseDeclared": package_license, + "digital_signature": digital_signature, + "cpe": cpe, + "checksum": package_checksum + }) + print(f"Found {len(packages)} build toolchain packages") return packages except Exception as e: - print(f"Failed to retrieve build environment packages: {e}") + print(f"Failed to get build environment packages: 
{e}") return [] + def get_package_checksum_from_chroot(self, package_name): + """Gets the SHA-256 checksum of an installed package from inside the chroot.""" + try: + # Try different RPM header tags to get a checksum + # SHA256HEADER is the SHA256 checksum of the RPM header + cmd = ["rpm", "-q", package_name, "--qf", "%{SHA256HEADER}"] + output, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) + + if output and output.strip() and output.strip() != "(none)" and not output.strip().startswith("error"): + return output.strip().lower() + + # Try SHA1HEADER as fallback (older RPMs) + cmd = ["rpm", "-q", package_name, "--qf", "%{SHA1HEADER}"] + output, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) + + if output and output.strip() and output.strip() != "(none)" and not output.strip().startswith("error"): + # It's SHA-1, but it's better than nothing + print(f"Warning: Using SHA-1 for {package_name}, SHA-256 not available") + return output.strip().lower() + + # No header checksum available + print(f"Warning: No checksum available for {package_name}") + return None + + except Exception as e: + print(f"Failed to get checksum for package {package_name}: {e}") + return None + + def get_package_signature_from_chroot(self, package_name): + """Gets detailed signature information for a specific package from inside the chroot.""" + try: + cmd = ["rpm", "-qi", package_name] + output, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) + + signature_info = { + "signature_type": "unsigned", + "signature_key": None, + "signature_date": None, + "signature_algorithm": None, + "signature_valid": False, + "raw_signature_data": None, + "build_date": None + } + + for line in output.splitlines(): + line = line.strip() + if line.startswith("Signature"): + # Extract the signature data after the colon + sig_data = line.split(":", 1)[1].strip() if ":" in line else "" + 
signature_info["raw_signature_data"] = sig_data + + if sig_data and sig_data != "(none)" and sig_data != "": + signature_info["signature_type"] = "GPG" + signature_info["signature_valid"] = True + + # Parse signature line like: "RSA/SHA256, Fri 08 Nov 2024 03:56:24 AM EST, Key ID c8ac4916105ef944" + if "RSA/SHA256" in sig_data: + signature_info["signature_algorithm"] = "RSA/SHA256" + elif "DSA/SHA1" in sig_data: + signature_info["signature_algorithm"] = "DSA/SHA1" + elif "ECDSA/SHA256" in sig_data: + signature_info["signature_algorithm"] = "ECDSA/SHA256" + elif "Ed25519/SHA256" in sig_data: + signature_info["signature_algorithm"] = "Ed25519/SHA256" + + # Extract key ID + if "Key ID" in sig_data: + key_id_match = re.search(r'Key ID ([0-9a-fA-F]+)', sig_data) + if key_id_match: + signature_info["signature_key"] = key_id_match.group(1) + + # Extract date - handle various time formats including EST/EDT + date_match = re.search(r'([A-Za-z]{3} [A-Za-z]{3}\s+\d{1,2} \d{2}:\d{2}:\d{2} \d{4})', sig_data) + if date_match: + signature_info["signature_date"] = date_match.group(1) + else: + signature_info["signature_type"] = "unsigned" + signature_info["signature_valid"] = False + break + + return signature_info + + except Exception as e: + print(f"Failed to get signature for package {package_name}: {e}") + return { + "signature_type": "unknown", + "signature_valid": False, + "error": str(e) + } + + def get_package_detailed_signature(self, package_name): + """Gets detailed signature information for a specific package.""" + try: + import subprocess + import shlex + # Try to use rpm --root to query from outside the chroot first + # If that fails, fall back to running inside the chroot + root_path = self.buildroot.rootdir + cmd = f"rpm --root {shlex.quote(root_path)} -qi {shlex.quote(package_name)}" + result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + output = result.stdout + + # If host rpm command failed (empty output), try 
running inside chroot + if not output.strip(): + print(f"Host RPM command failed for {package_name}, trying inside chroot...") + # Use buildroot's doChroot method to run the command inside the chroot + cmd = ["rpm", "-qi", package_name] + output, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) + print(f"Chroot RPM output for {package_name}: {output[:200]}...") # Debug output + + signature_info = { + "signature_type": None, + "signature_key": None, + "signature_date": None, + "signature_algorithm": None, + "signature_valid": None, + "raw_signature_data": None, + "build_date": None + } + + output_lines = output.splitlines() + i = 0 + signature_found = False + print(f"DEBUG: Processing {len(output_lines)} lines for package {package_name}") + while i < len(output_lines): + line = output_lines[i].strip() + print(f"DEBUG: Line {i}: '{line}'") + if line.startswith("Signature"): + signature_found = True + print(f"DEBUG: Found signature line: '{line}'") + # Extract the signature data after the colon + sig_data = line.split(":", 1)[1].strip() if ":" in line else "" + signature_info["raw_signature_data"] = sig_data + print(f"DEBUG: Extracted signature data: '{sig_data}'") + + if sig_data and sig_data != "(none)" and sig_data != "": + signature_info["signature_type"] = "GPG" + signature_info["signature_valid"] = True + + # Parse signature line like: "RSA/SHA256, Fri 08 Nov 2024 03:56:24 AM EST, Key ID c8ac4916105ef944" + if "RSA/SHA256" in sig_data: + signature_info["signature_algorithm"] = "RSA/SHA256" + elif "DSA/SHA1" in sig_data: + signature_info["signature_algorithm"] = "DSA/SHA1" + elif "ECDSA/SHA256" in sig_data: + signature_info["signature_algorithm"] = "ECDSA/SHA256" + elif "Ed25519/SHA256" in sig_data: + signature_info["signature_algorithm"] = "Ed25519/SHA256" + + # Extract key ID + if "Key ID" in sig_data: + key_id_match = re.search(r'Key ID ([0-9a-fA-F]+)', sig_data) + if key_id_match: + signature_info["signature_key"] = 
key_id_match.group(1) + + # Extract date - handle various time formats including EST/EDT + date_match = re.search(r'([A-Za-z]{3} [A-Za-z]{3}\s+\d{1,2} \d{2}:\d{2}:\d{2} \d{4})', sig_data) + if date_match: + signature_info["signature_date"] = date_match.group(1) + else: + signature_info["signature_type"] = "unsigned" + signature_info["signature_valid"] = False + i += 1 + continue + + if line.startswith("Build Date"): + # This can help verify the package build time + build_date = line.split(":", 1)[1].strip() if ":" in line else None + if build_date: + signature_info["build_date"] = build_date + i += 1 + + # If no signature line was found, mark as unsigned + if not signature_found: + signature_info["signature_type"] = "unsigned" + signature_info["signature_valid"] = False + + return signature_info + + except Exception as e: + print(f"Failed to get detailed signature for package {package_name}: {e}") + return { + "signature_type": "unknown", + "signature_valid": False, + "error": str(e) + } + def get_rpm_metadata(self, rpm_path): """Extracts metadata from an RPM file.""" if not os.path.isfile(rpm_path): print(f"RPM file not found: {rpm_path}") return {} - cmd = ["rpm", "-qp", rpm_path, "--queryformat", - "\{\"name\": \"%{NAME}\", \"version\": \"%{VERSION}\", \"release\": \"%{RELEASE}\", \"arch\": \"%{ARCH}\", \"summary\": \"%{SUMMARY}\", \"license\": \"%{LICENSE}\", \"vendor\": \"%{VENDOR}\", \"url\": \"%{URL}\", \"packager\": \"%{PACKAGER}\"\}"] + # Use individual rpm queries instead of trying to output JSON directly try: - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) - if not result.stdout.strip(): - print(f"No output from RPM command for {rpm_path}") - print(f"Command run: {cmd}") - return {} - - print(f"RPM command output: {result.stdout}") - return json.loads(result.stdout) + metadata = {} + + # Get each field individually + fields = { + "name": "%{NAME}", + "version": "%{VERSION}", + "release": "%{RELEASE}", 
+ "arch": "%{ARCH}", + "epoch": "%{EPOCH}", + "summary": "%{SUMMARY}", + "license": "%{LICENSE}", + "vendor": "%{VENDOR}", + "url": "%{URL}", + "packager": "%{PACKAGER}", + "buildtime": "%{BUILDTIME}", + "buildhost": "%{BUILDHOST}", + "sourcerpm": "%{SOURCERPM}", + "group": "%{GROUP}", + "distribution": "%{DISTRIBUTION}" + } + + for field_name, field_format in fields.items(): + cmd = ["rpm", "-qp", rpm_path, "--queryformat", field_format] + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + value = result.stdout.strip() + # Handle empty epoch (rpm returns empty string for no epoch) + if field_name == "epoch" and not value: + value = "(none)" + metadata[field_name] = value + + print(f"RPM metadata extracted: {metadata}") + return metadata + except subprocess.CalledProcessError as e: print(f"RPM command failed for {rpm_path}: {e.stderr}") return {} - except json.JSONDecodeError as e: - print(f"Failed to parse RPM metadata output: {result.stdout}") + except Exception as e: + print(f"Failed to extract RPM metadata: {e}") return {} def get_rpm_file_list(self, rpm_path): @@ -210,6 +1885,38 @@ def get_rpm_file_list(self, rpm_path): print(f"Failed to get file list for {rpm_path}: {e.stderr}") return [] + def get_rpm_file_info(self, rpm_path): + """Extracts file hashes, ownership, and permissions from an RPM file using 'rpm -qp --dump'.""" + cmd = ["rpm", "-qp", "--dump", rpm_path] + try: + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + file_info = {} + for line in result.stdout.splitlines(): + parts = line.split() + if len(parts) >= 8: + file_path = parts[0] + sha256 = parts[3] + # If the hash is all zeroes, treat as None + if sha256 == "0" * 64 or sha256 == "0000000000000000000000000000000000000000000000000000000000000000": + sha256 = None + + # Parse permissions (field 4), owner (field 5), group (field 6) + permissions = parts[4] if len(parts) > 4 else None + owner = 
parts[5] if len(parts) > 5 else None + group = parts[6] if len(parts) > 6 else None + + file_info[file_path] = { + "sha256": sha256, + "permissions": permissions, + "owner": owner, + "group": group + } + print(f"File info for RPM {rpm_path}: {file_info}") + return file_info + except subprocess.CalledProcessError as e: + print(f"Failed to get file info for {rpm_path}: {e.stderr}") + return {} + def get_rpm_dependencies(self, rpm_path): """Extracts the list of dependencies from an RPM file.""" cmd = ["rpm", "-qpR", rpm_path] @@ -229,8 +1936,10 @@ def get_rpm_signature(self, rpm_path): result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) for line in result.stdout.splitlines(): if line.startswith("Signature"): - print(f"GPG Signature for {rpm_path}: {line}") - return line + # Extract the signature data after the colon + sig_data = line.split(":", 1)[1].strip() if ":" in line else "" + print(f"GPG Signature for {rpm_path}: {sig_data}") + return sig_data return None except subprocess.CalledProcessError as e: print(f"Failed to get GPG signature for {rpm_path}: {e.stderr}") @@ -247,3 +1956,52 @@ def hash_file(self, file_path): except Exception as e: print(f"Failed to hash file {file_path}: {e}") return None + + def extract_source_files_from_srpm(self, src_rpm_path): + """Extracts source files from a source RPM.""" + print(f"Extracting source files from source RPM: {src_rpm_path}") + source_files = [] + try: + temp_dir = tempfile.mkdtemp(prefix="sbom-srpm-") + try: + extract_cmd = f"rpm2cpio {shlex.quote(src_rpm_path)} | cpio -idm 2>/dev/null" + subprocess.run(extract_cmd, shell=True, cwd=temp_dir, check=True) + except subprocess.CalledProcessError as e: + print(f"Failed to unpack source RPM {src_rpm_path}: {e}") + shutil.rmtree(temp_dir, ignore_errors=True) + return source_files + + for root_dir, _, files in os.walk(temp_dir): + for file_name in files: + if file_name.endswith(".spec"): + continue + file_path = 
os.path.join(root_dir, file_name) + sha256 = self.hash_file(file_path) + signature = self.get_file_signature(file_path) + source_files.append({ + "filename": file_name, + "sha256": sha256, + "digital_signature": signature + }) + try: + shutil.rmtree(temp_dir) + except Exception: + pass + + print(f"Extracted source files from source RPM: {source_files}") + except Exception as e: + print(f"Failed to extract source files from source RPM {src_rpm_path}: {e}") + + return source_files + + def _convert_to_chroot_path(self, host_path): + """Convert an absolute host path into the corresponding path inside the build chroot.""" + rootdir = getattr(self.buildroot, "rootdir", "") + if not rootdir: + return host_path + if host_path.startswith(rootdir): + rel_path = host_path[len(rootdir):] + if not rel_path.startswith("/"): + rel_path = "/" + rel_path + return rel_path + return host_path From 1994dc89f5f73a7a8a17b7b9ac234056750cf4dc Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 31 Dec 2025 11:37:06 -0500 Subject: [PATCH 04/25] Remove legacy sbom_generator.py from root directory --- sbom_generator.py | 249 ---------------------------------------------- 1 file changed, 249 deletions(-) delete mode 100644 sbom_generator.py diff --git a/sbom_generator.py b/sbom_generator.py deleted file mode 100644 index 6c4821b37..000000000 --- a/sbom_generator.py +++ /dev/null @@ -1,249 +0,0 @@ -import os -import json -import subprocess -from mockbuild.trace_decorator import traceLog -import hashlib -import re - -requires_api_version = "1.1" # Ensure compatibility with mock API - -# Plugin entry point -@traceLog() -def init(plugins, conf, buildroot): - # Ensure configuration exists for the plugin - if "sbom_generator_opts" not in conf: - conf["sbom_generator_opts"] = {} - SBOMGenerator(plugins, conf["sbom_generator_opts"], buildroot) - -class SBOMGenerator(object): - """Generates SBOM for the built packages.""" - # pylint: disable=too-few-public-methods - @traceLog() - def 
__init__(self, plugins, conf, buildroot): - - self.buildroot = buildroot - self.state = buildroot.state - self.rootdir = buildroot.rootdir - self.builddir = buildroot.builddir - self.conf = conf - self.sbom_enabled = self.conf.get('generate_sbom', True) - self.sbom_done = False - plugins.add_hook("prebuild", self._listSPECSDirectory) - plugins.add_hook("postbuild", self._generateSBOMPostBuildHook) - - @traceLog() - def _listSPECSDirectory(self): - """Lists the contents of the SPECS directory before building.""" - - print("DEBUG: Listing contents of SPECS directory before building:") -# print(f"Buildroot values:") -# for key, value in vars(self.buildroot).items(): -# print(f" {key}: {value}") - print(f"DEBUG: builddir is {self.buildroot.builddir}") - print(f"DEBUG: rootdir is {self.rootdir}") - print(f"DEBUG: resultsdir is {self.buildroot.resultdir}") - - # specs_dir is rootdir + builddir + SPECS - #specs_dir = os.path.join(self.buildroot.rootdir, self.buildroot.builddir, "rpmbuild/SPECS") - specs_dir = os.path.join(self.rootdir, "/foo/", "/bar/") - print(f"DEBUG: spec dir is {specs_dir}") - - try: - if os.path.exists(specs_dir): - specs_files = os.listdir(specs_dir) - print(f"Contents of SPECS directory: {specs_files}") - else: - print("SPECS directory does not exist.") - except Exception as e: - print(f"Failed to list contents of SPECS directory: {e}") - - @traceLog() - def _generateSBOMPostBuildHook(self): - if self.sbom_done or not self.sbom_enabled: - return - - out_file = os.path.join(self.buildroot.resultdir, 'sbom.spdx.json') - state_text = "Generating SBOM for built packages v0.8" - self.state.start(state_text) - - try: - build_dir = self.buildroot.resultdir - rpm_files = [f for f in os.listdir(build_dir) if f.endswith('.rpm')] - src_rpm_files = [f for f in os.listdir(build_dir) if f.endswith('.src.rpm')] - spec_file = next((f for f in os.listdir(build_dir) if f.endswith('.spec')), None) - - if not rpm_files and not src_rpm_files and not spec_file: - 
print("No RPM, source RPM, or spec file found for SBOM generation.") - return - - sbom = { - "SPDXVersion": "SPDX-2.3", - "DataLicense": "CC0-1.0", - "SPDXID": "SPDXRef-DOCUMENT", - "name": "mock-build", - "creator": "Mock-SBOM-Plugin", - "created": self.get_iso_timestamp(), - "packages": [], - "source_package": {} - } - - # Process spec file for sources and patches - if spec_file: - spec_path = os.path.join(build_dir, spec_file) - source_files = self.parse_spec_file(spec_path) - sbom["source_package"]["source_files"] = source_files - - build_environment = self.get_build_environment_packages() - - # Process binary RPMs - for rpm_file in rpm_files: - rpm_path = os.path.join(build_dir, rpm_file) - package_data = self.get_rpm_metadata(rpm_path) - if package_data: - sbom_package = { - "name": package_data.get("name"), - "version": package_data.get("version"), - "release": package_data.get("release"), - "license": package_data.get("license"), - "vendor": package_data.get("vendor"), - "url": package_data.get("url"), - "packager": package_data.get("packager"), - "files": [], - "dependencies": [], - "gpg_signature": None, - } - sbom["packages"].append(sbom_package) - - sbom["build_environment"] = build_environment - - with open(out_file, "w") as f: - json.dump(sbom, f, indent=4) - - print(f"SBOM successfully written to: {out_file}") - except Exception as e: - print(f"An error occurred during SBOM generation: {e}") - finally: - self.sbom_done = True - self.state.finish(state_text) - - def parse_spec_file(self, spec_path): - """Parses a spec file to extract source and patch files.""" - - # print that we're in this function - print("Parsing spec file") - # does spec file exist? 
if not print what it was looking for - if not os.path.isfile(spec_path): - print(f"Spec file not found: {spec_path}") - return [] - sources = [] - try: - with open(spec_path, 'r') as spec: - for line in spec: - line = line.strip() - # Match lines like Source0: or Patch1: - match = re.match(r'^(Source|Patch)[0-9]*:\s*(.+)$', line) - if match: - sources.append(match.group(2)) - print(f"Extracted source and patch files from spec: {sources}") - except Exception as e: - print(f"Failed to parse spec file {spec_path}: {e}") - return sources - - def get_iso_timestamp(self): - """Returns the current time in ISO 8601 format.""" - from datetime import datetime - return datetime.utcnow().isoformat() + "Z" - - def get_build_environment_packages(self): - """Returns the list of packages installed in the build environment.""" - try: - cmd = "rpm -qa --qf '%{NAME} %{VERSION}-%{RELEASE}.%{ARCH} %{SIGPGP}\n'" - output, _ = self.buildroot.doOutChroot(cmd, returnOutput=True, shell=True) - packages = [] - for line in output.splitlines(): - parts = line.split() - if len(parts) >= 3: - packages.append({ - "name": parts[0], - "version": parts[1], - "digital_signature": parts[2] if len(parts) > 2 else None - }) - #print(f"Build environment packages: {packages}") - return packages - except Exception as e: - print(f"Failed to retrieve build environment packages: {e}") - return [] - - def get_rpm_metadata(self, rpm_path): - """Extracts metadata from an RPM file.""" - if not os.path.isfile(rpm_path): - print(f"RPM file not found: {rpm_path}") - return {} - - cmd = ["rpm", "-qp", rpm_path, "--queryformat", - "\{\"name\": \"%{NAME}\", \"version\": \"%{VERSION}\", \"release\": \"%{RELEASE}\", \"arch\": \"%{ARCH}\", \"summary\": \"%{SUMMARY}\", \"license\": \"%{LICENSE}\", \"vendor\": \"%{VENDOR}\", \"url\": \"%{URL}\", \"packager\": \"%{PACKAGER}\"\}"] - try: - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) - if not result.stdout.strip(): - 
print(f"No output from RPM command for {rpm_path}") - print(f"Command run: {cmd}") - return {} - - print(f"RPM command output: {result.stdout}") - return json.loads(result.stdout) - except subprocess.CalledProcessError as e: - print(f"RPM command failed for {rpm_path}: {e.stderr}") - return {} - except json.JSONDecodeError as e: - print(f"Failed to parse RPM metadata output: {result.stdout}") - return {} - - def get_rpm_file_list(self, rpm_path): - """Extracts the list of files from an RPM file.""" - cmd = ["rpm", "-qpl", rpm_path] - try: - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) - files = result.stdout.splitlines() - print(f"Files in RPM {rpm_path}: {files}") - return files - except subprocess.CalledProcessError as e: - print(f"Failed to get file list for {rpm_path}: {e.stderr}") - return [] - - def get_rpm_dependencies(self, rpm_path): - """Extracts the list of dependencies from an RPM file.""" - cmd = ["rpm", "-qpR", rpm_path] - try: - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) - dependencies = result.stdout.splitlines() - print(f"Dependencies for RPM {rpm_path}: {dependencies}") - return dependencies - except subprocess.CalledProcessError as e: - print(f"Failed to get dependencies for {rpm_path}: {e.stderr}") - return [] - - def get_rpm_signature(self, rpm_path): - """Extracts the GPG signature of an RPM file.""" - cmd = ["rpm", "-qpi", rpm_path] - try: - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) - for line in result.stdout.splitlines(): - if line.startswith("Signature"): - print(f"GPG Signature for {rpm_path}: {line}") - return line - return None - except subprocess.CalledProcessError as e: - print(f"Failed to get GPG signature for {rpm_path}: {e.stderr}") - return None - - def hash_file(self, file_path): - """Calculates the SHA256 hash of a file.""" - sha256 = hashlib.sha256() - try: - 
with open(file_path, "rb") as f: - for chunk in iter(lambda: f.read(4096), b""): - sha256.update(chunk) - return sha256.hexdigest() - except Exception as e: - print(f"Failed to hash file {file_path}: {e}") - return None From 2f6e049d3f633cd544d0daf0f03d55c80ecf6291 Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 31 Dec 2025 11:48:43 -0500 Subject: [PATCH 05/25] Address PR feedback: standardize imports, logging, versioning, and docs --- docs/Plugin-SBOM.md | 9 +- mock/py/mockbuild/plugins/sbom_generator.py | 129 +++++++++++--------- 2 files changed, 78 insertions(+), 60 deletions(-) diff --git a/docs/Plugin-SBOM.md b/docs/Plugin-SBOM.md index 32da21453..99e5550ed 100644 --- a/docs/Plugin-SBOM.md +++ b/docs/Plugin-SBOM.md @@ -121,9 +121,9 @@ The plugin supports several configuration options to control SBOM generation: config_opts['plugin_conf']['sbom_generator_opts'] = { 'generate_sbom': True, # Enable SBOM generation (default: True) 'include_file_components': True, # Include file-level components (default: True) - 'include_file_dependencies': True, # Include file-to-package dependencies (default: True) + 'include_file_dependencies': False, # Include file-to-package dependencies (default: False) 'include_debug_files': False, # Include debug files in file components (default: False) - 'include_man_pages': False, # Include man pages in file components (default: False) + 'include_man_pages': True, # Include man pages in file components (default: True) 'include_toolchain_dependencies': False, # Include build toolchain in dependencies (default: False) } ``` @@ -138,7 +138,7 @@ config_opts['plugin_conf']['sbom_generator_opts'] = { ## Output -The plugin generates a file named `sbom.cyclonedx.json` in the build results directory (typically `/var/lib/mock/fedora-42-x86_64/result/`). The SBOM includes: +The plugin generates a file named `<name>-<version>-<release>.sbom` in the build results directory (typically `/var/lib/mock/fedora-42-x86_64/result/`).
The SBOM includes: * CycloneDX document metadata * Build timestamp @@ -226,7 +226,8 @@ The plugin generates a file named `sbom.cyclonedx.json` in the build results dir "purl": "pkg:rpm/fedora/package-name@1.0-1.fc42?arch=x86_64", "externalReferences": [ { - "type": "cpe23Type", + "type": "other", + "comment": "CPE 2.3", "url": "cpe:2.3:a:fedora:package-name:1.0:*:*:*:*:*:*:*:*" }, { diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 8027e4650..33ee06cb0 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -11,7 +11,21 @@ import uuid import tempfile import shutil +import sys +import os +import shutil +import json +import subprocess +import hashlib +import time +import re import shlex +import tempfile +import rpm +from datetime import datetime + +import mockbuild.plugins +from mockbuild.trace_decorator import traceLog, getLog requires_api_version = "1.1" # Ensure compatibility with mock API @@ -52,24 +66,24 @@ def __init__(self, plugins, conf, buildroot): def _listSPECSDirectory(self): """Lists the contents of the SPECS directory before building.""" - print("DEBUG: Listing contents of SPECS directory before building:") - print(f"DEBUG: builddir is {self.buildroot.builddir}") - print(f"DEBUG: rootdir is {self.rootdir}") - print(f"DEBUG: resultsdir is {self.buildroot.resultdir}") + self.buildroot.root_log.debug("DEBUG: Listing contents of SPECS directory before building:") + self.buildroot.root_log.debug(f"DEBUG: builddir is {self.buildroot.builddir}") + self.buildroot.root_log.debug(f"DEBUG: rootdir is {self.rootdir}") + self.buildroot.root_log.debug(f"DEBUG: resultsdir is {self.buildroot.resultdir}") # Look for spec file in the build directory build_dir = self.buildroot.builddir specs_dir = os.path.join(build_dir, "SPECS") - print(f"DEBUG: spec dir is {specs_dir}") + self.buildroot.root_log.debug(f"DEBUG: spec dir is {specs_dir}") try: if 
os.path.exists(specs_dir): specs_files = os.listdir(specs_dir) - print(f"Contents of SPECS directory: {specs_files}") + self.buildroot.root_log.debug(f"Contents of SPECS directory: {specs_files}") else: - print("SPECS directory does not exist.") + self.buildroot.root_log.debug("SPECS directory does not exist.") except Exception as e: - print(f"Failed to list contents of SPECS directory: {e}") + self.buildroot.root_log.debug(f"Failed to list contents of SPECS directory: {e}") def _create_cyclonedx_document(self): """Creates the base CycloneDX document structure.""" @@ -85,15 +99,13 @@ def _create_cyclonedx_document(self): def _create_metadata(self): """Creates CycloneDX metadata object with Mock-specific build information.""" - from datetime import datetime - metadata = { "timestamp": datetime.utcnow().isoformat() + "Z", "tools": [ { "vendor": "Mock", "name": "mock-sbom-generator", - "version": "1.2.5" + "version": self.buildroot.config.get('version', 'unknown') } ], "lifecycles": [ @@ -168,7 +180,7 @@ def _evaluate_rpm_macro(self, macro): if output: return output.strip() except Exception as exc: # pylint: disable=broad-except - print(f"Warning: failed to eval macro {macro} in chroot: {exc}") + self.buildroot.root_log.debug(f"Warning: failed to eval macro {macro} in chroot: {exc}") try: result = subprocess.run( cmd, @@ -179,7 +191,7 @@ def _evaluate_rpm_macro(self, macro): ) return result.stdout.strip() except subprocess.CalledProcessError as exc: - print(f"Warning: failed to eval macro {macro}: {exc}") + self.buildroot.root_log.debug(f"Warning: failed to eval macro {macro}: {exc}") return "" def _read_file_from_chroot(self, relative_path): @@ -260,7 +272,7 @@ def _contains_flag(flag): for token in ["-d_fortify_source", "_fortify_source="] ), "build:hardening:pie_enabled": any( - token in flag_union for token in ["-fpie", "-fpie", "-pie"] + token in flag_union for token in ["-fpie", "-pie"] ), "build:hardening:relro_enabled": any( token in flag_union @@ -330,7 
+342,7 @@ def _generateSBOMPostBuildHook(self): break if not rpm_files and not src_rpm_files and not spec_file: - print("No RPM, source RPM, or spec file found for SBOM generation.") + self.buildroot.root_log.debug("No RPM, source RPM, or spec file found for SBOM generation.") return # Create CycloneDX document @@ -367,10 +379,11 @@ def _generateSBOMPostBuildHook(self): # Construct output filename using package name-version-release format # These should always be available in a proper mock build if not build_subject_name or not build_subject_version or not build_subject_release: - print(f"WARNING: Missing package metadata - name: {build_subject_name}, version: {build_subject_version}, release: {build_subject_release}") - print("Cannot generate SBOM with proper filename - build metadata incomplete") + self.buildroot.root_log.debug(f"WARNING: Missing package metadata - name: {build_subject_name}, version: {build_subject_version}, release: {build_subject_release}") + self.buildroot.root_log.debug("Cannot generate SBOM with proper filename - build metadata incomplete") return + import os sbom_filename = f"{build_subject_name}-{build_subject_version}-{build_subject_release}.sbom" out_file = os.path.join(self.buildroot.resultdir, sbom_filename) @@ -752,12 +765,13 @@ def _generateSBOMPostBuildHook(self): bom["dependencies"] = list(dependencies_dict.values()) # Write CycloneDX BOM + import json with open(out_file, "w") as f: json.dump(bom, f, indent=2) - print(f"CycloneDX SBOM successfully written to: {out_file}") + self.buildroot.root_log.debug(f"CycloneDX SBOM successfully written to: {out_file}") except Exception as e: - print(f"An error occurred during SBOM generation: {e}") + self.buildroot.root_log.debug(f"An error occurred during SBOM generation: {e}") import traceback traceback.print_exc() finally: @@ -1410,9 +1424,9 @@ def _signature_info_to_properties(self, signature_info): def parse_spec_file(self, spec_path): """Parses a spec file to extract source and patch 
files with their hashes and signatures.""" - print("Parsing spec file") + self.buildroot.root_log.debug("Parsing spec file") if not os.path.isfile(spec_path): - print(f"Spec file not found: {spec_path}") + self.buildroot.root_log.debug(f"Spec file not found: {spec_path}") return [] sources = [] @@ -1453,12 +1467,12 @@ def parse_spec_file(self, spec_path): actual_hash = None if os.path.isfile(file_path): actual_hash = self.hash_file(file_path) - print(f"Found source file {actual_filename} at {file_path}, hash: {actual_hash}") + self.buildroot.root_log.debug(f"Found source file {actual_filename} at {file_path}, hash: {actual_hash}") elif hash_value: actual_hash = hash_value - print(f"Using hash from spec file for {actual_filename}: {hash_value}") + self.buildroot.root_log.debug(f"Using hash from spec file for {actual_filename}: {hash_value}") else: - print(f"Source file {actual_filename} not found at {file_path}") + self.buildroot.root_log.debug(f"Source file {actual_filename} not found at {file_path}") # Check for digital signature (GPG signature) signature = self.get_file_signature(file_path) if os.path.isfile(file_path) else None @@ -1469,9 +1483,9 @@ def parse_spec_file(self, spec_path): "digital_signature": signature }) - print(f"Extracted source and patch files from spec: {sources}") + self.buildroot.root_log.debug(f"Extracted source and patch files from spec: {sources}") except Exception as e: - print(f"Failed to parse spec file {spec_path}: {e}") + self.buildroot.root_log.debug(f"Failed to parse spec file {spec_path}: {e}") return sources def get_file_signature(self, file_path): @@ -1493,7 +1507,7 @@ def get_file_signature(self, file_path): return None except Exception as e: - print(f"Failed to check signature for {file_path}: {e}") + self.buildroot.root_log.debug(f"Failed to check signature for {file_path}: {e}") return None def get_iso_timestamp(self): @@ -1580,7 +1594,7 @@ def detect_chroot_distribution(self): else: return "unknown" except Exception as e: 
- print(f"Failed to detect chroot distribution: {e}") + self.buildroot.root_log.debug(f"Failed to detect chroot distribution: {e}") return "unknown" def get_build_toolchain_packages(self): @@ -1632,10 +1646,10 @@ def get_build_toolchain_packages(self): "cpe": cpe, "checksum": package_checksum }) - print(f"Found {len(packages)} build toolchain packages") + self.buildroot.root_log.debug(f"Found {len(packages)} build toolchain packages") return packages except Exception as e: - print(f"Failed to get build environment packages: {e}") + self.buildroot.root_log.debug(f"Failed to get build environment packages: {e}") return [] def get_package_checksum_from_chroot(self, package_name): @@ -1655,15 +1669,15 @@ def get_package_checksum_from_chroot(self, package_name): if output and output.strip() and output.strip() != "(none)" and not output.strip().startswith("error"): # It's SHA-1, but it's better than nothing - print(f"Warning: Using SHA-1 for {package_name}, SHA-256 not available") + self.buildroot.root_log.debug(f"Warning: Using SHA-1 for {package_name}, SHA-256 not available") return output.strip().lower() # No header checksum available - print(f"Warning: No checksum available for {package_name}") + self.buildroot.root_log.debug(f"Warning: No checksum available for {package_name}") return None except Exception as e: - print(f"Failed to get checksum for package {package_name}: {e}") + self.buildroot.root_log.debug(f"Failed to get checksum for package {package_name}: {e}") return None def get_package_signature_from_chroot(self, package_name): @@ -1721,7 +1735,7 @@ def get_package_signature_from_chroot(self, package_name): return signature_info except Exception as e: - print(f"Failed to get signature for package {package_name}: {e}") + self.buildroot.root_log.debug(f"Failed to get signature for package {package_name}: {e}") return { "signature_type": "unknown", "signature_valid": False, @@ -1742,11 +1756,11 @@ def get_package_detailed_signature(self, package_name): # If 
host rpm command failed (empty output), try running inside chroot if not output.strip(): - print(f"Host RPM command failed for {package_name}, trying inside chroot...") + self.buildroot.root_log.debug(f"Host RPM command failed for {package_name}, trying inside chroot...") # Use buildroot's doChroot method to run the command inside the chroot cmd = ["rpm", "-qi", package_name] output, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) - print(f"Chroot RPM output for {package_name}: {output[:200]}...") # Debug output + self.buildroot.root_log.debug(f"Chroot RPM output for {package_name}: {output[:200]}...") # Debug output signature_info = { "signature_type": None, @@ -1761,17 +1775,17 @@ def get_package_detailed_signature(self, package_name): output_lines = output.splitlines() i = 0 signature_found = False - print(f"DEBUG: Processing {len(output_lines)} lines for package {package_name}") + self.buildroot.root_log.debug(f"DEBUG: Processing {len(output_lines)} lines for package {package_name}") while i < len(output_lines): line = output_lines[i].strip() - print(f"DEBUG: Line {i}: '{line}'") + self.buildroot.root_log.debug(f"DEBUG: Line {i}: '{line}'") if line.startswith("Signature"): signature_found = True - print(f"DEBUG: Found signature line: '{line}'") + self.buildroot.root_log.debug(f"DEBUG: Found signature line: '{line}'") # Extract the signature data after the colon sig_data = line.split(":", 1)[1].strip() if ":" in line else "" signature_info["raw_signature_data"] = sig_data - print(f"DEBUG: Extracted signature data: '{sig_data}'") + self.buildroot.root_log.debug(f"DEBUG: Extracted signature data: '{sig_data}'") if sig_data and sig_data != "(none)" and sig_data != "": signature_info["signature_type"] = "GPG" @@ -1818,7 +1832,7 @@ def get_package_detailed_signature(self, package_name): return signature_info except Exception as e: - print(f"Failed to get detailed signature for package {package_name}: {e}") + 
self.buildroot.root_log.debug(f"Failed to get detailed signature for package {package_name}: {e}") return { "signature_type": "unknown", "signature_valid": False, @@ -1828,7 +1842,7 @@ def get_package_detailed_signature(self, package_name): def get_rpm_metadata(self, rpm_path): """Extracts metadata from an RPM file.""" if not os.path.isfile(rpm_path): - print(f"RPM file not found: {rpm_path}") + self.buildroot.root_log.debug(f"RPM file not found: {rpm_path}") return {} # Use individual rpm queries instead of trying to output JSON directly @@ -1863,14 +1877,14 @@ def get_rpm_metadata(self, rpm_path): value = "(none)" metadata[field_name] = value - print(f"RPM metadata extracted: {metadata}") + self.buildroot.root_log.debug(f"RPM metadata extracted: {metadata}") return metadata except subprocess.CalledProcessError as e: - print(f"RPM command failed for {rpm_path}: {e.stderr}") + self.buildroot.root_log.debug(f"RPM command failed for {rpm_path}: {e.stderr}") return {} except Exception as e: - print(f"Failed to extract RPM metadata: {e}") + self.buildroot.root_log.debug(f"Failed to extract RPM metadata: {e}") return {} def get_rpm_file_list(self, rpm_path): @@ -1879,10 +1893,10 @@ def get_rpm_file_list(self, rpm_path): try: result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) files = result.stdout.splitlines() - print(f"Files in RPM {rpm_path}: {files}") + self.buildroot.root_log.debug(f"Files in RPM {rpm_path}: {files}") return files except subprocess.CalledProcessError as e: - print(f"Failed to get file list for {rpm_path}: {e.stderr}") + self.buildroot.root_log.debug(f"Failed to get file list for {rpm_path}: {e.stderr}") return [] def get_rpm_file_info(self, rpm_path): @@ -1911,10 +1925,10 @@ def get_rpm_file_info(self, rpm_path): "owner": owner, "group": group } - print(f"File info for RPM {rpm_path}: {file_info}") + self.buildroot.root_log.debug(f"File info for RPM {rpm_path}: {file_info}") return file_info except 
subprocess.CalledProcessError as e: - print(f"Failed to get file info for {rpm_path}: {e.stderr}") + self.buildroot.root_log.debug(f"Failed to get file info for {rpm_path}: {e.stderr}") return {} def get_rpm_dependencies(self, rpm_path): @@ -1923,10 +1937,10 @@ def get_rpm_dependencies(self, rpm_path): try: result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) dependencies = result.stdout.splitlines() - print(f"Dependencies for RPM {rpm_path}: {dependencies}") + self.buildroot.root_log.debug(f"Dependencies for RPM {rpm_path}: {dependencies}") return dependencies except subprocess.CalledProcessError as e: - print(f"Failed to get dependencies for {rpm_path}: {e.stderr}") + self.buildroot.root_log.debug(f"Failed to get dependencies for {rpm_path}: {e.stderr}") return [] def get_rpm_signature(self, rpm_path): @@ -1938,11 +1952,11 @@ def get_rpm_signature(self, rpm_path): if line.startswith("Signature"): # Extract the signature data after the colon sig_data = line.split(":", 1)[1].strip() if ":" in line else "" - print(f"GPG Signature for {rpm_path}: {sig_data}") + self.buildroot.root_log.debug(f"GPG Signature for {rpm_path}: {sig_data}") return sig_data return None except subprocess.CalledProcessError as e: - print(f"Failed to get GPG signature for {rpm_path}: {e.stderr}") + self.buildroot.root_log.debug(f"Failed to get GPG signature for {rpm_path}: {e.stderr}") return None def hash_file(self, file_path): @@ -1954,12 +1968,15 @@ def hash_file(self, file_path): sha256.update(chunk) return sha256.hexdigest() except Exception as e: - print(f"Failed to hash file {file_path}: {e}") + self.buildroot.root_log.debug(f"Failed to hash file {file_path}: {e}") return None def extract_source_files_from_srpm(self, src_rpm_path): """Extracts source files from a source RPM.""" - print(f"Extracting source files from source RPM: {src_rpm_path}") + import tempfile + import shutil + import shlex + self.buildroot.root_log.debug(f"Extracting 
source files from source RPM: {src_rpm_path}") source_files = [] try: temp_dir = tempfile.mkdtemp(prefix="sbom-srpm-") @@ -1967,7 +1984,7 @@ def extract_source_files_from_srpm(self, src_rpm_path): extract_cmd = f"rpm2cpio {shlex.quote(src_rpm_path)} | cpio -idm 2>/dev/null" subprocess.run(extract_cmd, shell=True, cwd=temp_dir, check=True) except subprocess.CalledProcessError as e: - print(f"Failed to unpack source RPM {src_rpm_path}: {e}") + self.buildroot.root_log.debug(f"Failed to unpack source RPM {src_rpm_path}: {e}") shutil.rmtree(temp_dir, ignore_errors=True) return source_files From 6425f70e265e7120ac865974cb0120d68ca2c15b Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 31 Dec 2025 12:24:00 -0500 Subject: [PATCH 06/25] Update license to GPL 2.0 or later Signed-off-by: Scott R. Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 33ee06cb0..5ffd82360 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -1,5 +1,6 @@ # Copyright (C) 2025, Atomicorp, Inc. -# SPDX-License-Identifier: GPL-2.0-only +# License: GPL2 or later see COPYING +# SPDX-License-Identifier: GPL-2.0-or-later import os import json From a851a6ac503c1a43b8045caa49cfb6f600a40ccf Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 7 Jan 2026 10:15:28 -0500 Subject: [PATCH 07/25] docs: Update SBOM plugin availability to 6.7 Signed-off-by: Scott R. 
Shinn --- docs/Plugin-SBOM.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Plugin-SBOM.md b/docs/Plugin-SBOM.md index 99e5550ed..ce0e5f301 100644 --- a/docs/Plugin-SBOM.md +++ b/docs/Plugin-SBOM.md @@ -322,4 +322,4 @@ This SBOM generator leverages Mock's unique build environment visibility: * **RPM-Native Intelligence**: Deep integration with RPM metadata, spec files, and package signatures * **Reproducible Build Context**: Complete build environment fingerprinting for reproducibility verification -Available since version 6.1. \ No newline at end of file +Available since version 6.7. \ No newline at end of file From dad5c9f38898ad1ba0237ff00818210986e1cff3 Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 7 Jan 2026 10:31:26 -0500 Subject: [PATCH 08/25] feat: Refactor SBOM generator and add release notes Signed-off-by: Scott R. Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 27 +++++++++++++++---- .../release-notes-next/sbom-generator.feature | 6 +++++ 2 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 releng/release-notes-next/sbom-generator.feature diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 5ffd82360..2c5ccaf38 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -1,6 +1,5 @@ # Copyright (C) 2025, Atomicorp, Inc. 
# License: GPL2 or later see COPYING -# SPDX-License-Identifier: GPL-2.0-or-later import os import json @@ -36,6 +35,14 @@ def init(plugins, conf, buildroot): # Ensure configuration exists for the plugin if "sbom_generator_opts" not in conf: conf["sbom_generator_opts"] = {} + + # Check for valid SBOM type configuration + opts = conf["sbom_generator_opts"] + if "type" in opts and opts["type"] != "cyclonedx": + # We only support cyclonedx for now + buildroot.root_log.warning(f"SBOM generator type '{opts['type']}' not supported, defaulting to 'cyclonedx'") + opts["type"] = "cyclonedx" + SBOMGenerator(plugins, conf["sbom_generator_opts"], buildroot) class SBOMGenerator(object): @@ -326,8 +333,18 @@ def _generateSBOMPostBuildHook(self): try: build_dir = self.buildroot.resultdir # Filter out source RPMs from binary RPM processing - rpm_files = [f for f in os.listdir(build_dir) if f.endswith('.rpm') and not f.endswith('.src.rpm')] - src_rpm_files = [f for f in os.listdir(build_dir) if f.endswith('.src.rpm')] + rpm_files = [] + src_rpm_files = [] + + # Use os.scandir for better performance + with os.scandir(build_dir) as entries: + for entry in entries: + if not entry.is_file(): + continue + if entry.name.endswith('.rpm') and not entry.name.endswith('.src.rpm'): + rpm_files.append(entry.name) + elif entry.name.endswith('.src.rpm'): + src_rpm_files.append(entry.name) # Look for spec file in the build directory (during build process) build_build_dir = os.path.join(self.buildroot.rootdir, "builddir/build") @@ -1513,8 +1530,8 @@ def get_file_signature(self, file_path): def get_iso_timestamp(self): """Returns the current time in ISO 8601 format.""" - from datetime import datetime - return datetime.utcnow().isoformat() + "Z" + from datetime import datetime, timezone + return datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') def get_distribution(self): """Returns the distribution name and version from /etc/os-release.""" diff --git 
a/releng/release-notes-next/sbom-generator.feature b/releng/release-notes-next/sbom-generator.feature new file mode 100644 index 000000000..280a7b702 --- /dev/null +++ b/releng/release-notes-next/sbom-generator.feature @@ -0,0 +1,6 @@ +[cyclonedx] +Add support for generating Software Bill of Materials (SBOM) for built packages. +This plugin generates an SBOM in CycloneDX format, including detailed information +about the build environment, source files, and dependencies. +The plugin can be enabled via configuration or command line arguments. +This feature is available as an experimental plugin. From 0710ab2d2ee6468510a850d75e976c7ccd67c8a0 Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 7 Jan 2026 11:07:55 -0500 Subject: [PATCH 09/25] fix: Use rpm2archive instead of rpm2cpio for source extraction Signed-off-by: Scott R. Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 23 ++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 2c5ccaf38..5eadc45c1 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -1999,9 +1999,26 @@ def extract_source_files_from_srpm(self, src_rpm_path): try: temp_dir = tempfile.mkdtemp(prefix="sbom-srpm-") try: - extract_cmd = f"rpm2cpio {shlex.quote(src_rpm_path)} | cpio -idm 2>/dev/null" - subprocess.run(extract_cmd, shell=True, cwd=temp_dir, check=True) - except subprocess.CalledProcessError as e: + # Use rpm2archive instead of rpm2cpio to handle large files (>4GB) + # rpm2archive creates a .tgz file in the current directory + extract_cmd = ["rpm2archive", src_rpm_path] + subprocess.run(extract_cmd, cwd=temp_dir, check=True, stderr=subprocess.PIPE, text=True) + + # Find the generated archive (should be only one file ending in .tgz or .tar.gz) + archive_file = None + for f in os.listdir(temp_dir): + if f.endswith(".tgz") or f.endswith(".tar.gz"): + 
archive_file = os.path.join(temp_dir, f) + break + + if archive_file: + tar_cmd = ["tar", "-xf", archive_file] + subprocess.run(tar_cmd, cwd=temp_dir, check=True) + os.remove(archive_file) + else: + self.buildroot.root_log.debug(f"rpm2archive did not produce expected output for {src_rpm_path}") + + except (subprocess.CalledProcessError, OSError) as e: self.buildroot.root_log.debug(f"Failed to unpack source RPM {src_rpm_path}: {e}") shutil.rmtree(temp_dir, ignore_errors=True) return source_files From 9b9f66229ef656f09298204cb4c35c558389546c Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 7 Jan 2026 12:44:02 -0500 Subject: [PATCH 10/25] fix: Refactor SBOM generator to use python-rpm and fix local variable error Signed-off-by: Scott R. Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 89 ++++++++++++++------- 1 file changed, 61 insertions(+), 28 deletions(-) diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 5eadc45c1..44449a8e1 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -401,7 +401,6 @@ def _generateSBOMPostBuildHook(self): self.buildroot.root_log.debug("Cannot generate SBOM with proper filename - build metadata incomplete") return - import os sbom_filename = f"{build_subject_name}-{build_subject_version}-{build_subject_release}.sbom" out_file = os.path.join(self.buildroot.resultdir, sbom_filename) @@ -1918,35 +1917,69 @@ def get_rpm_file_list(self, rpm_path): return [] def get_rpm_file_info(self, rpm_path): - """Extracts file hashes, ownership, and permissions from an RPM file using 'rpm -qp --dump'.""" - cmd = ["rpm", "-qp", "--dump", rpm_path] + """Extracts file hashes, ownership, and permissions from an RPM file using rpm-python.""" + file_info = {} try: - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) - file_info = {} - for line in result.stdout.splitlines(): - parts = 
line.split() - if len(parts) >= 8: - file_path = parts[0] - sha256 = parts[3] - # If the hash is all zeroes, treat as None - if sha256 == "0" * 64 or sha256 == "0000000000000000000000000000000000000000000000000000000000000000": - sha256 = None - - # Parse permissions (field 4), owner (field 5), group (field 6) - permissions = parts[4] if len(parts) > 4 else None - owner = parts[5] if len(parts) > 5 else None - group = parts[6] if len(parts) > 6 else None - - file_info[file_path] = { - "sha256": sha256, - "permissions": permissions, - "owner": owner, - "group": group - } - self.buildroot.root_log.debug(f"File info for RPM {rpm_path}: {file_info}") + ts = rpm.TransactionSet() + ts.setVSFlags(rpm._RPMVSF_NOSIGNATURES | rpm._RPMVSF_NODIGESTS) + with open(rpm_path, "rb") as f: + hdr = ts.hdrFromFdno(f.fileno()) + + # Use dirnames/basenames/dirindexes to construct paths reliably + dirnames = hdr[rpm.RPMTAG_DIRNAMES] + basenames = hdr[rpm.RPMTAG_BASENAMES] + dirindexes = hdr[rpm.RPMTAG_DIRINDEXES] + + filedigests = hdr[rpm.RPMTAG_FILEDIGESTS] + filemodes = hdr[rpm.RPMTAG_FILEMODES] + fileusernames = hdr[rpm.RPMTAG_FILEUSERNAME] + filegroupnames = hdr[rpm.RPMTAG_FILEGROUPNAME] + + for i, basename in enumerate(basenames): + dirname = dirnames[dirindexes[i]] + + # Decode bytes to strings if needed + if isinstance(dirname, bytes): + dirname = dirname.decode('utf-8', 'replace') + if isinstance(basename, bytes): + basename = basename.decode('utf-8', 'replace') + + filename = os.path.join(dirname, basename) + + digest = filedigests[i] + if isinstance(digest, bytes): + digest = digest.decode('utf-8') + + # Empty digest usually means empty string or all zeros + if not digest: + digest = None + + mode = filemodes[i] + # Format permissions as octal string (e.g., 0100755) to match rpm --dump format + permissions = "0%o" % mode + + owner = fileusernames[i] + if isinstance(owner, bytes): + owner = owner.decode('utf-8', 'replace') + + group = filegroupnames[i] + if isinstance(group, 
bytes): + group = group.decode('utf-8', 'replace') + + file_info[filename] = { + "sha256": digest, + "permissions": permissions, + "owner": owner, + "group": group + } + + self.buildroot.root_log.debug(f"File info for RPM {rpm_path}: {len(file_info)} files processed") return file_info - except subprocess.CalledProcessError as e: - self.buildroot.root_log.debug(f"Failed to get file info for {rpm_path}: {e.stderr}") + + except Exception as e: + self.buildroot.root_log.debug(f"Failed to get file info for {rpm_path}: {e}") + import traceback + self.buildroot.root_log.debug(traceback.format_exc()) return {} def get_rpm_dependencies(self, rpm_path): From 32fba57a2becbcdfa4a8c4e48cebff4511d3e1a9 Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 7 Jan 2026 12:47:35 -0500 Subject: [PATCH 11/25] fix: Replace shutil.rmtree with mockbuild.file_util.rmtree for safer cleanup Signed-off-by: Scott R. Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 44449a8e1..d0055ba55 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -10,13 +10,10 @@ import socket import uuid import tempfile -import shutil import sys import os -import shutil import json import subprocess -import hashlib import time import re import shlex @@ -2025,8 +2022,9 @@ def hash_file(self, file_path): def extract_source_files_from_srpm(self, src_rpm_path): """Extracts source files from a source RPM.""" import tempfile - import shutil import shlex + import mockbuild.file_util + self.buildroot.root_log.debug(f"Extracting source files from source RPM: {src_rpm_path}") source_files = [] try: @@ -2053,7 +2051,7 @@ def extract_source_files_from_srpm(self, src_rpm_path): except (subprocess.CalledProcessError, OSError) as e: self.buildroot.root_log.debug(f"Failed to unpack source RPM 
{src_rpm_path}: {e}") - shutil.rmtree(temp_dir, ignore_errors=True) + mockbuild.file_util.rmtree(temp_dir) return source_files for root_dir, _, files in os.walk(temp_dir): @@ -2069,7 +2067,7 @@ def extract_source_files_from_srpm(self, src_rpm_path): "digital_signature": signature }) try: - shutil.rmtree(temp_dir) + mockbuild.file_util.rmtree(temp_dir) except Exception: pass From ef8c84bf2e8750ea4c7d9157e9b909bc7a9cb05b Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 7 Jan 2026 12:54:42 -0500 Subject: [PATCH 12/25] fix: Use python-distro to detect chroot distribution Signed-off-by: Scott R. Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index d0055ba55..7fde2fa88 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -1592,19 +1592,13 @@ def generate_cpe(self, package_name, package_version, vendor=None): return cpe def detect_chroot_distribution(self): - """Detects the distribution name inside the chroot by reading /etc/os-release.""" + """Detects the distribution name inside the chroot using python-distro.""" try: - # Use buildroot's doChroot to cat /etc/os-release - cmd = ["cat", "/etc/os-release"] - output, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) - distro = None - if output: - for line in output.splitlines(): - if line.startswith("ID="): - distro = line.strip().split("=", 1)[1].strip('"').lower() - break - if distro: - return distro + import distro + # Query the chroot filesystem directly using root_dir parameter + distro_id = distro.id(root_dir=self.buildroot.rootdir) + if distro_id: + return distro_id.lower() else: return "unknown" except Exception as e: From 97f7c85087037215fede3407a60c648d993d12e1 Mon Sep 17 00:00:00 2001 From: "Scott R. 
Shinn" Date: Wed, 7 Jan 2026 12:59:15 -0500 Subject: [PATCH 13/25] refactor: Rename _convert_to_chroot_path to from_chroot_path Signed-off-by: Scott R. Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 7fde2fa88..e60bf873f 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -1445,7 +1445,7 @@ def parse_spec_file(self, spec_path): sources = [] try: - chroot_spec_path = self._convert_to_chroot_path(spec_path) + chroot_spec_path = self.from_chroot_path(spec_path) # Use rpmspec --parse inside the build chroot to ensure macro expansion matches the build cmd = ["rpmspec", "--parse", chroot_spec_path] result, _ = self.buildroot.doChroot( @@ -2071,7 +2071,7 @@ def extract_source_files_from_srpm(self, src_rpm_path): return source_files - def _convert_to_chroot_path(self, host_path): + def from_chroot_path(self, host_path): """Convert an absolute host path into the corresponding path inside the build chroot.""" rootdir = getattr(self.buildroot, "rootdir", "") if not rootdir: From 03c63d30263f4962bbae2abb902ffd84014cec7e Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 7 Jan 2026 13:03:44 -0500 Subject: [PATCH 14/25] refactor: Remove redundant special-case CPE vendor/product mappings Signed-off-by: Scott R. 
Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index e60bf873f..67679c0a5 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -1570,23 +1570,6 @@ def generate_cpe(self, package_name, package_version, vendor=None): if '-' in version: version = version.split('-')[0] # Remove release part - # Handle special cases for common packages - if package_name == "glibc": - vendor = "gnu" - product = "glibc" - elif package_name == "openssl": - vendor = "openssl" - product = "openssl" - elif package_name == "gcc": - vendor = "gnu" - product = "gcc" - elif package_name == "make": - vendor = "gnu" - product = "make" - elif package_name == "gettext": - vendor = "gnu" - product = "gettext" - # Generate CPE cpe = f"cpe:2.3:a:{vendor}:{product}:{version}:*:*:*:*:*:*:*:*" return cpe From 830853758a7ff478635fbeb88c4d0d29ea134ddd Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Wed, 7 Jan 2026 13:04:49 -0500 Subject: [PATCH 15/25] fix: Replace deprecated datetime.utcnow() with timezone-aware datetime.now() Signed-off-by: Scott R. 
Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 67679c0a5..4153fa803 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -19,7 +19,7 @@ import shlex import tempfile import rpm -from datetime import datetime +from datetime import datetime, timezone import mockbuild.plugins from mockbuild.trace_decorator import traceLog, getLog @@ -105,7 +105,7 @@ def _create_cyclonedx_document(self): def _create_metadata(self): """Creates CycloneDX metadata object with Mock-specific build information.""" metadata = { - "timestamp": datetime.utcnow().isoformat() + "Z", + "timestamp": datetime.now(timezone.utc).isoformat(), "tools": [ { "vendor": "Mock", From be45f13601437f0b29f9fe0d01b54456621c2af4 Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Thu, 29 Jan 2026 15:11:25 -0500 Subject: [PATCH 16/25] Lint revisions Signed-off-by: Scott R. Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 1490 +++++++++---------- 1 file changed, 738 insertions(+), 752 deletions(-) diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 4153fa803..97bf2733d 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -1,50 +1,56 @@ -# Copyright (C) 2025, Atomicorp, Inc. +# -*- coding: utf-8 -*- +# vim:expandtab:autoindent:tabstop=4:shiftwidth=4:filetype=python:textwidth=0: # License: GPL2 or later see COPYING +# Written by Scott R. Shinn +# Copyright (C) 2025, Atomicorp, Inc. 
+"""Mock plugin for generating CycloneDX SBOMs from built RPM packages.""" import os import json import subprocess -from mockbuild.trace_decorator import traceLog import hashlib import re import socket import uuid import tempfile -import sys -import os -import json -import subprocess -import time -import re import shlex -import tempfile -import rpm +import traceback from datetime import datetime, timezone -import mockbuild.plugins -from mockbuild.trace_decorator import traceLog, getLog +import distro +import rpm # pylint: disable=no-member + + +import mockbuild.file_util +from mockbuild.trace_decorator import traceLog + +# pylint: disable=invalid-name requires_api_version = "1.1" # Ensure compatibility with mock API +# pylint: enable=invalid-name # Plugin entry point @traceLog() def init(plugins, conf, buildroot): + """Initializes the SBOM generator plugin.""" # Ensure configuration exists for the plugin if "sbom_generator_opts" not in conf: conf["sbom_generator_opts"] = {} - + # Check for valid SBOM type configuration opts = conf["sbom_generator_opts"] if "type" in opts and opts["type"] != "cyclonedx": # We only support cyclonedx for now - buildroot.root_log.warning(f"SBOM generator type '{opts['type']}' not supported, defaulting to 'cyclonedx'") + buildroot.root_log.warning( + f"SBOM generator type '{opts['type']}' not supported, defaulting to 'cyclonedx'" + ) opts["type"] = "cyclonedx" - + SBOMGenerator(plugins, conf["sbom_generator_opts"], buildroot) -class SBOMGenerator(object): +class SBOMGenerator: """Generates SBOM for the built packages.""" - # pylint: disable=too-few-public-methods + # pylint: disable=too-few-public-methods,too-many-instance-attributes @traceLog() def __init__(self, plugins, conf, buildroot): @@ -55,7 +61,7 @@ def __init__(self, plugins, conf, buildroot): self.conf = conf self.sbom_enabled = self.conf.get('generate_sbom', True) self.sbom_done = False - + # Configuration options for file-level dependencies and filtering 
self.include_file_dependencies = self.conf.get('include_file_dependencies', False) self.include_file_components = self.conf.get('include_file_components', True) @@ -63,12 +69,12 @@ def __init__(self, plugins, conf, buildroot): self.include_man_pages = self.conf.get('include_man_pages', True) self.include_source_dependencies = self.conf.get('include_source_dependencies', True) self.include_toolchain_dependencies = self.conf.get('include_toolchain_dependencies', False) - - plugins.add_hook("prebuild", self._listSPECSDirectory) - plugins.add_hook("postbuild", self._generateSBOMPostBuildHook) + + plugins.add_hook("prebuild", self._list_specs_directory) + plugins.add_hook("postbuild", self._generate_sbom_post_build_hook) @traceLog() - def _listSPECSDirectory(self): + def _list_specs_directory(self): """Lists the contents of the SPECS directory before building.""" self.buildroot.root_log.debug("DEBUG: Listing contents of SPECS directory before building:") @@ -87,7 +93,7 @@ def _listSPECSDirectory(self): self.buildroot.root_log.debug(f"Contents of SPECS directory: {specs_files}") else: self.buildroot.root_log.debug("SPECS directory does not exist.") - except Exception as e: + except OSError as e: self.buildroot.root_log.debug(f"Failed to list contents of SPECS directory: {e}") def _create_cyclonedx_document(self): @@ -127,35 +133,35 @@ def _create_metadata(self): ], "properties": [] } - + # Add Mock-specific build metadata as properties properties = metadata["properties"] - + # Add SBOM completeness declaration properties.append({ "name": "sbom:completeness", "value": "complete" }) - + properties.append({ "name": "mock:build:host", "value": socket.gethostname() }) - - distro = self.get_distribution() - if distro: + + distro_name = self.get_distribution() + if distro_name: properties.append({ "name": "mock:build:distribution", - "value": distro + "value": distro_name }) - + # Add chroot information if available if hasattr(self.buildroot, 'rootdir') and 
self.buildroot.rootdir: properties.append({ "name": "mock:build:chroot", "value": self.buildroot.rootdir }) - + # Add Mock config if available if hasattr(self.buildroot, 'config') and self.buildroot.config: config_name = self.buildroot.config.get('config_path', 'unknown') @@ -163,11 +169,11 @@ def _create_metadata(self): "name": "mock:build:config", "value": config_name }) - + hardening_props = self._collect_build_hardening_properties() if hardening_props: properties.extend(hardening_props) - + return metadata def _evaluate_rpm_macro(self, macro): @@ -185,7 +191,9 @@ def _evaluate_rpm_macro(self, macro): if output: return output.strip() except Exception as exc: # pylint: disable=broad-except - self.buildroot.root_log.debug(f"Warning: failed to eval macro {macro} in chroot: {exc}") + self.buildroot.root_log.debug( + f"Warning: failed to eval macro {macro} in chroot: {exc}" + ) try: result = subprocess.run( cmd, @@ -235,7 +243,7 @@ def _collect_build_hardening_properties(self): "build:hardening:global_ldflags": "%{?__global_ldflags}", "build:hardening:build_ldflags": "%{?build_ldflags}", } - + properties = [] macro_values = {} for prop_name, macro in macro_queries.items(): @@ -246,7 +254,7 @@ def _collect_build_hardening_properties(self): "name": prop_name, "value": value }) - + cflags_combined = " ".join( filter( None, @@ -267,10 +275,10 @@ def _collect_build_hardening_properties(self): ) ).lower() flag_union = f"{cflags_combined} {ldflags_combined}" - + def _contains_flag(flag): return flag in flag_union if flag_union else False - + feature_map = { "build:hardening:fortify_enabled": any( token in flag_union @@ -290,50 +298,43 @@ def _contains_flag(flag): "name": name, "value": "true" if enabled else "false" }) - + fips_value = self._read_file_from_chroot("/proc/sys/crypto/fips_enabled") if fips_value != "": properties.append({ "name": "build:hardening:fips_enabled", "value": "true" if fips_value.strip() == "1" else "false" }) - + return properties - def 
_generate_purl(self, package_name, version, distro=None, arch=None): + def _generate_purl(self, package_name, version, distro_obj=None, arch=None): """Generate Package URL (PURL) for RPM package.""" - if not distro: - distro = self.detect_chroot_distribution() or "fedora" - + if not distro_obj: + distro_obj = self.detect_chroot_distribution() or "fedora" + # Clean package name for PURL (lowercase, no special chars) clean_name = re.sub(r'[^a-zA-Z0-9._-]', '-', package_name.lower()) - - purl = f"pkg:rpm/{distro}/{clean_name}@{version}" + + purl = f"pkg:rpm/{distro_obj}/{clean_name}@{version}" if arch: purl += f"?arch={arch}" return purl - def _generate_bom_ref(self, package_name, version, component_type="package"): + def _generate_bom_ref(self, package_name, version, _component_type="package"): """Generate a unique bom-ref identifier for a component.""" # Use PURL as bom-ref for consistency - distro = self.detect_chroot_distribution() or "fedora" - return self._generate_purl(package_name, version, distro) + distro_obj = self.detect_chroot_distribution() or "fedora" + return self._generate_purl(package_name, version, distro_obj) - @traceLog() - def _generateSBOMPostBuildHook(self): - if self.sbom_done or not self.sbom_enabled: - return - - state_text = "Generating CycloneDX SBOM for built packages v1.0" - self.state.start(state_text) + def _find_build_artifacts(self, build_dir): + """Locates RPMs, source RPMs, and spec files in the build directory.""" + rpm_files = [] + src_rpm_files = [] + spec_file = None + # Use os.scandir for better performance try: - build_dir = self.buildroot.resultdir - # Filter out source RPMs from binary RPM processing - rpm_files = [] - src_rpm_files = [] - - # Use os.scandir for better performance with os.scandir(build_dir) as entries: for entry in entries: if not entry.is_file(): @@ -342,478 +343,403 @@ def _generateSBOMPostBuildHook(self): rpm_files.append(entry.name) elif entry.name.endswith('.src.rpm'): src_rpm_files.append(entry.name) 
- - # Look for spec file in the build directory (during build process) - build_build_dir = os.path.join(self.buildroot.rootdir, "builddir/build") - spec_file = None - if os.path.exists(build_build_dir): - # Look for spec file in the build directory - for root, dirs, files in os.walk(build_build_dir): + except OSError as e: + self.buildroot.root_log.debug(f"Failed to scan build directory {build_dir}: {e}") + + # Look for spec file in the chroot build directory + build_build_dir = os.path.join(self.buildroot.rootdir, "builddir/build") + if os.path.exists(build_build_dir): + try: + for root, _dirs, files in os.walk(build_build_dir): for file in files: if file.endswith('.spec'): spec_file = os.path.join(root, file) break if spec_file: break + except OSError as e: + self.buildroot.root_log.debug( + f"Failed to scan chroot build dir {build_build_dir}: {e}" + ) + + return rpm_files, src_rpm_files, spec_file + + def _get_build_subject_metadata(self, spec_file, src_rpm_files, build_dir): + """Determines the build subject metadata (name, version, release).""" + build_subject_name = None + build_subject_version = None + build_subject_release = None + source_files = [] + + if spec_file: + build_subject_name = os.path.splitext(os.path.basename(spec_file))[0] + parsed_sources = self.parse_spec_file(spec_file) + if parsed_sources: + source_files = parsed_sources + + if src_rpm_files: + srpm_path = os.path.join(build_dir, src_rpm_files[0]) + srpm_metadata = self.get_rpm_metadata(srpm_path) + if srpm_metadata: + if not build_subject_name: + build_subject_name = srpm_metadata.get("name") + if not build_subject_version: + build_subject_version = srpm_metadata.get("version") + if not build_subject_release: + build_subject_release = srpm_metadata.get("release") + + if not source_files: + # Extract from source RPM if available + source_files = self.extract_source_files_from_srpm(srpm_path) + + return ( + build_subject_name, build_subject_version, + build_subject_release, source_files + 
) + + def _add_source_components(self, bom, source_files): + """Converts source files to CycloneDX components and returns metadata entries.""" + source_component_entries = [] + for source_file in source_files: + component = self._create_source_file_component(source_file) + if component: + bom["components"].append(component) + filename = source_file.get("filename") + source_component_entries.append({ + "filename": filename, + "bom_ref": component.get("bom-ref"), + "type": "patch" if self._is_patch_file(filename) else "source" + }) + return source_component_entries + + def _add_toolchain_components(self, bom, build_toolchain_packages, distro_id): + """Adds toolchain components to the BOM and returns their bom-refs.""" + toolchain_bom_refs = [] + for toolchain_pkg in build_toolchain_packages: + component = self._create_toolchain_component(toolchain_pkg, distro_id) + if component: + bom_ref = component.get("bom-ref") + if bom_ref: + toolchain_bom_refs.append(bom_ref) + bom["components"].append(component) + return toolchain_bom_refs + + @traceLog() + # pylint: disable=too-many-locals + def _generate_sbom_post_build_hook(self): + """Plugin hook called after the build is complete.""" + if self.sbom_done or not self.sbom_enabled: + return + + state_text = "Generating CycloneDX SBOM for built packages v1.0" + self.state.start(state_text) + + try: + build_dir = self.buildroot.resultdir + rpm_files, src_rpm_files, spec_file = self._find_build_artifacts(build_dir) if not rpm_files and not src_rpm_files and not spec_file: - self.buildroot.root_log.debug("No RPM, source RPM, or spec file found for SBOM generation.") + self.buildroot.root_log.debug( + "No RPM, source RPM, or spec file found for SBOM generation." 
+ ) return - # Create CycloneDX document - bom = self._create_cyclonedx_document() - - # Get build toolchain packages - build_toolchain_packages = self.get_build_toolchain_packages() - - # Process source files from spec file - source_files = [] - source_component_entries = [] - build_subject_name = None - build_subject_version = None - build_subject_release = None - - if spec_file: - build_subject_name = os.path.splitext(os.path.basename(spec_file))[0] - parsed_sources = self.parse_spec_file(spec_file) - if parsed_sources: - source_files = parsed_sources - - srpm_metadata = None - if src_rpm_files: - srpm_path = os.path.join(build_dir, src_rpm_files[0]) - srpm_metadata = self.get_rpm_metadata(srpm_path) - if srpm_metadata: - if not build_subject_name: - build_subject_name = srpm_metadata.get("name") - if not build_subject_version: - build_subject_version = srpm_metadata.get("version") - if not build_subject_release: - build_subject_release = srpm_metadata.get("release") - + # Get build subject metadata + ( + build_subject_name, build_subject_version, + build_subject_release, source_files + ) = self._get_build_subject_metadata(spec_file, src_rpm_files, build_dir) + # Construct output filename using package name-version-release format - # These should always be available in a proper mock build if not build_subject_name or not build_subject_version or not build_subject_release: - self.buildroot.root_log.debug(f"WARNING: Missing package metadata - name: {build_subject_name}, version: {build_subject_version}, release: {build_subject_release}") - self.buildroot.root_log.debug("Cannot generate SBOM with proper filename - build metadata incomplete") + self.buildroot.root_log.debug("Cannot generate SBOM - build metadata incomplete") return - - sbom_filename = f"{build_subject_name}-{build_subject_version}-{build_subject_release}.sbom" + + sbom_filename = ( + f"{build_subject_name}-{build_subject_version}-{build_subject_release}.sbom" + ) out_file = 
os.path.join(self.buildroot.resultdir, sbom_filename) - if not source_files and src_rpm_files: - # Extract from source RPM if available - src_rpm_path = os.path.join(build_dir, src_rpm_files[0]) - source_files = self.extract_source_files_from_srpm(src_rpm_path) - - # Convert source files to CycloneDX components - for source_file in source_files: - component = self._create_source_file_component(source_file) - if component: - bom["components"].append(component) - filename = source_file.get("filename") - source_component_entries.append({ - "filename": filename, - "bom_ref": component.get("bom-ref"), - "type": "patch" if self._is_patch_file(filename) else "source" - }) + # Create CycloneDX document + bom = self._create_cyclonedx_document() + build_toolchain_packages = self.get_build_toolchain_packages() - # Convert build toolchain packages to components - distro = self.detect_chroot_distribution() or "fedora" - toolchain_bom_refs = [] - for toolchain_pkg in build_toolchain_packages: - component = self._create_toolchain_component(toolchain_pkg, distro) - if component: - bom_ref = component.get("bom-ref") - if bom_ref: - toolchain_bom_refs.append(bom_ref) - bom["components"].append(component) + # Add source and toolchain components + source_component_entries = self._add_source_components(bom, source_files) + distro_id = self.detect_chroot_distribution() or "fedora" + toolchain_bom_refs = self._add_toolchain_components( + bom, build_toolchain_packages, distro_id + ) # Process binary RPMs and convert to components - built_package_bom_refs = [] - component_map = {} # Map package names to bom-refs for dependency resolution - primary_rpm_metadata = None # Store metadata from primary package for metadata enhancement - - # Build component map from toolchain packages - for toolchain_pkg in build_toolchain_packages: - pkg_name = toolchain_pkg.get("name") - pkg_version = toolchain_pkg.get("version") - if pkg_name and pkg_version: - purl = self._generate_purl(pkg_name, pkg_version, 
distro) - component_map[pkg_name.lower()] = purl - - for rpm_file in rpm_files: - rpm_path = os.path.join(build_dir, rpm_file) - component = self._create_built_package_component(rpm_path, distro, source_component_entries) - if component: - bom_ref = component.get("bom-ref") - package_name = component.get("name") - package_version = component.get("version") - if bom_ref: - built_package_bom_refs.append(bom_ref) - # Add to component map for dependency resolution - if package_name: - component_map[package_name.lower()] = bom_ref - bom["components"].append(component) - - # Store metadata from primary package (prefer main package matching build subject) - if not primary_rpm_metadata: - # Prefer the main package over debuginfo packages - if not package_name or 'debuginfo' not in package_name.lower(): - primary_rpm_metadata = self.get_rpm_metadata(rpm_path) - else: - # If we have metadata, check if we should replace it with a better match - current_name = primary_rpm_metadata.get('name', '').lower() - is_current_debuginfo = 'debuginfo' in current_name - is_current_main = build_subject_name and current_name == build_subject_name.lower() - - # Replace if: current is debuginfo and new is not, OR new matches build subject name - should_replace = False - if is_current_debuginfo and package_name and 'debuginfo' not in package_name.lower(): - should_replace = True - elif build_subject_name and package_name and package_name.lower() == build_subject_name.lower(): - # New package matches build subject name - always prefer it - should_replace = True - - if should_replace: - primary_rpm_metadata = self.get_rpm_metadata(rpm_path) - - # Create file components for files within this package - if package_name and package_version and self.include_file_components: - file_components = self._create_file_components(rpm_path, package_name, package_version) - for file_comp in file_components: - bom["components"].append(file_comp) - # Create dependency: file depends on package (only if 
configured) - if self._should_include_file_dependency(file_comp.get("name", "")): - file_dep = { - "ref": file_comp["bom-ref"], - "dependsOn": [bom_ref] - } - bom["dependencies"].append(file_dep) - - # Create dependency entry for runtime dependencies (libraries/RPMs) - dependencies = self.get_rpm_dependencies(rpm_path) - runtime_dependency = self._create_dependency(bom_ref, dependencies, component_map, distro) - - # Build dependsOn array with runtime dependencies and optionally toolchain - all_depends_on = [] - - # Add runtime dependencies (libraries/RPMs this package depends on) - if runtime_dependency and runtime_dependency.get("dependsOn"): - for dep_ref in runtime_dependency.get("dependsOn", []): - if dep_ref not in all_depends_on: - all_depends_on.append(dep_ref) - - # Add toolchain dependencies if configured (build-time dependencies) - if self.include_toolchain_dependencies and toolchain_bom_refs: - for toolchain_ref in toolchain_bom_refs: - if toolchain_ref not in all_depends_on: - all_depends_on.append(toolchain_ref) - - # Deduplicate final dependsOn array - all_depends_on = list(set(all_depends_on)) - - # Create dependency entry if we have any dependencies - if all_depends_on: - combined_dep = { - "ref": bom_ref, - "dependsOn": all_depends_on - } - bom["dependencies"].append(combined_dep) - elif runtime_dependency: - # Fall back to just runtime dependencies if no other deps - bom["dependencies"].append(runtime_dependency) - - # Note: Source code relationships are represented in component properties - # (mock:source:files, mock:source:refs, mock:patch:files, mock:patch:refs) - # rather than in dependencies, as source code is a build input, not a runtime dependency - - # Add RPM-specific metadata to metadata.properties - if primary_rpm_metadata: - rpm_props = bom["metadata"]["properties"] - - # Add buildhost if available - buildhost = primary_rpm_metadata.get("buildhost") - if buildhost and buildhost != "(none)": - rpm_props.append({ - "name": 
"mock:rpm:buildhost", - "value": buildhost - }) - - # Add buildtime if available - buildtime = primary_rpm_metadata.get("buildtime") - if buildtime and buildtime != "(none)": - rpm_props.append({ - "name": "mock:rpm:buildtime", - "value": buildtime - }) - - # Add source RPM if available - sourcerpm = primary_rpm_metadata.get("sourcerpm") - if sourcerpm and sourcerpm != "(none)": - rpm_props.append({ - "name": "mock:rpm:sourcerpm", - "value": sourcerpm - }) - - # Add group if available - group = primary_rpm_metadata.get("group") - if group and group != "(none)": - rpm_props.append({ - "name": "mock:rpm:group", - "value": group - }) - - # Add epoch if available and not empty - epoch = primary_rpm_metadata.get("epoch") - if epoch and epoch != "(none)" and epoch.strip(): - rpm_props.append({ - "name": "mock:rpm:epoch", - "value": epoch - }) - - # Add distribution if available - distribution = primary_rpm_metadata.get("distribution") - if distribution and distribution != "(none)": - rpm_props.append({ - "name": "mock:rpm:distribution", - "value": distribution - }) - - # Add manufacture field if vendor is available - vendor = primary_rpm_metadata.get("vendor") - if vendor and vendor != "(none)": - bom["metadata"]["manufacture"] = { - "name": vendor - } - # Also add as authors (sbomqs expects this) - bom["metadata"]["authors"] = [ - { - "name": vendor - } - ] - - # Add supplier (from Packager field) - packager = primary_rpm_metadata.get("packager") - if packager and packager != "(none)": - bom["metadata"]["supplier"] = { - "name": packager - } - - # Add metadata.component representing what this SBOM is about - # Use the primary built package(s) or create an aggregate component - if built_package_bom_refs: - # For single package builds, use that package - # For multi-package builds, use the first/main package or create aggregate - if len(built_package_bom_refs) == 1: - # Single package: use it as the component - primary_ref = built_package_bom_refs[0] - primary_component = 
next((c for c in bom["components"] if c.get("bom-ref") == primary_ref), None) - if primary_component: - component_obj = { - "type": primary_component.get("type", "application"), - "name": primary_component.get("name"), - "version": primary_component.get("version"), - "bom-ref": primary_ref, - "purl": primary_component.get("purl") - } - - # Add description if available - if primary_component.get("description"): - component_obj["description"] = primary_component.get("description") - elif primary_rpm_metadata: - summary = primary_rpm_metadata.get("summary") - if summary and summary != "(none)": - component_obj["description"] = summary - - # Add externalReferences - external_refs = [] - if primary_rpm_metadata: - # Add source RPM reference - sourcerpm = primary_rpm_metadata.get("sourcerpm") - if sourcerpm and sourcerpm != "(none)": - external_refs.append({ - "type": "distribution", - "url": sourcerpm - }) - # Add project URL - url = primary_rpm_metadata.get("url") - if url and url != "(none)": - external_refs.append({ - "type": "website", - "url": url - }) - if external_refs: - component_obj["externalReferences"] = external_refs - - # Add license information - if primary_component.get("licenses"): - component_obj["licenses"] = primary_component.get("licenses") - elif primary_rpm_metadata: - license_str = primary_rpm_metadata.get("license") - if license_str and license_str != "(none)": - component_obj["licenses"] = [ - { - "license": { - "id": license_str - } - } - ] - - bom["metadata"]["component"] = component_obj - else: - # Multi-package build: create aggregate component that represents the full build output - first_pkg = next((c for c in bom["components"] if c.get("bom-ref") == built_package_bom_refs[0]), None) - if first_pkg: - aggregate_name = build_subject_name or first_pkg.get("name", "unknown") - aggregate_version = None - if build_subject_version and build_subject_release: - aggregate_version = f"{build_subject_version}-{build_subject_release}" - elif 
primary_rpm_metadata: - meta_version = primary_rpm_metadata.get("version") - meta_release = primary_rpm_metadata.get("release") - if meta_version and meta_release: - aggregate_version = f"{meta_version}-{meta_release}" - if not aggregate_version: - aggregate_version = first_pkg.get("version", "unknown") - - # Build description - prefer summary from RPM, fall back to generic description - description = f"Build output containing {len(built_package_bom_refs)} package(s)" - if primary_rpm_metadata: - summary = primary_rpm_metadata.get("summary") - if summary and summary != "(none)": - description = f"{summary} (build output containing {len(built_package_bom_refs)} package(s))" - - component_obj = { - "type": "application", - "name": aggregate_name, - "version": aggregate_version, - "bom-ref": f"build-output:{aggregate_name}", - "description": description - } - - if aggregate_name and aggregate_version: - component_obj["purl"] = self._generate_purl(aggregate_name, aggregate_version, distro) - elif first_pkg.get("purl"): - component_obj["purl"] = first_pkg.get("purl") - - # Add externalReferences - external_refs = [] - if primary_rpm_metadata: - # Add source RPM reference - sourcerpm = primary_rpm_metadata.get("sourcerpm") - if sourcerpm and sourcerpm != "(none)": - external_refs.append({ - "type": "distribution", - "url": sourcerpm - }) - # Add project URL - url = primary_rpm_metadata.get("url") - if url and url != "(none)": - external_refs.append({ - "type": "website", - "url": url - }) - if external_refs: - component_obj["externalReferences"] = external_refs - - # Add license information - if first_pkg.get("licenses"): - component_obj["licenses"] = first_pkg.get("licenses") - elif primary_rpm_metadata: - license_str = primary_rpm_metadata.get("license") - if license_str and license_str != "(none)": - component_obj["licenses"] = [ - { - "license": { - "id": license_str - } - } - ] - - bom["metadata"]["component"] = component_obj - - # Add dependency entries for all 
components that don't have them - # CycloneDX requires every component to have a dependency entry - # Use dictionary to ensure uniqueness (no duplicates) - dependencies_dict = {} - - # Start with existing dependencies (from built packages) - for dep in bom["dependencies"]: - ref = dep.get("ref") - if ref: - dependencies_dict[ref] = dep - - # Add entries for source file components (no dependencies) - for source_entry in source_component_entries: - bom_ref = source_entry.get("bom_ref") - if bom_ref and bom_ref not in dependencies_dict: - dependencies_dict[bom_ref] = { - "ref": bom_ref, - "dependsOn": [] - } - - # Add entries for toolchain components (no dependencies tracked for now) - for toolchain_pkg in build_toolchain_packages: - pkg_name = toolchain_pkg.get("name") - pkg_version = toolchain_pkg.get("version") - if pkg_name and pkg_version: - purl = self._generate_purl(pkg_name, pkg_version, distro) - if purl and purl not in dependencies_dict: - dependencies_dict[purl] = { - "ref": purl, - "dependsOn": [] - } - - # Add entries for any remaining components - # (in case we have components that weren't explicitly handled) - for component in bom["components"]: - comp_bom_ref = component.get("bom-ref") - if comp_bom_ref and comp_bom_ref not in dependencies_dict: - dependencies_dict[comp_bom_ref] = { - "ref": comp_bom_ref, - "dependsOn": [] - } - - # Replace dependencies array with deduplicated list - bom["dependencies"] = list(dependencies_dict.values()) + ( + built_package_bom_refs, primary_rpm_metadata + ) = self._process_built_packages( + bom, rpm_files, build_dir, distro_id, source_component_entries, + build_subject_name, build_toolchain_packages, toolchain_bom_refs + ) + + # Add RPM-specific metadata and finalize dependencies + self._finalize_bom_metadata(bom, primary_rpm_metadata, built_package_bom_refs, + build_subject_name, build_subject_version, + build_subject_release, distro_id) + self._finalize_dependencies(bom, source_component_entries, + 
build_toolchain_packages, distro_id) # Write CycloneDX BOM - import json - with open(out_file, "w") as f: + with open(out_file, "w", encoding="utf-8") as f: json.dump(bom, f, indent=2) self.buildroot.root_log.debug(f"CycloneDX SBOM successfully written to: {out_file}") + # pylint: disable=broad-exception-caught except Exception as e: self.buildroot.root_log.debug(f"An error occurred during SBOM generation: {e}") - import traceback traceback.print_exc() finally: self.sbom_done = True self.state.finish(state_text) - def _create_built_package_component(self, rpm_path, distro, source_components=None): + # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements,too-many-positional-arguments + def _process_built_packages(self, bom, rpm_files, build_dir, distro_id, + source_component_entries, build_subject_name, + build_toolchain_packages, toolchain_bom_refs): + """Processes binary RPMs and creates CycloneDX components and dependencies.""" + built_package_bom_refs = [] + component_map = {} + primary_rpm_metadata = None + + # Build component map from toolchain packages + for toolchain_pkg in build_toolchain_packages: + pkg_name = toolchain_pkg.get("name") + pkg_version = toolchain_pkg.get("version") + if pkg_name and pkg_version: + purl = self._generate_purl(pkg_name, pkg_version, distro_id) + component_map[pkg_name.lower()] = purl + + for rpm_file in rpm_files: + rpm_path = os.path.join(build_dir, rpm_file) + component = self._create_built_package_component( + rpm_path, distro_id, source_component_entries + ) + if not component: + continue + + bom_ref = component.get("bom-ref") + package_name = component.get("name") + package_version = component.get("version") + + if bom_ref: + built_package_bom_refs.append(bom_ref) + if package_name: + component_map[package_name.lower()] = bom_ref + + bom["components"].append(component) + + # Determine primary RPM metadata + if not primary_rpm_metadata: + if not package_name or 'debuginfo' not in 
package_name.lower(): + primary_rpm_metadata = self.get_rpm_metadata(rpm_path) + else: + current_name = primary_rpm_metadata.get('name', '').lower() + is_current_debuginfo = 'debuginfo' in current_name + should_replace = False + if (is_current_debuginfo and package_name and + 'debuginfo' not in package_name.lower()): + should_replace = True + elif (build_subject_name and package_name and + package_name.lower() == build_subject_name.lower()): + should_replace = True + + if should_replace: + primary_rpm_metadata = self.get_rpm_metadata(rpm_path) + + # File components + if package_name and package_version and self.include_file_components: + file_components = self._create_file_components( + rpm_path, package_name, package_version + ) + for file_comp in file_components: + bom["components"].append(file_comp) + if self._should_include_file_dependency(file_comp.get("name", "")): + bom["dependencies"].append({ + "ref": file_comp["bom-ref"], + "dependsOn": [bom_ref] + }) + + # Dependencies + dependencies = self.get_rpm_dependencies(rpm_path) + runtime_dependency = self._create_dependency( + bom_ref, dependencies, component_map, distro_id + ) + + all_depends_on = [] + if runtime_dependency and runtime_dependency.get("dependsOn"): + all_depends_on.extend(runtime_dependency.get("dependsOn")) + + if self.include_toolchain_dependencies and toolchain_bom_refs: + for t_ref in toolchain_bom_refs: + if t_ref not in all_depends_on: + all_depends_on.append(t_ref) + + all_depends_on = list(set(all_depends_on)) + if all_depends_on: + bom["dependencies"].append({"ref": bom_ref, "dependsOn": all_depends_on}) + elif runtime_dependency: + bom["dependencies"].append(runtime_dependency) + + return built_package_bom_refs, primary_rpm_metadata + + # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements,too-many-positional-arguments + def _finalize_bom_metadata(self, bom, primary_rpm_metadata, built_package_bom_refs, + build_subject_name, build_subject_version, 
+ build_subject_release, distro_id): + """Adds RPM-specific metadata and component information to the BOM.""" + if primary_rpm_metadata: + rpm_props = bom["metadata"]["properties"] + for key, prop_name in [("buildhost", "mock:rpm:buildhost"), + ("buildtime", "mock:rpm:buildtime"), + ("sourcerpm", "mock:rpm:sourcerpm"), + ("group", "mock:rpm:group"), + ("epoch", "mock:rpm:epoch"), + ("distribution", "mock:rpm:distribution")]: + val = primary_rpm_metadata.get(key) + if val and val != "(none)" and (key != "epoch" or val.strip()): + rpm_props.append({"name": prop_name, "value": val}) + + vendor = primary_rpm_metadata.get("vendor") + if vendor and vendor != "(none)": + bom["metadata"]["manufacture"] = {"name": vendor} + bom["metadata"]["authors"] = [{"name": vendor}] + + packager = primary_rpm_metadata.get("packager") + if packager and packager != "(none)": + bom["metadata"]["supplier"] = {"name": packager} + + if built_package_bom_refs: + if len(built_package_bom_refs) == 1: + primary_ref = built_package_bom_refs[0] + primary_component = next((c for c in bom["components"] + if c.get("bom-ref") == primary_ref), None) + if primary_component: + component_obj = { + "type": primary_component.get("type", "application"), + "name": primary_component.get("name"), + "version": primary_component.get("version"), + "bom-ref": primary_ref, + "purl": primary_component.get("purl") + } + if primary_component.get("description"): + component_obj["description"] = primary_component.get("description") + elif primary_rpm_metadata: + summary = primary_rpm_metadata.get("summary") + if summary and summary != "(none)": + component_obj["description"] = summary + + external_refs = [] + if primary_rpm_metadata: + sourcerpm = primary_rpm_metadata.get("sourcerpm") + if sourcerpm and sourcerpm != "(none)": + external_refs.append({"type": "distribution", "url": sourcerpm}) + url = primary_rpm_metadata.get("url") + if url and url != "(none)": + external_refs.append({"type": "website", "url": url}) + if 
external_refs: + component_obj["externalReferences"] = external_refs + + if primary_component.get("licenses"): + component_obj["licenses"] = primary_component.get("licenses") + elif primary_rpm_metadata: + lic = primary_rpm_metadata.get("license") + if lic and lic != "(none)": + component_obj["licenses"] = [{"license": {"id": lic}}] + bom["metadata"]["component"] = component_obj + else: + first_pkg = next((c for c in bom["components"] + if c.get("bom-ref") == built_package_bom_refs[0]), None) + if first_pkg: + aggregate_name = build_subject_name or first_pkg.get("name", "unknown") + aggregate_version = None + if build_subject_version and build_subject_release: + aggregate_version = f"{build_subject_version}-{build_subject_release}" + elif primary_rpm_metadata: + v = primary_rpm_metadata.get("version") + r = primary_rpm_metadata.get("release") + if v and r: + aggregate_version = f"{v}-{r}" + if not aggregate_version: + aggregate_version = first_pkg.get("version", "unknown") + + description = ( + f"Build output containing {len(built_package_bom_refs)} package(s)" + ) + if primary_rpm_metadata: + summary = primary_rpm_metadata.get("summary") + if summary and summary != "(none)": + description = f"{summary} ({description})" + + component_obj = { + "type": "application", + "name": aggregate_name, + "version": aggregate_version, + "bom-ref": f"build-output:{aggregate_name}", + "description": description + } + if aggregate_name and aggregate_version: + component_obj["purl"] = self._generate_purl( + aggregate_name, aggregate_version, distro_id + ) + bom["metadata"]["component"] = component_obj + + # pylint: disable=too-many-locals,too-many-branches,too-many-statements + def _finalize_dependencies(self, bom, source_component_entries, + build_toolchain_packages, distro_id): + """Ensures every component has a dependency entry.""" + dependencies_dict = {dep.get("ref"): dep for dep in bom["dependencies"] if dep.get("ref")} + + for source_entry in source_component_entries: + ref 
= source_entry.get("bom_ref") + if ref and ref not in dependencies_dict: + dependencies_dict[ref] = {"ref": ref, "dependsOn": []} + + for toolchain_pkg in build_toolchain_packages: + name, ver = toolchain_pkg.get("name"), toolchain_pkg.get("version") + if name and ver: + purl = self._generate_purl(name, ver, distro_id) + if purl and purl not in dependencies_dict: + dependencies_dict[purl] = {"ref": purl, "dependsOn": []} + + for component in bom["components"]: + ref = component.get("bom-ref") + if ref and ref not in dependencies_dict: + dependencies_dict[ref] = {"ref": ref, "dependsOn": []} + + bom["dependencies"] = list(dependencies_dict.values()) + + + def _create_built_package_component( + self, rpm_path, distro_obj, _source_components=None + ): """Creates a CycloneDX component for a built RPM package.""" package_data = self.get_rpm_metadata(rpm_path) if not package_data: return None - + package_name = package_data.get("name") version = package_data.get("version") release = package_data.get("release") arch = package_data.get("arch") - + # Combine version and release full_version = f"{version}-{release}" if release else version - + # Generate PURL and bom-ref - purl = self._generate_purl(package_name, full_version, distro, arch) + purl = self._generate_purl(package_name, full_version, distro_obj, arch) bom_ref = purl - + # Determine component type (application vs library) # Most RPMs are libraries, but we could check for executables component_type = "library" - + component = { "type": component_type, "bom-ref": bom_ref, @@ -821,7 +747,7 @@ def _create_built_package_component(self, rpm_path, distro, source_components=No "version": full_version, "purl": purl } - + # Add external references (CPE) cpe = self.generate_cpe(package_name, version) if cpe: @@ -832,7 +758,7 @@ def _create_built_package_component(self, rpm_path, distro, source_components=No "url": cpe } ] - + # Add hash of RPM file rpm_hash = self.hash_file(rpm_path) if rpm_hash: @@ -842,7 +768,7 @@ def 
_create_built_package_component(self, rpm_path, distro, source_components=No "content": rpm_hash } ] - + # Add license information license_str = package_data.get("license") if license_str and license_str != "(none)": @@ -853,24 +779,24 @@ def _create_built_package_component(self, rpm_path, distro, source_components=No } } ] - + # Add supplier information (from Packager field) packager = package_data.get("packager") if packager and packager != "(none)": component["supplier"] = { "name": packager } - + # Add properties for RPM metadata properties = [] - + vendor = package_data.get("vendor") if vendor and vendor != "(none)": properties.append({ "name": "mock:rpm:vendor", "value": vendor }) - + packager = package_data.get("packager") if packager and packager != "(none)": properties.append({ @@ -919,7 +845,7 @@ def _create_built_package_component(self, rpm_path, distro, source_components=No "name": "mock:rpm:distribution", "value": distribution }) - + url = package_data.get("url") if url and url != "(none)": component["externalReferences"] = component.get("externalReferences", []) @@ -927,18 +853,18 @@ def _create_built_package_component(self, rpm_path, distro, source_components=No "type": "website", "url": url }) - + summary = package_data.get("summary") if summary and summary != "(none)": component["description"] = summary - + # Add GPG signature information if available signature = self.get_rpm_signature(rpm_path) if signature: # Parse signature info sig_props = self._parse_signature_to_properties(signature) properties.extend(sig_props) - + # Note: Source/patch file relationships are represented in component properties # (mock:source:files, mock:source:refs, mock:patch:files, mock:patch:refs) # but are removed from individual package components to reduce noise. 
@@ -946,7 +872,7 @@ def _create_built_package_component(self, rpm_path, distro, source_components=No if properties: component["properties"] = properties - + # Add external reference for source RPM if available if sourcerpm and sourcerpm != "(none)": component["externalReferences"] = component.get("externalReferences", []) @@ -954,21 +880,21 @@ def _create_built_package_component(self, rpm_path, distro, source_components=No "type": "distribution", "url": sourcerpm }) - + return component - def _create_toolchain_component(self, toolchain_pkg, distro): + def _create_toolchain_component(self, toolchain_pkg, distro_obj): """Creates a CycloneDX component for a build toolchain package.""" package_name = toolchain_pkg.get("name") version = toolchain_pkg.get("version") - + if not package_name or not version: return None - + # Generate PURL and bom-ref - purl = self._generate_purl(package_name, version, distro) + purl = self._generate_purl(package_name, version, distro_obj) bom_ref = purl - + component = { "type": "library", "bom-ref": bom_ref, @@ -976,7 +902,7 @@ def _create_toolchain_component(self, toolchain_pkg, distro): "version": version, "purl": purl } - + # Add checksum if available checksum = toolchain_pkg.get("checksum") if checksum and checksum != "error" and not checksum.startswith("error"): @@ -987,14 +913,14 @@ def _create_toolchain_component(self, toolchain_pkg, distro): alg = "SHA-1" else: alg = "SHA-256" # Default assumption - + component["hashes"] = [ { "alg": alg, "content": checksum } ] - + # Add CPE cpe = toolchain_pkg.get("cpe") if cpe: @@ -1005,7 +931,7 @@ def _create_toolchain_component(self, toolchain_pkg, distro): "url": cpe } ] - + # Add license license_str = toolchain_pkg.get("licenseDeclared") if license_str and license_str != "(none)": @@ -1016,22 +942,22 @@ def _create_toolchain_component(self, toolchain_pkg, distro): } } ] - + # Add properties properties = [] - + # Mark as build toolchain properties.append({ "name": "mock:role", "value": 
"build-toolchain" }) - + # Add signature information signature_info = toolchain_pkg.get("digital_signature", {}) if signature_info: sig_props = self._signature_info_to_properties(signature_info) properties.extend(sig_props) - + # Add build date if available build_date = signature_info.get("build_date") if build_date: @@ -1039,10 +965,10 @@ def _create_toolchain_component(self, toolchain_pkg, distro): "name": "mock:build:date", "value": build_date }) - + if properties: component["properties"] = properties - + return component def _create_source_file_component(self, source_file): @@ -1050,20 +976,20 @@ def _create_source_file_component(self, source_file): filename = source_file.get("filename") if not filename: return None - + # Generate bom-ref from filename and hash sha256 = source_file.get("sha256") if sha256: bom_ref = f"file:{filename}#{sha256[:16]}" else: bom_ref = f"file:{filename}" - + component = { "type": "file", "bom-ref": bom_ref, "name": filename } - + # Add hash if sha256: component["hashes"] = [ @@ -1072,17 +998,17 @@ def _create_source_file_component(self, source_file): "content": sha256 } ] - + # Add properties properties = [] - + source_type = "patch" if self._is_patch_file(filename) else "source" - + properties.append({ "name": "mock:source:type", "value": source_type }) - + # Add signature information if available signature = source_file.get("digital_signature") if signature: @@ -1090,10 +1016,10 @@ def _create_source_file_component(self, source_file): "name": "mock:signature:info", "value": signature }) - + if properties: component["properties"] = properties - + return component def _is_patch_file(self, filename): @@ -1101,7 +1027,11 @@ def _is_patch_file(self, filename): if not filename: return False lower_name = filename.lower() - return lower_name.startswith("patch") or lower_name.endswith(".patch") or lower_name.endswith(".diff") + return ( + lower_name.startswith("patch") or + lower_name.endswith(".patch") or + lower_name.endswith(".diff") + 
) def _format_epoch_timestamp(self, epoch_value): """Convert epoch timestamp string to ISO8601 if possible.""" @@ -1111,9 +1041,8 @@ def _format_epoch_timestamp(self, epoch_value): epoch_int = int(epoch_value) if epoch_int <= 0: return None - from datetime import datetime, timezone return datetime.fromtimestamp(epoch_int, tz=timezone.utc).isoformat() - except Exception: + except (ValueError, TypeError, OSError, OverflowError): return epoch_value def _append_source_properties(self, properties, source_entries): @@ -1161,60 +1090,63 @@ def _append_source_properties(self, properties, source_entries): def _generate_file_bom_ref(self, package_name, package_version, file_path): """Generates a bom-ref for a file component within a package. - + Format: file:package-name@version:/absolute/path/to/file """ # Normalize file path (ensure it starts with /) if not file_path.startswith('/'): file_path = '/' + file_path - + return f"file:{package_name}@{package_version}:{file_path}" def _should_include_file_dependency(self, file_path): """Determine if a file should have a dependency entry.""" if not self.include_file_dependencies: return False - + # Filter out debug files if configured if not self.include_debug_files: if '/usr/lib/debug/' in file_path or file_path.endswith('.debug'): return False - + # Filter out man pages if configured if not self.include_man_pages: - if '/usr/share/man/' in file_path or (file_path.endswith('.gz') and '/man' in file_path): + if ( + '/usr/share/man/' in file_path or + (file_path.endswith('.gz') and '/man' in file_path) + ): return False - + return True def _create_file_components(self, rpm_path, package_name, package_version): """Creates file components for all files in an RPM package.""" if not self.include_file_components: return [] - + file_list = self.get_rpm_file_list(rpm_path) file_info = self.get_rpm_file_info(rpm_path) - + file_components = [] for file_path in file_list: if not file_path or not file_path.strip(): continue - + # Filter files 
based on configuration if not self.include_debug_files: if '/usr/lib/debug/' in file_path or file_path.endswith('.debug'): continue - + file_data = file_info.get(file_path, {}) file_hash = file_data.get("sha256") - + bom_ref = self._generate_file_bom_ref(package_name, package_version, file_path) component = { "type": "file", "bom-ref": bom_ref, "name": file_path } - + # Add hash if available if file_hash: component["hashes"] = [ @@ -1223,7 +1155,7 @@ def _create_file_components(self, rpm_path, package_name, package_version): "content": file_hash } ] - + # Add properties for file metadata properties = [] if file_data.get("permissions"): @@ -1241,35 +1173,39 @@ def _create_file_components(self, rpm_path, package_name, package_version): "name": "mock:file:group", "value": file_data["group"] }) - + if properties: component["properties"] = properties - + file_components.append(component) - + return file_components - def _get_source_file_bom_refs(self, package_name, source_files): + def _get_source_file_bom_refs(self, _package_name, source_files): """Gets bom-refs for source files that were used to build a package. - + Returns list of bom-refs for source tar.gz and patch files. 
""" source_bom_refs = [] - + for source_file in source_files: filename = source_file.get("filename", "") if not filename: continue - + # Include source tar.gz files - if filename.endswith('.tar.gz') or filename.endswith('.tar.bz2') or filename.endswith('.tar.xz'): + if ( + filename.endswith('.tar.gz') or + filename.endswith('.tar.bz2') or + filename.endswith('.tar.xz') + ): sha256 = source_file.get("sha256") if sha256: bom_ref = f"file:{filename}#{sha256[:16]}" else: bom_ref = f"file:{filename}" source_bom_refs.append(bom_ref) - + # Include patch files (matching pattern like 00xx*.patch) elif filename.endswith('.patch') or '.patch' in filename.lower(): sha256 = source_file.get("sha256") @@ -1278,57 +1214,57 @@ def _get_source_file_bom_refs(self, package_name, source_files): else: bom_ref = f"file:{filename}" source_bom_refs.append(bom_ref) - + return source_bom_refs - def _create_dependency(self, bom_ref, dependencies, component_map, distro): + def _create_dependency(self, bom_ref, dependencies, component_map, distro_obj): """Creates a CycloneDX dependency entry.""" if not bom_ref: return None - + # Convert dependency strings to bom-refs depends_on = [] for dep in dependencies: # Parse RPM dependency format (e.g., "libc.so.6()(64bit)", "package >= 1.0") - dep_bom_ref = self._dependency_to_bom_ref(dep, component_map, distro) + dep_bom_ref = self._dependency_to_bom_ref(dep, component_map, distro_obj) if dep_bom_ref: depends_on.append(dep_bom_ref) - + # Deduplicate dependsOn array depends_on = list(set(depends_on)) - + if not depends_on: return None - + return { "ref": bom_ref, "dependsOn": depends_on } - def _dependency_to_bom_ref(self, dependency_string, component_map, distro): + def _dependency_to_bom_ref(self, dependency_string, component_map, _distro): """Converts an RPM dependency string to a bom-ref (PURL).""" if not dependency_string: return None - + # RPM dependencies can be complex: "package >= version", "libc.so.6()(64bit)", etc. 
# Try to extract package name dep = dependency_string.split()[0] if dependency_string else "" - + # Remove version constraints (>=, <=, =, etc.) dep = re.sub(r'\s*[><=!]+\s*.*$', '', dep) - + # Remove parentheses content (e.g., "libc.so.6()(64bit)" -> "libc.so.6") dep = re.sub(r'\(.*?\)', '', dep) dep = dep.strip() - + if not dep or dep.startswith('/'): return None - + # Try to match against known components (case-insensitive) dep_lower = dep.lower() if dep_lower in component_map: return component_map[dep_lower] - + # If no match found, try to find by package name pattern # Some dependencies are library names, try to find matching package for pkg_name, bom_ref in component_map.items(): @@ -1336,7 +1272,7 @@ def _dependency_to_bom_ref(self, dependency_string, component_map, distro): # (e.g., "libc.so.6" might come from "glibc" package) if dep_lower in pkg_name or pkg_name in dep_lower: return bom_ref - + # If still no match, return None (don't create invalid references) return None @@ -1345,13 +1281,14 @@ def _parse_signature_to_properties(self, signature_string): properties = [] if not signature_string or signature_string == "(none)": return properties - - # Parse signature line like: "RSA/SHA256, Fri 08 Nov 2024 03:56:24 AM EST, Key ID c8ac4916105ef944" + + # Parse signature like: + # "RSA/SHA256, Fri 08 Nov 2024 03:56:24 AM EST, Key ID c8ac4916105ef944" properties.append({ "name": "mock:signature:type", "value": "GPG" }) - + if "RSA/SHA256" in signature_string: properties.append({ "name": "mock:signature:algorithm", @@ -1372,7 +1309,7 @@ def _parse_signature_to_properties(self, signature_string): "name": "mock:signature:algorithm", "value": "Ed25519/SHA256" }) - + # Extract key ID key_id_match = re.search(r'Key ID ([0-9a-fA-F]+)', signature_string) if key_id_match: @@ -1380,60 +1317,67 @@ def _parse_signature_to_properties(self, signature_string): "name": "mock:signature:key", "value": key_id_match.group(1) }) - + + # Extract date # Extract date - date_match = 
re.search(r'([A-Za-z]{3} [A-Za-z]{3}\s+\d{1,2} \d{2}:\d{2}:\d{2} \d{4})', signature_string) + date_match = re.search( + r'([A-Za-z]{3} [A-Za-z]{3}\s+\d{1,2} \d{2}:\d{2}:\d{2} \d{4})', signature_string + ) if date_match: properties.append({ "name": "mock:signature:date", "value": date_match.group(1) }) - + properties.append({ "name": "mock:signature:raw", "value": signature_string }) - + return properties def _signature_info_to_properties(self, signature_info): """Converts signature info dict to CycloneDX properties.""" properties = [] - + sig_type = signature_info.get("signature_type", "unsigned") properties.append({ "name": "mock:signature:type", "value": sig_type }) - - if sig_type != "unsigned" and sig_type != "unknown": + + if ( + sig_type not in ('unsigned', 'unknown') and + 'missing key' not in sig_type and + 'BAD' not in sig_type + ): algorithm = signature_info.get("signature_algorithm") if algorithm: properties.append({ "name": "mock:signature:algorithm", "value": algorithm }) - + key_id = signature_info.get("signature_key") if key_id: properties.append({ "name": "mock:signature:key", "value": key_id }) - + sig_date = signature_info.get("signature_date") if sig_date: properties.append({ "name": "mock:signature:date", "value": sig_date }) - + sig_valid = signature_info.get("signature_valid", False) properties.append({ "name": "mock:signature:valid", "value": str(sig_valid) }) - + return properties def parse_spec_file(self, spec_path): @@ -1442,16 +1386,17 @@ def parse_spec_file(self, spec_path): if not os.path.isfile(spec_path): self.buildroot.root_log.debug(f"Spec file not found: {spec_path}") return [] - + sources = [] try: chroot_spec_path = self.from_chroot_path(spec_path) - # Use rpmspec --parse inside the build chroot to ensure macro expansion matches the build + # Use rpmspec --parse inside the build chroot to insure macro expansion + # matches the build cmd = ["rpmspec", "--parse", chroot_spec_path] result, _ = self.buildroot.doChroot( cmd, 
shell=False, returnOutput=True, printOutput=False ) - + for line in (result or "").splitlines(): line = line.strip() # Match lines like Source0: or Patch1: @@ -1464,40 +1409,50 @@ def parse_spec_file(self, spec_path): else: filename = source_file hash_value = None - + # Extract actual filename from URL if it's a URL if filename.startswith('http'): # Extract filename from URL (last part after /) actual_filename = filename.split('/')[-1] else: actual_filename = filename - + # Try to find the actual file and calculate its hash build_dir = os.path.dirname(spec_path) # SOURCES directory is at the same level as SPECS, not inside SPECS sources_dir = os.path.join(os.path.dirname(build_dir), "SOURCES") file_path = os.path.join(sources_dir, actual_filename) - + actual_hash = None if os.path.isfile(file_path): actual_hash = self.hash_file(file_path) - self.buildroot.root_log.debug(f"Found source file {actual_filename} at {file_path}, hash: {actual_hash}") + self.buildroot.root_log.debug( + f"Found source file {actual_filename} at {file_path}, " + f"hash: {actual_hash}" + ) elif hash_value: actual_hash = hash_value - self.buildroot.root_log.debug(f"Using hash from spec file for {actual_filename}: {hash_value}") + self.buildroot.root_log.debug( + f"Using hash from spec file for {actual_filename}: {hash_value}" + ) else: - self.buildroot.root_log.debug(f"Source file {actual_filename} not found at {file_path}") - + self.buildroot.root_log.debug( + f"Source file {actual_filename} not found at {file_path}" + ) + # Check for digital signature (GPG signature) - signature = self.get_file_signature(file_path) if os.path.isfile(file_path) else None - + signature = ( + self.get_file_signature(file_path) if os.path.isfile(file_path) else None + ) + sources.append({ "filename": actual_filename, "sha256": actual_hash, "digital_signature": signature }) - + self.buildroot.root_log.debug(f"Extracted source and patch files from spec: {sources}") + # pylint: disable=broad-exception-caught except 
Exception as e: self.buildroot.root_log.debug(f"Failed to parse spec file {spec_path}: {e}") return sources @@ -1509,67 +1464,63 @@ def get_file_signature(self, file_path): asc_file = file_path + ".asc" if os.path.isfile(asc_file): return "GPG signature file exists: " + os.path.basename(asc_file) - + # Check for .sig signature file sig_file = file_path + ".sig" if os.path.isfile(sig_file): return "GPG signature file exists: " + os.path.basename(sig_file) - + # Check if the file itself is a signature if file_path.endswith('.asc') or file_path.endswith('.sig'): return "File is a signature file" - + return None - except Exception as e: + except OSError as e: self.buildroot.root_log.debug(f"Failed to check signature for {file_path}: {e}") return None def get_iso_timestamp(self): """Returns the current time in ISO 8601 format.""" - from datetime import datetime, timezone return datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') def get_distribution(self): """Returns the distribution name and version from /etc/os-release.""" try: - distro = None + distro_name = None version = None if os.path.exists("/etc/os-release"): - with open("/etc/os-release") as f: + with open("/etc/os-release", encoding="utf-8") as f: for line in f: if line.startswith("NAME="): - distro = line.strip().split("=", 1)[1].strip('"') + distro_name = line.strip().split("=", 1)[1].strip('"') elif line.startswith("VERSION_ID="): version = line.strip().split("=", 1)[1].strip('"') - if distro and version: - return f"{distro} {version}" - elif distro: - return distro - else: - return "Unknown" - except Exception as e: + if distro_name and version: + return f"{distro_name} {version}" + return distro_name or "Unknown" + except OSError as e: return f"Unknown ({e})" def generate_cpe(self, package_name, package_version, vendor=None): """Generates a CPE identifier for a package.""" # CPE format: cpe:2.3:a:{vendor}:{product}:{version}:*:*:*:*:*:*:*:* - + # Default vendor if not provided if not vendor or 
vendor == "(none)": vendor = "fedora" - + # Clean up vendor name for CPE vendor = re.sub(r'[^a-zA-Z0-9._-]', '_', vendor.lower()) - + # Clean up package name for CPE product = re.sub(r'[^a-zA-Z0-9._-]', '_', package_name.lower()) - + # Clean up version for CPE (remove release part if present) version = package_version if '-' in version: version = version.split('-')[0] # Remove release part - + # Generate CPE cpe = f"cpe:2.3:a:{vendor}:{product}:{version}:*:*:*:*:*:*:*:*" return cpe @@ -1577,29 +1528,30 @@ def generate_cpe(self, package_name, package_version, vendor=None): def detect_chroot_distribution(self): """Detects the distribution name inside the chroot using python-distro.""" try: - import distro # Query the chroot filesystem directly using root_dir parameter + # pylint: disable=unexpected-keyword-arg distro_id = distro.id(root_dir=self.buildroot.rootdir) if distro_id: return distro_id.lower() - else: - return "unknown" + return "unknown" + # pylint: disable=broad-exception-caught except Exception as e: self.buildroot.root_log.debug(f"Failed to detect chroot distribution: {e}") return "unknown" def get_build_toolchain_packages(self): - """Returns the list of packages installed in the build toolchain with detailed signature information.""" + """Returns the list of packages installed in the build toolchain + with detailed signature information.""" try: # First get basic package info query = "%{NAME}|%{VERSION}-%{RELEASE}.%{ARCH}|%{LICENSE}|%{BUILDTIME}\n" cmd = ["rpm", "-qa", "--qf", query] - output, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) packages = [] cpe_vendor_default = self.detect_chroot_distribution() or "unknown" - import re - import datetime - + for line in output.splitlines(): parts = line.split("|", 3) if len(parts) < 3: @@ -1608,27 +1560,31 @@ def get_build_toolchain_packages(self): package_version = 
parts[1].strip() package_license = parts[2].strip() build_time = parts[3].strip() if len(parts) > 3 else None - + # Skip GPG keys and other non-package entries - if package_name.startswith('gpg-pubkey') or package_name == '(none)' or not package_name: + if ( + package_name.startswith('gpg-pubkey') or + package_name == '(none)' or + not package_name + ): continue - + # Get detailed signature info for this package digital_signature = self.get_package_signature_from_chroot(package_name) - + # Build date if build_time and build_time.isdigit(): try: - dt = datetime.datetime.utcfromtimestamp(int(build_time)) - digital_signature["build_date"] = dt.isoformat() + "Z" - except Exception: + dt = datetime.fromtimestamp(int(build_time), tz=timezone.utc) + digital_signature["build_date"] = dt.isoformat() + except (ValueError, TypeError, OverflowError): digital_signature["build_date"] = None - + cpe = self.generate_cpe(package_name, package_version, vendor=cpe_vendor_default) - + # Get package checksum (SHA-256 of the RPM file) package_checksum = self.get_package_checksum_from_chroot(package_name) - + packages.append({ "name": package_name, "version": package_version, @@ -1639,6 +1595,7 @@ def get_build_toolchain_packages(self): }) self.buildroot.root_log.debug(f"Found {len(packages)} build toolchain packages") return packages + # pylint: disable=broad-exception-caught except Exception as e: self.buildroot.root_log.debug(f"Failed to get build environment packages: {e}") return [] @@ -1649,34 +1606,85 @@ def get_package_checksum_from_chroot(self, package_name): # Try different RPM header tags to get a checksum # SHA256HEADER is the SHA256 checksum of the RPM header cmd = ["rpm", "-q", package_name, "--qf", "%{SHA256HEADER}"] - output, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) - - if output and output.strip() and output.strip() != "(none)" and not output.strip().startswith("error"): + output, _ = self.buildroot.doChroot( + cmd, shell=False, 
returnOutput=True, printOutput=False + ) + + if ( + output and output.strip() and + output.strip() != "(none)" and + not output.strip().startswith("error") + ): return output.strip().lower() - + # Try SHA1HEADER as fallback (older RPMs) cmd = ["rpm", "-q", package_name, "--qf", "%{SHA1HEADER}"] - output, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) - - if output and output.strip() and output.strip() != "(none)" and not output.strip().startswith("error"): + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + + if ( + output and output.strip() and + output.strip() != "(none)" and + not output.strip().startswith("error") + ): # It's SHA-1, but it's better than nothing - self.buildroot.root_log.debug(f"Warning: Using SHA-1 for {package_name}, SHA-256 not available") + self.buildroot.root_log.debug( + f"Warning: Using SHA-1 for {package_name}, SHA-256 not available" + ) return output.strip().lower() - + # No header checksum available self.buildroot.root_log.debug(f"Warning: No checksum available for {package_name}") return None - + + # pylint: disable=broad-exception-caught except Exception as e: self.buildroot.root_log.debug(f"Failed to get checksum for package {package_name}: {e}") return None + def _parse_signature_data(self, sig_data, signature_info): + """Parses the raw signature string and updates the signature_info dict.""" + if sig_data and sig_data != "(none)" and sig_data != "": + signature_info["signature_type"] = "GPG" + signature_info["signature_valid"] = True + + # Parse signature line like: + # "RSA/SHA256, Fri 08 Nov 2024 ... Key ID ..." 
+ if "RSA/SHA256" in sig_data: + signature_info["signature_algorithm"] = "RSA/SHA256" + elif "DSA/SHA1" in sig_data: + signature_info["signature_algorithm"] = "DSA/SHA1" + elif "ECDSA/SHA256" in sig_data: + signature_info["signature_algorithm"] = "ECDSA/SHA256" + elif "Ed25519/SHA256" in sig_data: + signature_info["signature_algorithm"] = "Ed25519/SHA256" + + # Extract key ID + if "Key ID" in sig_data: + key_id_match = re.search(r'Key ID ([0-9a-fA-F]+)', sig_data) + if key_id_match: + signature_info["signature_key"] = key_id_match.group(1) + + # Extract date - handle various time formats including EST/EDT + date_match = re.search( + r'([A-Za-z]{3} [A-Za-z]{3}\s+\d{1,2} \d{2}:\d{2}:\d{2} \d{4})', + sig_data + ) + if date_match: + signature_info["signature_date"] = date_match.group(1) + else: + signature_info["signature_type"] = "unsigned" + signature_info["signature_valid"] = False + def get_package_signature_from_chroot(self, package_name): """Gets detailed signature information for a specific package from inside the chroot.""" try: cmd = ["rpm", "-qi", package_name] - output, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) - + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + signature_info = { "signature_type": "unsigned", "signature_key": None, @@ -1686,47 +1694,23 @@ def get_package_signature_from_chroot(self, package_name): "raw_signature_data": None, "build_date": None } - + for line in output.splitlines(): line = line.strip() if line.startswith("Signature"): # Extract the signature data after the colon sig_data = line.split(":", 1)[1].strip() if ":" in line else "" signature_info["raw_signature_data"] = sig_data - - if sig_data and sig_data != "(none)" and sig_data != "": - signature_info["signature_type"] = "GPG" - signature_info["signature_valid"] = True - - # Parse signature line like: "RSA/SHA256, Fri 08 Nov 2024 03:56:24 AM EST, Key ID c8ac4916105ef944" - if 
"RSA/SHA256" in sig_data: - signature_info["signature_algorithm"] = "RSA/SHA256" - elif "DSA/SHA1" in sig_data: - signature_info["signature_algorithm"] = "DSA/SHA1" - elif "ECDSA/SHA256" in sig_data: - signature_info["signature_algorithm"] = "ECDSA/SHA256" - elif "Ed25519/SHA256" in sig_data: - signature_info["signature_algorithm"] = "Ed25519/SHA256" - - # Extract key ID - if "Key ID" in sig_data: - key_id_match = re.search(r'Key ID ([0-9a-fA-F]+)', sig_data) - if key_id_match: - signature_info["signature_key"] = key_id_match.group(1) - - # Extract date - handle various time formats including EST/EDT - date_match = re.search(r'([A-Za-z]{3} [A-Za-z]{3}\s+\d{1,2} \d{2}:\d{2}:\d{2} \d{4})', sig_data) - if date_match: - signature_info["signature_date"] = date_match.group(1) - else: - signature_info["signature_type"] = "unsigned" - signature_info["signature_valid"] = False + self._parse_signature_data(sig_data, signature_info) break - + return signature_info - + + # pylint: disable=broad-exception-caught except Exception as e: - self.buildroot.root_log.debug(f"Failed to get signature for package {package_name}: {e}") + self.buildroot.root_log.debug( + f"Failed to get signature for package {package_name}: {e}" + ) return { "signature_type": "unknown", "signature_valid": False, @@ -1736,23 +1720,30 @@ def get_package_signature_from_chroot(self, package_name): def get_package_detailed_signature(self, package_name): """Gets detailed signature information for a specific package.""" try: - import subprocess - import shlex # Try to use rpm --root to query from outside the chroot first # If that fails, fall back to running inside the chroot root_path = self.buildroot.rootdir cmd = f"rpm --root {shlex.quote(root_path)} -qi {shlex.quote(package_name)}" - result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + result = subprocess.run( + cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + text=True, check=False + ) output 
= result.stdout - + # If host rpm command failed (empty output), try running inside chroot if not output.strip(): - self.buildroot.root_log.debug(f"Host RPM command failed for {package_name}, trying inside chroot...") + self.buildroot.root_log.debug( + f"Host RPM command failed for {package_name}, trying inside chroot..." + ) # Use buildroot's doChroot method to run the command inside the chroot cmd = ["rpm", "-qi", package_name] - output, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) - self.buildroot.root_log.debug(f"Chroot RPM output for {package_name}: {output[:200]}...") # Debug output - + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + self.buildroot.root_log.debug( + f"Chroot RPM output for {package_name}: {output[:200]}..." + ) # Debug output + signature_info = { "signature_type": None, "signature_key": None, @@ -1762,11 +1753,13 @@ def get_package_detailed_signature(self, package_name): "raw_signature_data": None, "build_date": None } - + output_lines = output.splitlines() i = 0 signature_found = False - self.buildroot.root_log.debug(f"DEBUG: Processing {len(output_lines)} lines for package {package_name}") + self.buildroot.root_log.debug( + f"DEBUG: Processing {len(output_lines)} lines for package {package_name}" + ) while i < len(output_lines): line = output_lines[i].strip() self.buildroot.root_log.debug(f"DEBUG: Line {i}: '{line}'") @@ -1777,53 +1770,29 @@ def get_package_detailed_signature(self, package_name): sig_data = line.split(":", 1)[1].strip() if ":" in line else "" signature_info["raw_signature_data"] = sig_data self.buildroot.root_log.debug(f"DEBUG: Extracted signature data: '{sig_data}'") - - if sig_data and sig_data != "(none)" and sig_data != "": - signature_info["signature_type"] = "GPG" - signature_info["signature_valid"] = True - - # Parse signature line like: "RSA/SHA256, Fri 08 Nov 2024 03:56:24 AM EST, Key ID c8ac4916105ef944" - if "RSA/SHA256" in 
sig_data: - signature_info["signature_algorithm"] = "RSA/SHA256" - elif "DSA/SHA1" in sig_data: - signature_info["signature_algorithm"] = "DSA/SHA1" - elif "ECDSA/SHA256" in sig_data: - signature_info["signature_algorithm"] = "ECDSA/SHA256" - elif "Ed25519/SHA256" in sig_data: - signature_info["signature_algorithm"] = "Ed25519/SHA256" - - # Extract key ID - if "Key ID" in sig_data: - key_id_match = re.search(r'Key ID ([0-9a-fA-F]+)', sig_data) - if key_id_match: - signature_info["signature_key"] = key_id_match.group(1) - - # Extract date - handle various time formats including EST/EDT - date_match = re.search(r'([A-Za-z]{3} [A-Za-z]{3}\s+\d{1,2} \d{2}:\d{2}:\d{2} \d{4})', sig_data) - if date_match: - signature_info["signature_date"] = date_match.group(1) - else: - signature_info["signature_type"] = "unsigned" - signature_info["signature_valid"] = False + self._parse_signature_data(sig_data, signature_info) i += 1 continue - + if line.startswith("Build Date"): # This can help verify the package build time build_date = line.split(":", 1)[1].strip() if ":" in line else None if build_date: signature_info["build_date"] = build_date i += 1 - + # If no signature line was found, mark as unsigned if not signature_found: signature_info["signature_type"] = "unsigned" signature_info["signature_valid"] = False - + return signature_info - + + # pylint: disable=broad-exception-caught except Exception as e: - self.buildroot.root_log.debug(f"Failed to get detailed signature for package {package_name}: {e}") + self.buildroot.root_log.debug( + f"Failed to get detailed signature for package {package_name}: {e}" + ) return { "signature_type": "unknown", "signature_valid": False, @@ -1839,7 +1808,7 @@ def get_rpm_metadata(self, rpm_path): # Use individual rpm queries instead of trying to output JSON directly try: metadata = {} - + # Get each field individually fields = { "name": "%{NAME}", @@ -1858,22 +1827,25 @@ def get_rpm_metadata(self, rpm_path): "group": "%{GROUP}", "distribution": 
"%{DISTRIBUTION}" } - + for field_name, field_format in fields.items(): cmd = ["rpm", "-qp", rpm_path, "--queryformat", field_format] - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + result = subprocess.run( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True + ) value = result.stdout.strip() # Handle empty epoch (rpm returns empty string for no epoch) if field_name == "epoch" and not value: value = "(none)" metadata[field_name] = value - + self.buildroot.root_log.debug(f"RPM metadata extracted: {metadata}") return metadata - + except subprocess.CalledProcessError as e: self.buildroot.root_log.debug(f"RPM command failed for {rpm_path}: {e.stderr}") return {} + # pylint: disable=broad-exception-caught except Exception as e: self.buildroot.root_log.debug(f"Failed to extract RPM metadata: {e}") return {} @@ -1882,7 +1854,9 @@ def get_rpm_file_list(self, rpm_path): """Extracts the list of files from an RPM file.""" cmd = ["rpm", "-qpl", rpm_path] try: - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + result = subprocess.run( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True + ) files = result.stdout.splitlines() self.buildroot.root_log.debug(f"Files in RPM {rpm_path}: {files}") return files @@ -1892,10 +1866,13 @@ def get_rpm_file_list(self, rpm_path): def get_rpm_file_info(self, rpm_path): """Extracts file hashes, ownership, and permissions from an RPM file using rpm-python.""" + # pylint: disable=no-member file_info = {} try: ts = rpm.TransactionSet() + # pylint: disable=protected-access ts.setVSFlags(rpm._RPMVSF_NOSIGNATURES | rpm._RPMVSF_NODIGESTS) + # pylint: enable=protected-access with open(rpm_path, "rb") as f: hdr = ts.hdrFromFdno(f.fileno()) @@ -1903,7 +1880,7 @@ def get_rpm_file_info(self, rpm_path): dirnames = hdr[rpm.RPMTAG_DIRNAMES] basenames = hdr[rpm.RPMTAG_BASENAMES] dirindexes = 
hdr[rpm.RPMTAG_DIRINDEXES] - + filedigests = hdr[rpm.RPMTAG_FILEDIGESTS] filemodes = hdr[rpm.RPMTAG_FILEMODES] fileusernames = hdr[rpm.RPMTAG_FILEUSERNAME] @@ -1911,27 +1888,27 @@ def get_rpm_file_info(self, rpm_path): for i, basename in enumerate(basenames): dirname = dirnames[dirindexes[i]] - + # Decode bytes to strings if needed if isinstance(dirname, bytes): dirname = dirname.decode('utf-8', 'replace') if isinstance(basename, bytes): basename = basename.decode('utf-8', 'replace') - + filename = os.path.join(dirname, basename) - + digest = filedigests[i] if isinstance(digest, bytes): digest = digest.decode('utf-8') - + # Empty digest usually means empty string or all zeros if not digest: digest = None - + mode = filemodes[i] # Format permissions as octal string (e.g., 0100755) to match rpm --dump format - permissions = "0%o" % mode - + permissions = f"0{mode:o}" + owner = fileusernames[i] if isinstance(owner, bytes): owner = owner.decode('utf-8', 'replace') @@ -1946,13 +1923,15 @@ def get_rpm_file_info(self, rpm_path): "owner": owner, "group": group } - - self.buildroot.root_log.debug(f"File info for RPM {rpm_path}: {len(file_info)} files processed") + + self.buildroot.root_log.debug( + f"File info for RPM {rpm_path}: {len(file_info)} files processed" + ) return file_info - + + # pylint: disable=broad-exception-caught except Exception as e: self.buildroot.root_log.debug(f"Failed to get file info for {rpm_path}: {e}") - import traceback self.buildroot.root_log.debug(traceback.format_exc()) return {} @@ -1960,7 +1939,9 @@ def get_rpm_dependencies(self, rpm_path): """Extracts the list of dependencies from an RPM file.""" cmd = ["rpm", "-qpR", rpm_path] try: - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + result = subprocess.run( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True + ) dependencies = result.stdout.splitlines() self.buildroot.root_log.debug(f"Dependencies for RPM 
{rpm_path}: {dependencies}") return dependencies @@ -1972,7 +1953,9 @@ def get_rpm_signature(self, rpm_path): """Extracts the GPG signature of an RPM file.""" cmd = ["rpm", "-qpi", rpm_path] try: - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + result = subprocess.run( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True + ) for line in result.stdout.splitlines(): if line.startswith("Signature"): # Extract the signature data after the colon @@ -1992,16 +1975,14 @@ def hash_file(self, file_path): for chunk in iter(lambda: f.read(4096), b""): sha256.update(chunk) return sha256.hexdigest() + # pylint: disable=broad-exception-caught except Exception as e: self.buildroot.root_log.debug(f"Failed to hash file {file_path}: {e}") return None def extract_source_files_from_srpm(self, src_rpm_path): """Extracts source files from a source RPM.""" - import tempfile - import shlex - import mockbuild.file_util - + self.buildroot.root_log.debug(f"Extracting source files from source RPM: {src_rpm_path}") source_files = [] try: @@ -2010,27 +1991,31 @@ def extract_source_files_from_srpm(self, src_rpm_path): # Use rpm2archive instead of rpm2cpio to handle large files (>4GB) # rpm2archive creates a .tgz file in the current directory extract_cmd = ["rpm2archive", src_rpm_path] - subprocess.run(extract_cmd, cwd=temp_dir, check=True, stderr=subprocess.PIPE, text=True) - + subprocess.run( + extract_cmd, cwd=temp_dir, check=True, stderr=subprocess.PIPE, text=True + ) + # Find the generated archive (should be only one file ending in .tgz or .tar.gz) archive_file = None for f in os.listdir(temp_dir): if f.endswith(".tgz") or f.endswith(".tar.gz"): archive_file = os.path.join(temp_dir, f) break - + if archive_file: tar_cmd = ["tar", "-xf", archive_file] subprocess.run(tar_cmd, cwd=temp_dir, check=True) os.remove(archive_file) else: - self.buildroot.root_log.debug(f"rpm2archive did not produce expected output for 
{src_rpm_path}") + self.buildroot.root_log.debug( + f"rpm2archive did not produce expected output for {src_rpm_path}" + ) except (subprocess.CalledProcessError, OSError) as e: self.buildroot.root_log.debug(f"Failed to unpack source RPM {src_rpm_path}: {e}") mockbuild.file_util.rmtree(temp_dir) return source_files - + for root_dir, _, files in os.walk(temp_dir): for file_name in files: if file_name.endswith(".spec"): @@ -2045,13 +2030,14 @@ def extract_source_files_from_srpm(self, src_rpm_path): }) try: mockbuild.file_util.rmtree(temp_dir) - except Exception: + except OSError: pass - + print(f"Extracted source files from source RPM: {source_files}") + # pylint: disable=broad-exception-caught except Exception as e: print(f"Failed to extract source files from source RPM {src_rpm_path}: {e}") - + return source_files def from_chroot_path(self, host_path): From 09f0448040cbd91a7c79fe2f065fa204daa08562 Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Thu, 29 Jan 2026 15:53:27 -0500 Subject: [PATCH 17/25] Minor refactors Signed-off-by: Scott R. 
Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 97bf2733d..d4eeb9dfa 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -339,10 +339,10 @@ def _find_build_artifacts(self, build_dir): for entry in entries: if not entry.is_file(): continue - if entry.name.endswith('.rpm') and not entry.name.endswith('.src.rpm'): - rpm_files.append(entry.name) - elif entry.name.endswith('.src.rpm'): + if entry.name.endswith('.src.rpm'): src_rpm_files.append(entry.name) + elif entry.name.endswith('.rpm'): + rpm_files.append(entry.name) except OSError as e: self.buildroot.root_log.debug(f"Failed to scan build directory {build_dir}: {e}") @@ -651,7 +651,7 @@ def _finalize_bom_metadata(self, bom, primary_rpm_metadata, built_package_bom_re elif primary_rpm_metadata: lic = primary_rpm_metadata.get("license") if lic and lic != "(none)": - component_obj["licenses"] = [{"license": {"id": lic}}] + component_obj["licenses"] = [{"expression": lic}] bom["metadata"]["component"] = component_obj else: first_pkg = next((c for c in bom["components"] @@ -774,9 +774,7 @@ def _create_built_package_component( if license_str and license_str != "(none)": component["licenses"] = [ { - "license": { - "id": license_str - } + "expression": license_str } ] @@ -937,9 +935,7 @@ def _create_toolchain_component(self, toolchain_pkg, distro_obj): if license_str and license_str != "(none)": component["licenses"] = [ { - "license": { - "id": license_str - } + "expression": license_str } ] From 77d45a63f1ad81af41fd5689c329278d3bbbe609 Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Thu, 29 Jan 2026 16:26:47 -0500 Subject: [PATCH 18/25] SPDX option support config_opts['sbom_generator_opts'] = { 'type': 'spdx', } Signed-off-by: Scott R. 
Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 284 +++++++++++++++++--- 1 file changed, 250 insertions(+), 34 deletions(-) diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index d4eeb9dfa..39807f0e8 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -39,8 +39,8 @@ def init(plugins, conf, buildroot): # Check for valid SBOM type configuration opts = conf["sbom_generator_opts"] - if "type" in opts and opts["type"] != "cyclonedx": - # We only support cyclonedx for now + if "type" in opts and opts["type"] not in ("cyclonedx", "spdx"): + # We only support cyclonedx and spdx for now buildroot.root_log.warning( f"SBOM generator type '{opts['type']}' not supported, defaulting to 'cyclonedx'" ) @@ -60,6 +60,7 @@ def __init__(self, plugins, conf, buildroot): self.builddir = buildroot.builddir self.conf = conf self.sbom_enabled = self.conf.get('generate_sbom', True) + self.sbom_type = self.conf.get('type', 'cyclonedx') self.sbom_done = False # Configuration options for file-level dependencies and filtering @@ -431,7 +432,7 @@ def _generate_sbom_post_build_hook(self): if self.sbom_done or not self.sbom_enabled: return - state_text = "Generating CycloneDX SBOM for built packages v1.0" + state_text = f"Generating {self.sbom_type.upper()} SBOM for built packages v1.0" self.state.start(state_text) try: @@ -450,47 +451,69 @@ def _generate_sbom_post_build_hook(self): build_subject_release, source_files ) = self._get_build_subject_metadata(spec_file, src_rpm_files, build_dir) - # Construct output filename using package name-version-release format if not build_subject_name or not build_subject_version or not build_subject_release: self.buildroot.root_log.debug("Cannot generate SBOM - build metadata incomplete") return - sbom_filename = ( - f"{build_subject_name}-{build_subject_version}-{build_subject_release}.sbom" - ) - out_file = 
os.path.join(self.buildroot.resultdir, sbom_filename) - - # Create CycloneDX document - bom = self._create_cyclonedx_document() + # Gather common data + distro_id = self.detect_chroot_distribution() or "fedora" build_toolchain_packages = self.get_build_toolchain_packages() - # Add source and toolchain components - source_component_entries = self._add_source_components(bom, source_files) - distro_id = self.detect_chroot_distribution() or "fedora" - toolchain_bom_refs = self._add_toolchain_components( - bom, build_toolchain_packages, distro_id - ) + # Dispatch based on type + if self.sbom_type == "spdx": + sbom_filename = ( + f"{build_subject_name}-{build_subject_version}-{build_subject_release}.spdx.json" + ) + out_file = os.path.join(self.buildroot.resultdir, sbom_filename) - # Process binary RPMs and convert to components - ( - built_package_bom_refs, primary_rpm_metadata - ) = self._process_built_packages( - bom, rpm_files, build_dir, distro_id, source_component_entries, - build_subject_name, build_toolchain_packages, toolchain_bom_refs - ) + doc = self._generate_spdx_document( + build_subject_name, build_subject_version, build_subject_release, + build_dir, rpm_files, source_files, + build_toolchain_packages, distro_id + ) + + with open(out_file, "w", encoding="utf-8") as f: + json.dump(doc, f, indent=2) + + self.buildroot.root_log.debug(f"SPDX SBOM successfully written to: {out_file}") + + else: + # Default: CycloneDX + sbom_filename = ( + f"{build_subject_name}-{build_subject_version}-{build_subject_release}.sbom" + ) + out_file = os.path.join(self.buildroot.resultdir, sbom_filename) + + # Create CycloneDX document + bom = self._create_cyclonedx_document() + + # Add source and toolchain components + source_component_entries = self._add_source_components(bom, source_files) + toolchain_bom_refs = self._add_toolchain_components( + bom, build_toolchain_packages, distro_id + ) + + # Process binary RPMs and convert to components + ( + built_package_bom_refs, 
primary_rpm_metadata + ) = self._process_built_packages( + bom, rpm_files, build_dir, distro_id, source_component_entries, + build_subject_name, build_toolchain_packages, toolchain_bom_refs + ) + + # Add RPM-specific metadata and finalize dependencies + self._finalize_bom_metadata(bom, primary_rpm_metadata, built_package_bom_refs, + build_subject_name, build_subject_version, + build_subject_release, distro_id) + self._finalize_dependencies(bom, source_component_entries, + build_toolchain_packages, distro_id) - # Add RPM-specific metadata and finalize dependencies - self._finalize_bom_metadata(bom, primary_rpm_metadata, built_package_bom_refs, - build_subject_name, build_subject_version, - build_subject_release, distro_id) - self._finalize_dependencies(bom, source_component_entries, - build_toolchain_packages, distro_id) + # Write CycloneDX BOM + with open(out_file, "w", encoding="utf-8") as f: + json.dump(bom, f, indent=2) - # Write CycloneDX BOM - with open(out_file, "w", encoding="utf-8") as f: - json.dump(bom, f, indent=2) + self.buildroot.root_log.debug(f"CycloneDX SBOM successfully written to: {out_file}") - self.buildroot.root_log.debug(f"CycloneDX SBOM successfully written to: {out_file}") # pylint: disable=broad-exception-caught except Exception as e: self.buildroot.root_log.debug(f"An error occurred during SBOM generation: {e}") @@ -2047,3 +2070,196 @@ def from_chroot_path(self, host_path): rel_path = "/" + rel_path return rel_path return host_path + + def _generate_spdx_document(self, name, version, release, build_dir, rpm_files, + source_files, toolchain_components, distro_id): + """Generates the full SPDX document.""" + doc_spdx_id = "SPDXRef-DOCUMENT" + creation_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + # Basic Document Structure + document = { + "spdxVersion": "SPDX-2.3", + "dataLicense": "CC0-1.0", + "SPDXID": doc_spdx_id, + "name": f"SBOM for {name}-{version}-{release}", + "documentNamespace": 
f"http://spdx.org/spdxdocs/{name}-{version}-{release}-{uuid.uuid4()}", + "creationInfo": { + "creators": [ + "Tool: mock-sbom-generator-1.0", + "Organization: Atomicorp" + ], + "created": creation_time + }, + "packages": [], + "files": [], + "relationships": [] + } + + # Add Toolchain Packages + for tc in toolchain_components: + spdx_pkg = self._create_spdx_package_from_dict(tc) + if spdx_pkg: + document["packages"].append(spdx_pkg) + # Relationship: Document DESCRIBES toolchain (conceptually part of build environment) + # But strictly, Document DESCRIBES the output artifacts. + # We'll just list them. + + # Add Source Files + for src_file in source_files: + spdx_file = self._create_spdx_file(src_file) + if spdx_file: + document["files"].append(spdx_file) + # Relationship: Document CONTAINS file + document["relationships"].append({ + "spdxElementId": doc_spdx_id, + "relatedSpdxElement": spdx_file["SPDXID"], + "relationshipType": "CONTAINS" + }) + + # Add Build Artifacts (RPMs) + for rpm_file in rpm_files: + rpm_path = os.path.join(build_dir, rpm_file) + spdx_pkg = self._create_spdx_package_from_rpm(rpm_path, distro_id) + if spdx_pkg: + document["packages"].append(spdx_pkg) + # Relationship: Document DESCRIBES Package + document["relationships"].append({ + "spdxElementId": doc_spdx_id, + "relatedSpdxElement": spdx_pkg["SPDXID"], + "relationshipType": "DESCRIBES" + }) + + return document + + def _create_spdx_package_from_rpm(self, rpm_path, distro_obj): + """Creates an SPDX Package from an RPM file.""" + pkg_data = self.get_rpm_metadata(rpm_path) + if not pkg_data: + return None + + name = pkg_data.get("name") + version = pkg_data.get("version") + release = pkg_data.get("release") + arch = pkg_data.get("arch") + full_version = f"{version}-{release}" if release else version + + safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', name) + safe_ver = re.sub(r'[^a-zA-Z0-9.-]', '-', full_version) + spdx_id = f"SPDXRef-Package-{safe_name}-{safe_ver}" + + package = { + "name": name, 
+ "SPDXID": spdx_id, + "versionInfo": full_version, + "downloadLocation": "NOASSERTION", + "filesAnalyzed": False, + "supplier": "NOASSERTION" + } + + # License + lic = pkg_data.get("license") + if lic and lic != "(none)": + package["licenseDeclared"] = lic + else: + package["licenseDeclared"] = "NOASSERTION" + package["licenseConcluded"] = "NOASSERTION" + + # Supplier + packager = pkg_data.get("packager") + if packager and packager != "(none)": + package["supplier"] = f"Person: {packager}" + + # Checksums + rpm_hash = self.hash_file(rpm_path) + if rpm_hash: + package["checksums"] = [{ + "algorithm": "SHA256", + "checksumValue": rpm_hash + }] + + # External Refs + external_refs = [] + purl = self._generate_purl(name, full_version, distro_obj, arch) + if purl: + external_refs.append({ + "referenceCategory": "PACKAGE-MANAGER", + "referenceType": "purl", + "referenceLocator": purl + }) + + cpe = self.generate_cpe(name, version) + if cpe: + external_refs.append({ + "referenceCategory": "SECURITY", + "referenceType": "cpe23Type", + "referenceLocator": cpe + }) + + if external_refs: + package["externalRefs"] = external_refs + + return package + + def _create_spdx_package_from_dict(self, pkg_data): + """Creates an SPDX Package from a dictionary (e.g. 
toolchain).""" + name = pkg_data.get("name") + version = pkg_data.get("version") + if not name or not version: + return None + + safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', name) + safe_ver = re.sub(r'[^a-zA-Z0-9.-]', '-', version) + spdx_id = f"SPDXRef-Package-{safe_name}-{safe_ver}" + + package = { + "name": name, + "SPDXID": spdx_id, + "versionInfo": version, + "downloadLocation": "NOASSERTION", + "filesAnalyzed": False, + "supplier": "NOASSERTION" + } + + lic = pkg_data.get("licenseDeclared") + if lic and lic != "(none)": + package["licenseDeclared"] = lic + else: + package["licenseDeclared"] = "NOASSERTION" + package["licenseConcluded"] = "NOASSERTION" + + checksum = pkg_data.get("checksum") + if checksum and not checksum.startswith("error"): + # Assume SHA256 if len 64 else SHA1 + alg = "SHA256" if len(checksum) == 64 else "SHA1" + package["checksums"] = [{ + "algorithm": alg, + "checksumValue": checksum + }] + + return package + + def _create_spdx_file(self, file_data): + """Creates an SPDX File from file metadata.""" + filename = file_data.get("filename") + if not filename: + return None + + safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', filename) + spdx_id = f"SPDXRef-File-{safe_name}" + + file_obj = { + "fileName": f"./{filename}", + "SPDXID": spdx_id, + "licenseConcluded": "NOASSERTION", + "copyrightText": "NOASSERTION" + } + + sha256 = file_data.get("sha256") + if sha256: + file_obj["checksums"] = [{ + "algorithm": "SHA256", + "checksumValue": sha256 + }] + + return file_obj From 110dc73c3c30a371723e9d66296e129b98e0ae44 Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Tue, 24 Feb 2026 20:13:11 -0500 Subject: [PATCH 19/25] Refactor: Move from_chroot_path to Buildroot class This moves the path conversion logic from the SBOM generator plugin to the core Buildroot class, as it is a general-purpose utility providing the reverse of make_chroot_path(). 
--- mock/py/mockbuild/buildroot.py | 11 ++ mock/py/mockbuild/plugins/sbom_generator.py | 140 +++++++++----------- mock/tests/plugins/test_sbom_generator.py | 82 ++++++++++++ mock/tests/test_from_chroot_path.py | 67 ++++++++++ 4 files changed, 226 insertions(+), 74 deletions(-) create mode 100644 mock/tests/plugins/test_sbom_generator.py create mode 100644 mock/tests/test_from_chroot_path.py diff --git a/mock/py/mockbuild/buildroot.py b/mock/py/mockbuild/buildroot.py index 2952519d8..42fd92931 100644 --- a/mock/py/mockbuild/buildroot.py +++ b/mock/py/mockbuild/buildroot.py @@ -195,6 +195,17 @@ def make_chroot_path(self, *paths): new_path = os.path.join(new_path, path) return new_path + def from_chroot_path(self, host_path): + """Convert an absolute host path into the corresponding path inside the build chroot.""" + if not self.rootdir: + return host_path + if host_path.startswith(self.rootdir): + rel_path = host_path[len(self.rootdir):] + if not rel_path.startswith("/"): + rel_path = "/" + rel_path + return rel_path + return host_path + @traceLog() def initialize(self, prebuild=False): """ diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 39807f0e8..7732b7b2f 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -2,7 +2,7 @@ # vim:expandtab:autoindent:tabstop=4:shiftwidth=4:filetype=python:textwidth=0: # License: GPL2 or later see COPYING # Written by Scott R. Shinn -# Copyright (C) 2025, Atomicorp, Inc. +# Copyright (C) 2026, Atomicorp, Inc. 
"""Mock plugin for generating CycloneDX SBOMs from built RPM packages.""" import os @@ -1400,80 +1400,70 @@ def _signature_info_to_properties(self, signature_info): return properties def parse_spec_file(self, spec_path): - """Parses a spec file to extract source and patch files with their hashes and signatures.""" - self.buildroot.root_log.debug("Parsing spec file") + """Parses a spec file to extract source and patch files using the specfile library.""" + self.buildroot.root_log.debug("Parsing spec file using specfile library") if not os.path.isfile(spec_path): self.buildroot.root_log.debug(f"Spec file not found: {spec_path}") return [] + from specfile import Specfile + sources = [] try: - chroot_spec_path = self.from_chroot_path(spec_path) - # Use rpmspec --parse inside the build chroot to insure macro expansion - # matches the build + chroot_spec_path = self.buildroot.from_chroot_path(spec_path) + # Use rpmspec --parse inside the build chroot to ensure macro expansion + # matches the build environment exactly. 
cmd = ["rpmspec", "--parse", chroot_spec_path] result, _ = self.buildroot.doChroot( cmd, shell=False, returnOutput=True, printOutput=False ) - for line in (result or "").splitlines(): - line = line.strip() - # Match lines like Source0: or Patch1: - match = re.match(r'^(Source|Patch)[0-9]*:\s*(.+)$', line) - if match: - source_file = match.group(2) - # Extract hash if present (format: filename#hash) - if '#' in source_file: - filename, hash_value = source_file.split('#', 1) - else: - filename = source_file - hash_value = None - - # Extract actual filename from URL if it's a URL - if filename.startswith('http'): - # Extract filename from URL (last part after /) - actual_filename = filename.split('/')[-1] - else: - actual_filename = filename - - # Try to find the actual file and calculate its hash - build_dir = os.path.dirname(spec_path) - # SOURCES directory is at the same level as SPECS, not inside SPECS - sources_dir = os.path.join(os.path.dirname(build_dir), "SOURCES") - file_path = os.path.join(sources_dir, actual_filename) - - actual_hash = None - if os.path.isfile(file_path): - actual_hash = self.hash_file(file_path) - self.buildroot.root_log.debug( - f"Found source file {actual_filename} at {file_path}, " - f"hash: {actual_hash}" - ) - elif hash_value: - actual_hash = hash_value - self.buildroot.root_log.debug( - f"Using hash from spec file for {actual_filename}: {hash_value}" - ) - else: - self.buildroot.root_log.debug( - f"Source file {actual_filename} not found at {file_path}" - ) - - # Check for digital signature (GPG signature) - signature = ( - self.get_file_signature(file_path) if os.path.isfile(file_path) else None - ) + if not result: + return [] + + # Use specfile to parse the expanded content + spec = Specfile(content=result, sourcedir=os.path.dirname(spec_path)) + + # Extract both sources and patches from the spec object model + all_locs = [] + with spec.sources() as spec_sources: + all_locs.extend(s.location for s in spec_sources if s.location) + 
with spec.patches() as spec_patches: + all_locs.extend(p.location for p in spec_patches if p.location) + + for loc in all_locs: + # Extract hash if present in Source (format: filename#hash) + filename, _, hash_value = loc.partition('#') + + # Extract actual filename from URL or path + actual_filename = os.path.basename(filename) + + # Locate the file in the SOURCES directory + build_dir = os.path.dirname(spec_path) + sources_dir = os.path.join(os.path.dirname(build_dir), "SOURCES") + file_path = os.path.join(sources_dir, actual_filename) + + actual_hash = None + if os.path.isfile(file_path): + actual_hash = self.hash_file(file_path) + elif hash_value: + actual_hash = hash_value + + signature = ( + self.get_file_signature(file_path) if os.path.isfile(file_path) else None + ) - sources.append({ - "filename": actual_filename, - "sha256": actual_hash, - "digital_signature": signature - }) + sources.append({ + "filename": actual_filename, + "sha256": actual_hash, + "digital_signature": signature + }) - self.buildroot.root_log.debug(f"Extracted source and patch files from spec: {sources}") - # pylint: disable=broad-exception-caught + self.buildroot.root_log.debug(f"Extracted {len(sources)} source/patch files from spec") except Exception as e: self.buildroot.root_log.debug(f"Failed to parse spec file {spec_path}: {e}") + self.buildroot.root_log.debug(traceback.format_exc()) + return sources def get_file_signature(self, file_path): @@ -1547,9 +1537,22 @@ def generate_cpe(self, package_name, package_version, vendor=None): def detect_chroot_distribution(self): """Detects the distribution name inside the chroot using python-distro.""" try: - # Query the chroot filesystem directly using root_dir parameter - # pylint: disable=unexpected-keyword-arg - distro_id = distro.id(root_dir=self.buildroot.rootdir) + # Query the chroot filesystem directly. Attempting root_dir first. 
+ try: + # pylint: disable=unexpected-keyword-arg + distro_id = distro.id(root_dir=self.buildroot.rootdir) + except TypeError: + # Fallback for older python-distro versions (<1.6.0) + # We could use os-release file directly + os_release = os.path.join(self.buildroot.rootdir, "etc/os-release") + distro_id = "unknown" + if os.path.isfile(os_release): + with open(os_release, 'r') as f: + for line in f: + if line.startswith("ID="): + distro_id = line.split("=")[1].strip().strip('"').strip("'") + break + if distro_id: return distro_id.lower() return "unknown" @@ -2059,17 +2062,6 @@ def extract_source_files_from_srpm(self, src_rpm_path): return source_files - def from_chroot_path(self, host_path): - """Convert an absolute host path into the corresponding path inside the build chroot.""" - rootdir = getattr(self.buildroot, "rootdir", "") - if not rootdir: - return host_path - if host_path.startswith(rootdir): - rel_path = host_path[len(rootdir):] - if not rel_path.startswith("/"): - rel_path = "/" + rel_path - return rel_path - return host_path def _generate_spdx_document(self, name, version, release, build_dir, rpm_files, source_files, toolchain_components, distro_id): diff --git a/mock/tests/plugins/test_sbom_generator.py b/mock/tests/plugins/test_sbom_generator.py new file mode 100644 index 000000000..20b3f3dd8 --- /dev/null +++ b/mock/tests/plugins/test_sbom_generator.py @@ -0,0 +1,82 @@ +import os +import unittest +from unittest.mock import MagicMock, patch +import sys +from pathlib import Path + +# Add mock/py to sys.path so we can import mockbuild +sys.path.insert(0, str(Path(__file__).parents[2] / "mock" / "py")) + +from mockbuild.plugins.sbom_generator import SBOMGenerator + +class TestSBOMGenerator(unittest.TestCase): + def setUp(self): + self.plugins = MagicMock() + self.conf = {} + self.buildroot = MagicMock() + self.buildroot.rootdir = "/var/lib/mock/fedora-rawhide-x86_64/root" + self.buildroot.builddir = "/builddir" + self.buildroot.from_chroot_path = 
MagicMock(side_effect=lambda x: x.replace(self.buildroot.rootdir, "")) + + # Mocking root_log + self.buildroot.root_log = MagicMock() + + self.generator = SBOMGenerator(self.plugins, self.conf, self.buildroot) + + @patch('os.path.isfile') + @patch('os.path.isdir') + @patch('os.path.exists') + @patch('os.path.getsize') + def test_parse_spec_file_with_specfile_library(self, mock_getsize, mock_exists, mock_isdir, mock_isfile): + # We need to mock isfile for the spec file itself + def side_effect_isfile(path): + if path == "/builddir/SPECS/test.spec": + return True + return False + + mock_isfile.side_effect = side_effect_isfile + mock_exists.return_value = True + + spec_content = """ +Name: test-package +Version: 1.0.0 +Release: 1 +Summary: A test package +License: MIT + +Source0: https://example.com/source1.tar.gz +Source1: source2.tar.gz#sha256:1234567890abcdef +Patch0: patch1.diff + +%description +A test package for unit testing SBOM generator. + +%files +""" + # Mock doChroot to return the expanded spec content + self.buildroot.doChroot.return_value = (spec_content, 0) + + # Mock hash_file to return a dummy hash + with patch.object(SBOMGenerator, 'hash_file', return_value="deadbeef"): + # Mock get_file_signature + with patch.object(SBOMGenerator, 'get_file_signature', return_value=None): + # We also need to mock os.path.dirname and os.path.join if they behave differently on host + # but standard ones should be fine. 
+ + sources = self.generator.parse_spec_file("/builddir/SPECS/test.spec") + + # Should have 3 items: source1, source2, and patch1 + self.assertEqual(len(sources), 3) + + # Verify source 0 (from URL) + self.assertEqual(sources[0]['filename'], "source1.tar.gz") + + # Verify source 1 (with inline hash) + self.assertEqual(sources[1]['filename'], "source2.tar.gz") + self.assertEqual(sources[1]['sha256'], "sha256:1234567890abcdef") + + # Verify patch 0 + self.assertEqual(sources[2]['filename'], "patch1.diff") + +if __name__ == '__main__': + unittest.main() diff --git a/mock/tests/test_from_chroot_path.py b/mock/tests/test_from_chroot_path.py new file mode 100644 index 000000000..d922255ca --- /dev/null +++ b/mock/tests/test_from_chroot_path.py @@ -0,0 +1,67 @@ +""" Tests for from_chroot_path in buildroot.py """ + +import pytest +from unittest.mock import MagicMock +from mockbuild import buildroot + +def test_from_chroot_path(): + """ test from_chroot_path method """ + config = MagicMock() + uid_manager = MagicMock() + state = MagicMock() + plugins = MagicMock() + + # Mock config and rootdir + config_dict = { + 'root': 'fedora-rawhide-x86_64', + 'basedir': '/var/lib/mock', + 'rootdir': '/var/lib/mock/fedora-rawhide-x86_64/root', + 'resultdir': 'results', + 'chroothome': '/builddir', + 'cache_topdir': '/var/cache/mock', + 'plugin_conf': {'selinux_enable': False}, + 'chrootuid': 1000, + 'chrootuser': 'mockbuild', + 'chrootgid': 1000, + 'chrootgroup': 'mock', + 'environment': {}, + 'use_buildroot_image': False, + 'buildroot_image': None, + 'buildroot_image_skip_pull': False, + 'buildroot_image_keep_getting': False, + 'additional_packages': [], + 'version': '1.0', + 'files': {}, + 'extra_chroot_dirs': [], + 'macros': {}, + 'package_manager': 'dnf', + 'tar_binary': 'tar', + 'image_fallback': True, + 'nspawn_args': [], + 'rpm_command': 'rpm', + 'unique-ext': 'none' + } + config.__getitem__.side_effect = lambda key: config_dict.get(key) + config.__contains__.side_effect = lambda 
key: key in config_dict + config.get.side_effect = lambda key, default=None: config_dict.get(key, default) + + # Initialize Buildroot + br = buildroot.Buildroot(config, uid_manager, state, plugins) + br.rootdir = "/var/lib/mock/fedora-rawhide-x86_64/root" + + # Test cases + host_path = "/var/lib/mock/fedora-rawhide-x86_64/root/builddir/build/SPECS/test.spec" + expected_chroot_path = "/builddir/build/SPECS/test.spec" + assert br.from_chroot_path(host_path) == expected_chroot_path + + # Test path not in rootdir + other_path = "/tmp/test.spec" + assert br.from_chroot_path(other_path) == other_path + + # Test rootdir without trailing slash + br.rootdir = "/myroot" + assert br.from_chroot_path("/myroot/etc/passwd") == "/etc/passwd" + + # Test rootdir with trailing slash (should handle it gracefully) + br.rootdir = "/myroot/" + assert br.from_chroot_path("/myroot/etc/passwd") == "/etc/passwd" From 0a1f97f2812893af73274e139e7d1d96daa3be10 Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Sat, 14 Mar 2026 10:55:19 -0400 Subject: [PATCH 20/25] modernize metadata extraction and fix forensic grouping - Refactor sbom_generator to use native python-rpm bindings for metadata extraction - Fix NameError in _create_built_package_component when accessing sourcerpm - Improve forensic fidelity by moving signature metadata (date, algorithm) from aggregate signer-groups to individual toolchain components - Resolve merge conflicts with upstream/main in PLUGIN_LIST Signed-off-by: Scott R. 
Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 1035 ++++++++++++------- 1 file changed, 654 insertions(+), 381 deletions(-) diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 7732b7b2f..991c8ce49 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -71,31 +71,34 @@ def __init__(self, plugins, conf, buildroot): self.include_source_dependencies = self.conf.get('include_source_dependencies', True) self.include_toolchain_dependencies = self.conf.get('include_toolchain_dependencies', False) - plugins.add_hook("prebuild", self._list_specs_directory) + self.prebuild_source_files = [] + self.prebuild_spec_metadata = {} + + plugins.add_hook("prebuild", self._capture_prebuild_state) plugins.add_hook("postbuild", self._generate_sbom_post_build_hook) @traceLog() - def _list_specs_directory(self): - """Lists the contents of the SPECS directory before building.""" - - self.buildroot.root_log.debug("DEBUG: Listing contents of SPECS directory before building:") - self.buildroot.root_log.debug(f"DEBUG: builddir is {self.buildroot.builddir}") - self.buildroot.root_log.debug(f"DEBUG: rootdir is {self.rootdir}") - self.buildroot.root_log.debug(f"DEBUG: resultsdir is {self.buildroot.resultdir}") + def _capture_prebuild_state(self): + """Captures pristine source artifacts before the build begins.""" + self.buildroot.root_log.debug("Capturing pre-build state from SPECS and SOURCES") + # Look for spec file in the build directory - build_dir = self.buildroot.builddir - specs_dir = os.path.join(build_dir, "SPECS") - self.buildroot.root_log.debug(f"DEBUG: spec dir is {specs_dir}") - + specs_dir = os.path.join(self.buildroot.rootdir, "builddir/build/SPECS") try: if os.path.exists(specs_dir): - specs_files = os.listdir(specs_dir) - self.buildroot.root_log.debug(f"Contents of SPECS directory: {specs_files}") + for file in os.listdir(specs_dir): + if file.endswith('.spec'): 
+ spec_file = os.path.join(specs_dir, file) + self.buildroot.root_log.debug(f"Parsing spec file for pre-build state: {spec_file}") + metadata, sources = self.parse_spec_file(spec_file) + self.prebuild_spec_metadata = metadata + self.prebuild_source_files = sources + break else: - self.buildroot.root_log.debug("SPECS directory does not exist.") - except OSError as e: - self.buildroot.root_log.debug(f"Failed to list contents of SPECS directory: {e}") + self.buildroot.root_log.debug("SPECS directory does not exist for pre-build capture.") + except Exception as e: + self.buildroot.root_log.debug(f"Failed to capture pre-build state: {e}") def _create_cyclonedx_document(self): """Creates the base CycloneDX document structure.""" @@ -165,12 +168,40 @@ def _create_metadata(self): # Add Mock config if available if hasattr(self.buildroot, 'config') and self.buildroot.config: - config_name = self.buildroot.config.get('config_path', 'unknown') + config = self.buildroot.config + config_name = config.get('config_path', 'unknown') properties.append({ "name": "mock:build:config", "value": config_name }) + # Capture network isolation and access status + online = config.get('online', True) + properties.append({ + "name": "mock:build:network:online", + "value": str(online).lower() + }) + + rpm_net = config.get('rpmbuild_networking', False) + properties.append({ + "name": "mock:build:network:rpmbuild", + "value": str(rpm_net).lower() + }) + + isolation = config.get('isolation') + if isolation: + properties.append({ + "name": "mock:build:isolation", + "value": str(isolation) + }) + + use_nspawn = config.get('use_nspawn') + if use_nspawn is not None: + properties.append({ + "name": "mock:build:nspawn", + "value": str(use_nspawn).lower() + }) + hardening_props = self._collect_build_hardening_properties() if hardening_props: properties.extend(hardening_props) @@ -371,10 +402,20 @@ def _get_build_subject_metadata(self, spec_file, src_rpm_files, build_dir): build_subject_version = None 
build_subject_release = None source_files = [] - - if spec_file: - build_subject_name = os.path.splitext(os.path.basename(spec_file))[0] - parsed_sources = self.parse_spec_file(spec_file) + spec_metadata = {} + + if hasattr(self, 'prebuild_spec_metadata') and self.prebuild_spec_metadata: + spec_metadata = self.prebuild_spec_metadata + source_files = self.prebuild_source_files + build_subject_name = spec_metadata.get("name") + build_subject_version = spec_metadata.get("version") + build_subject_release = spec_metadata.get("release") + elif spec_file: + spec_metadata, parsed_sources = self.parse_spec_file(spec_file) + if spec_metadata: + build_subject_name = spec_metadata.get("name") + build_subject_version = spec_metadata.get("version") + build_subject_release = spec_metadata.get("release") if parsed_sources: source_files = parsed_sources @@ -390,31 +431,43 @@ def _get_build_subject_metadata(self, spec_file, src_rpm_files, build_dir): build_subject_release = srpm_metadata.get("release") if not source_files: - # Extract from source RPM if available + # Extract metadata for source files from source RPM without full extraction source_files = self.extract_source_files_from_srpm(srpm_path) + # Record the source RPM itself as an input artifact + srpm_name = src_rpm_files[0] + srpm_sig = self.get_rpm_signature(srpm_path) + # Add to the beginning of the list for visibility + source_files.insert(0, { + "filename": srpm_name, + "digital_signature": srpm_sig, + "source_type": "source_rpm" + }) + return ( - build_subject_name, build_subject_version, + spec_metadata, build_subject_name, build_subject_version, build_subject_release, source_files ) - def _add_source_components(self, bom, source_files): - """Converts source files to CycloneDX components and returns metadata entries.""" + def _add_source_components(self, _bom, source_files): + """Converts source files to CycloneDX components and returns components and metadata entries.""" + source_components = [] 
source_component_entries = [] for source_file in source_files: component = self._create_source_file_component(source_file) if component: - bom["components"].append(component) + source_components.append(component) filename = source_file.get("filename") source_component_entries.append({ "filename": filename, - "bom_ref": component.get("bom-ref"), + "bom-ref": component.get("bom-ref"), "type": "patch" if self._is_patch_file(filename) else "source" }) - return source_component_entries + return source_components, source_component_entries - def _add_toolchain_components(self, bom, build_toolchain_packages, distro_id): - """Adds toolchain components to the BOM and returns their bom-refs.""" + def _add_toolchain_components(self, _bom, build_toolchain_packages, distro_id): + """Adds toolchain components to the BOM and returns their components and bom-refs.""" + toolchain_components = [] toolchain_bom_refs = [] for toolchain_pkg in build_toolchain_packages: component = self._create_toolchain_component(toolchain_pkg, distro_id) @@ -422,8 +475,8 @@ def _add_toolchain_components(self, bom, build_toolchain_packages, distro_id): bom_ref = component.get("bom-ref") if bom_ref: toolchain_bom_refs.append(bom_ref) - bom["components"].append(component) - return toolchain_bom_refs + toolchain_components.append(component) + return toolchain_components, toolchain_bom_refs @traceLog() # pylint: disable=too-many-locals @@ -447,7 +500,7 @@ def _generate_sbom_post_build_hook(self): # Get build subject metadata ( - build_subject_name, build_subject_version, + spec_metadata, build_subject_name, build_subject_version, build_subject_release, source_files ) = self._get_build_subject_metadata(spec_file, src_rpm_files, build_dir) @@ -469,7 +522,7 @@ def _generate_sbom_post_build_hook(self): doc = self._generate_spdx_document( build_subject_name, build_subject_version, build_subject_release, build_dir, rpm_files, source_files, - build_toolchain_packages, distro_id + build_toolchain_packages, 
distro_id, spec_metadata=spec_metadata ) with open(out_file, "w", encoding="utf-8") as f: @@ -488,25 +541,31 @@ def _generate_sbom_post_build_hook(self): bom = self._create_cyclonedx_document() # Add source and toolchain components - source_component_entries = self._add_source_components(bom, source_files) - toolchain_bom_refs = self._add_toolchain_components( + source_components, source_component_entries = self._add_source_components(bom, source_files) + toolchain_components, toolchain_bom_refs = self._add_toolchain_components( bom, build_toolchain_packages, distro_id ) # Process binary RPMs and convert to components ( - built_package_bom_refs, primary_rpm_metadata + built_package_bom_refs, primary_rpm_metadata, all_built_components ) = self._process_built_packages( - bom, rpm_files, build_dir, distro_id, source_component_entries, + bom, rpm_files + src_rpm_files, build_dir, distro_id, source_component_entries, build_subject_name, build_toolchain_packages, toolchain_bom_refs ) # Add RPM-specific metadata and finalize dependencies self._finalize_bom_metadata(bom, primary_rpm_metadata, built_package_bom_refs, build_subject_name, build_subject_version, - build_subject_release, distro_id) + build_subject_release, distro_id, + spec_metadata=spec_metadata) self._finalize_dependencies(bom, source_component_entries, - build_toolchain_packages, distro_id) + build_toolchain_packages, distro_id, + built_package_bom_refs, toolchain_bom_refs, + spec_metadata=spec_metadata, + source_components=source_components, + toolchain_components=toolchain_components, + all_built_components=all_built_components) # Write CycloneDX BOM with open(out_file, "w", encoding="utf-8") as f: @@ -526,8 +585,9 @@ def _generate_sbom_post_build_hook(self): def _process_built_packages(self, bom, rpm_files, build_dir, distro_id, source_component_entries, build_subject_name, build_toolchain_packages, toolchain_bom_refs): - """Processes binary RPMs and creates CycloneDX components and dependencies.""" + 
"""Processes binary RPMs and creates structured CycloneDX components and dependencies.""" built_package_bom_refs = [] + all_built_components = [] component_map = {} primary_rpm_metadata = None @@ -578,16 +638,39 @@ def _process_built_packages(self, bom, rpm_files, build_dir, distro_id, # File components if package_name and package_version and self.include_file_components: + # Extract CPE and GPG info from the component to pass to files + rpm_cpe = None + for ext_ref in component.get("externalReferences", []): + if ext_ref.get("comment") == "CPE 2.3": + rpm_cpe = ext_ref.get("url") + + rpm_gpg = None + for prop in component.get("properties", []): + if prop.get("name") == "mock:signature:key": + rpm_gpg = prop.get("value") + file_components = self._create_file_components( - rpm_path, package_name, package_version + rpm_path, package_name, package_version, + rpm_cpe=rpm_cpe, rpm_gpg=rpm_gpg ) - for file_comp in file_components: - bom["components"].append(file_comp) - if self._should_include_file_dependency(file_comp.get("name", "")): - bom["dependencies"].append({ - "ref": file_comp["bom-ref"], - "dependsOn": [bom_ref] - }) + + if file_components: + if "components" not in component: + component["components"] = [] + + for file_comp in file_components: + # Set scope to required for all files in the produced RPM + file_comp["scope"] = "required" + component["components"].append(file_comp) + + if self._should_include_file_dependency(file_comp.get("name", "")): + bom["dependencies"].append({ + "ref": file_comp["bom-ref"], + "dependsOn": [bom_ref] + }) + + # Sort file components alphabetically + component["components"].sort(key=lambda x: x.get("name", "")) # Dependencies dependencies = self.get_rpm_dependencies(rpm_path) @@ -604,24 +687,46 @@ def _process_built_packages(self, bom, rpm_files, build_dir, distro_id, if t_ref not in all_depends_on: all_depends_on.append(t_ref) - all_depends_on = list(set(all_depends_on)) + all_depends_on = sorted(list(set(all_depends_on))) if 
all_depends_on: bom["dependencies"].append({"ref": bom_ref, "dependsOn": all_depends_on}) elif runtime_dependency: bom["dependencies"].append(runtime_dependency) + + all_built_components.append(component) - return built_package_bom_refs, primary_rpm_metadata + return built_package_bom_refs, primary_rpm_metadata, all_built_components # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements,too-many-positional-arguments def _finalize_bom_metadata(self, bom, primary_rpm_metadata, built_package_bom_refs, - build_subject_name, build_subject_version, - build_subject_release, distro_id): - """Adds RPM-specific metadata and component information to the BOM.""" + build_subject_name, build_subject_version, + build_subject_release, distro_id, spec_metadata=None): + """Finalizes BOM metadata, sets the primary component, and adds RPM properties.""" + # Add BuildRequires and Requires from spec if available + if spec_metadata: + metadata_props = [] + build_reqs = spec_metadata.get("build_requires", []) + if build_reqs: + metadata_props.append({ + "name": "mock:spec:build_requires", + "value": ",".join(build_reqs) + }) + + reqs = spec_metadata.get("requires", []) + if reqs: + metadata_props.append({ + "name": "mock:spec:requires", + "value": ",".join(reqs) + }) + + if metadata_props: + bom["metadata"]["properties"] = bom["metadata"].get("properties", []) + bom["metadata"]["properties"].extend(metadata_props) + if primary_rpm_metadata: rpm_props = bom["metadata"]["properties"] for key, prop_name in [("buildhost", "mock:rpm:buildhost"), ("buildtime", "mock:rpm:buildtime"), - ("sourcerpm", "mock:rpm:sourcerpm"), ("group", "mock:rpm:group"), ("epoch", "mock:rpm:epoch"), ("distribution", "mock:rpm:distribution")]: @@ -631,7 +736,7 @@ def _finalize_bom_metadata(self, bom, primary_rpm_metadata, built_package_bom_re vendor = primary_rpm_metadata.get("vendor") if vendor and vendor != "(none)": - bom["metadata"]["manufacture"] = {"name": vendor} + 
bom["metadata"]["manufacturer"] = {"name": vendor} bom["metadata"]["authors"] = [{"name": vendor}] packager = primary_rpm_metadata.get("packager") @@ -707,6 +812,13 @@ def _finalize_bom_metadata(self, bom, primary_rpm_metadata, built_package_bom_re "bom-ref": f"build-output:{aggregate_name}", "description": description } + if primary_rpm_metadata: + lic = primary_rpm_metadata.get("license") + if lic and lic != "(none)": + component_obj["licenses"] = [{"expression": lic}] + elif spec_metadata and spec_metadata.get("license"): + component_obj["licenses"] = [{"expression": spec_metadata["license"]}] + if aggregate_name and aggregate_version: component_obj["purl"] = self._generate_purl( aggregate_name, aggregate_version, distro_id @@ -715,28 +827,148 @@ def _finalize_bom_metadata(self, bom, primary_rpm_metadata, built_package_bom_re # pylint: disable=too-many-locals,too-many-branches,too-many-statements def _finalize_dependencies(self, bom, source_component_entries, - build_toolchain_packages, distro_id): - """Ensures every component has a dependency entry.""" - dependencies_dict = {dep.get("ref"): dep for dep in bom["dependencies"] if dep.get("ref")} + build_toolchain_packages, distro_id, + built_package_bom_refs, toolchain_bom_refs, + spec_metadata=None, + source_components=None, + toolchain_components=None, + all_built_components=None): + """Finalizes BOM dependencies, linking primary package to hierarchical grouping components + and implementing nested component composition.""" + # Find primary component ref (metadata.component or first built package) + primary_ref = None + if bom.get("metadata") and bom["metadata"].get("component"): + primary_ref = bom["metadata"]["component"].get("bom-ref") + + if not primary_ref: + return - for source_entry in source_component_entries: - ref = source_entry.get("bom_ref") - if ref and ref not in dependencies_dict: - dependencies_dict[ref] = {"ref": ref, "dependsOn": []} + # Create virtual grouping references + inputs_ref = 
"build:inputs" + toolchain_ref = "build:toolchain" + outputs_ref = "build:outputs" + + # Prepare grouping components + inputs_group = { + "type": "application", + "bom-ref": inputs_ref, + "name": "Build Inputs", + "description": "Source code and patches used for the build", + "properties": [{"name": "mock:type", "value": "grouping-node"}] + } + if source_components: + inputs_group["components"] = sorted(source_components, key=lambda x: x.get("name", "")) + + toolchain_group = { + "type": "application", + "bom-ref": toolchain_ref, + "name": "Build Toolchain", + "description": "Packages and tools used to perform the build", + "scope": "excluded", # Tools are not part of the runtime payload + "properties": [{"name": "mock:type", "value": "grouping-node"}] + } + if toolchain_components: + # Group toolchain components by their GPG Key ID + signer_groups = {} + pkg_map = {p.get("name"): p for p in build_toolchain_packages} + + for comp in toolchain_components: + comp["scope"] = "excluded" + pkg_info = pkg_map.get(comp.get("name")) + sig_info = pkg_info.get("digital_signature", {}) if pkg_info else {} + key_id = sig_info.get("signature_key", "unsigned") + + # Attach signature properties to the individual package component + if sig_info: + sig_props = self._signature_info_to_properties(sig_info) + comp["properties"] = comp.get("properties", []) + comp["properties"].extend([p for p in sig_props if p["name"] != "mock:signature:raw"]) + + if key_id not in signer_groups: + # Create group properties - common only to the signer + group_props = [ + {"name": "mock:role", "value": "build-toolchain"}, + {"name": "mock:type", "value": "signer-group"}, + {"name": "mock:signature:key", "value": key_id} + ] + + signer_groups[key_id] = { + "type": "application", + "bom-ref": f"signer:{key_id}", + "name": f"Packages signed by {key_id}" if key_id != "unsigned" else "Unsigned Packages", + "scope": "excluded", + "properties": group_props, + "components": [] + } + 
signer_groups[key_id]["components"].append(comp) + + # Add signer groups as children of toolchain_group + sorted_groups = sorted( + list(signer_groups.values()), + key=lambda x: x.get("name", "") + ) + for group in sorted_groups: + group["components"].sort(key=lambda x: x.get("name", "")) + + toolchain_group["components"] = sorted_groups + + outputs_group = { + "type": "application", + "bom-ref": outputs_ref, + "name": "RPM Contents", + "description": "RPM packages and their contained files produced by the build", + "scope": "required", + "properties": [{"name": "mock:type", "value": "grouping-node"}] + } + if all_built_components: + outputs_group["components"] = sorted(all_built_components, key=lambda x: x.get("name", "")) + + # Nest groups into the primary component + primary_comp = bom["metadata"]["component"] + primary_comp["components"] = [inputs_group, toolchain_group, outputs_group] + # Sort metadata components alphabetically + primary_comp["components"].sort(key=lambda x: x.get("name", "")) + + # 1. Primary component depends on the three groups + bom["dependencies"].append({ + "ref": primary_ref, + "dependsOn": sorted([inputs_ref, toolchain_ref, outputs_ref]) + }) - for toolchain_pkg in build_toolchain_packages: - name, ver = toolchain_pkg.get("name"), toolchain_pkg.get("version") - if name and ver: - purl = self._generate_purl(name, ver, distro_id) - if purl and purl not in dependencies_dict: - dependencies_dict[purl] = {"ref": purl, "dependsOn": []} + # 2. Build Inputs Group -> Source components + input_deps = [] + for entry in source_component_entries: + if entry.get("bom-ref"): + input_deps.append(entry["bom-ref"]) + + if input_deps: + bom["dependencies"].append({ + "ref": inputs_ref, + "dependsOn": sorted(list(set(input_deps))) + }) - for component in bom["components"]: - ref = component.get("bom-ref") - if ref and ref not in dependencies_dict: - dependencies_dict[ref] = {"ref": ref, "dependsOn": []} + # 3. 
Build Toolchain Group -> Signer Groups + signer_refs = [g["bom-ref"] for g in toolchain_group.get("components", [])] + if signer_refs: + bom["dependencies"].append({ + "ref": toolchain_ref, + "dependsOn": sorted(signer_refs) + }) + + # 3b. Signer Groups -> Individual packages + for group in toolchain_group["components"]: + pkg_refs = [c["bom-ref"] for c in group["components"]] + bom["dependencies"].append({ + "ref": group["bom-ref"], + "dependsOn": sorted(pkg_refs) + }) - bom["dependencies"] = list(dependencies_dict.values()) + # 4. RPM Contents Group -> Built RPMs (Packages) + if built_package_bom_refs: + bom["dependencies"].append({ + "ref": outputs_ref, + "dependsOn": sorted(list(set(built_package_bom_refs))) + }) def _create_built_package_component( @@ -772,7 +1004,8 @@ def _create_built_package_component( } # Add external references (CPE) - cpe = self.generate_cpe(package_name, version) + vendor = package_data.get("vendor") + cpe = self.generate_cpe(package_name, version, vendor=vendor) if cpe: component["externalReferences"] = [ { @@ -782,15 +1015,22 @@ def _create_built_package_component( } ] - # Add hash of RPM file - rpm_hash = self.hash_file(rpm_path) - if rpm_hash: - component["hashes"] = [ - { - "alg": "SHA-256", - "content": rpm_hash - } - ] + # Add hierarchical grouping for "RPM Contents" + outputs_ref = "build:outputs" # This will be the "RPM Contents" group + + # Add hash of RPM file - REMOVED per user request to only have hashes for files contained in RPM + # or if needed for PURL integrity, but we'll prioritize the "only" constraint. 
+ # rpm_hash = package_data.get("sha256") + # if not rpm_hash or rpm_hash == "(none)": + # rpm_hash = self.hash_file(rpm_path) + + # if rpm_hash: + # component["hashes"] = [ + # { + # "alg": "SHA-256", + # "content": rpm_hash + # } + # ] # Add license information license_str = package_data.get("license") @@ -811,6 +1051,11 @@ def _create_built_package_component( # Add properties for RPM metadata properties = [] + properties.append({ + "name": "mock:rpm:filename", + "value": os.path.basename(rpm_path) + }) + vendor = package_data.get("vendor") if vendor and vendor != "(none)": properties.append({ @@ -839,13 +1084,6 @@ def _create_built_package_component( "value": buildtime_iso }) - sourcerpm = package_data.get("sourcerpm") - if sourcerpm and sourcerpm != "(none)": - properties.append({ - "name": "mock:rpm:sourcerpm", - "value": sourcerpm - }) - group = package_data.get("group") if group and group != "(none)": properties.append({ @@ -895,6 +1133,7 @@ def _create_built_package_component( component["properties"] = properties # Add external reference for source RPM if available + sourcerpm = package_data.get("sourcerpm") if sourcerpm and sourcerpm != "(none)": component["externalReferences"] = component.get("externalReferences", []) component["externalReferences"].append({ @@ -924,23 +1163,17 @@ def _create_toolchain_component(self, toolchain_pkg, distro_obj): "purl": purl } - # Add checksum if available - checksum = toolchain_pkg.get("checksum") - if checksum and checksum != "error" and not checksum.startswith("error"): - # Determine algorithm based on hash length - if len(checksum) == 64: - alg = "SHA-256" - elif len(checksum) == 40: - alg = "SHA-1" - else: - alg = "SHA-256" # Default assumption - - component["hashes"] = [ - { - "alg": alg, - "content": checksum - } - ] + # Add checksum - REMOVED per user request to only have hashes for files contained in RPM + # (This follows the rule that only the 'RPM Contents' section should have hashes) + # checksum = 
toolchain_pkg.get("checksum") + # if checksum and checksum != "error" and not checksum.startswith("error"): + # if len(checksum) == 64: + # alg = "SHA-256" + # elif len(checksum) == 40: + # alg = "SHA-1" + # else: + # alg = "SHA-256" + # component["hashes"] = [{"alg": alg, "content": checksum}] # Add CPE cpe = toolchain_pkg.get("cpe") @@ -965,19 +1198,8 @@ def _create_toolchain_component(self, toolchain_pkg, distro_obj): # Add properties properties = [] - # Mark as build toolchain - properties.append({ - "name": "mock:role", - "value": "build-toolchain" - }) - - # Add signature information - signature_info = toolchain_pkg.get("digital_signature", {}) - if signature_info: - sig_props = self._signature_info_to_properties(signature_info) - properties.extend(sig_props) - # Add build date if available + signature_info = toolchain_pkg.get("digital_signature", {}) build_date = signature_info.get("build_date") if build_date: properties.append({ @@ -1010,6 +1232,7 @@ def _create_source_file_component(self, source_file): } # Add hash + sha256 = source_file.get("sha256") if sha256: component["hashes"] = [ { @@ -1021,7 +1244,9 @@ def _create_source_file_component(self, source_file): # Add properties properties = [] - source_type = "patch" if self._is_patch_file(filename) else "source" + source_type = source_file.get("source_type") + if not source_type: + source_type = "patch" if self._is_patch_file(filename) else "source" properties.append({ "name": "mock:source:type", @@ -1031,10 +1256,14 @@ def _create_source_file_component(self, source_file): # Add signature information if available signature = source_file.get("digital_signature") if signature: - properties.append({ - "name": "mock:signature:info", - "value": signature - }) + if source_type == "source_rpm" and not signature.startswith("GPG signature file exists") and not signature.startswith("File is a signature file"): + sig_props = self._parse_signature_to_properties(signature) + properties.extend(sig_props) + else: + 
properties.append({ + "name": "mock:signature:info", + "value": signature + }) if properties: component["properties"] = properties @@ -1138,7 +1367,8 @@ def _should_include_file_dependency(self, file_path): return True - def _create_file_components(self, rpm_path, package_name, package_version): + def _create_file_components(self, rpm_path, package_name, package_version, + rpm_cpe=None, rpm_gpg=None): """Creates file components for all files in an RPM package.""" if not self.include_file_components: return [] @@ -1157,7 +1387,8 @@ def _create_file_components(self, rpm_path, package_name, package_version): continue file_data = file_info.get(file_path, {}) - file_hash = file_data.get("sha256") + file_hash = file_data.get("hash") + algo_id = file_data.get("algo") bom_ref = self._generate_file_bom_ref(package_name, package_version, file_path) component = { @@ -1166,11 +1397,23 @@ def _create_file_components(self, rpm_path, package_name, package_version): "name": file_path } - # Add hash if available + # Add hash if available with detected algorithm if file_hash: + # Map RPM algo ID to CycloneDX algo name + # 8: SHA-256, 10: SHA-512, 1: MD5, 2: SHA-1 + algo_map = { + 8: "SHA-256", + 10: "SHA-512", + 1: "MD5", + 2: "SHA-1", + 9: "SHA-384", + 11: "SHA-224" + } + alg_name = algo_map.get(algo_id, "SHA-256") + component["hashes"] = [ { - "alg": "SHA-256", + "alg": alg_name, "content": file_hash } ] @@ -1192,6 +1435,17 @@ def _create_file_components(self, rpm_path, package_name, package_version): "name": "mock:file:group", "value": file_data["group"] }) + + if rpm_cpe: + properties.append({ + "name": "mock:package:cpe", + "value": rpm_cpe + }) + if rpm_gpg: + properties.append({ + "name": "mock:package:gpg:key", + "value": rpm_gpg + }) if properties: component["properties"] = properties @@ -1394,21 +1648,29 @@ def _signature_info_to_properties(self, signature_info): sig_valid = signature_info.get("signature_valid", False) properties.append({ "name": "mock:signature:valid", - 
"value": str(sig_valid) + "value": str(sig_valid).lower() }) + raw_data = signature_info.get("raw_signature_data") + if raw_data: + properties.append({ + "name": "mock:signature:raw", + "value": raw_data + }) + return properties def parse_spec_file(self, spec_path): - """Parses a spec file to extract source and patch files using the specfile library.""" + """Parses a spec file to extract metadata and source/patch files using the specfile library.""" self.buildroot.root_log.debug("Parsing spec file using specfile library") if not os.path.isfile(spec_path): self.buildroot.root_log.debug(f"Spec file not found: {spec_path}") - return [] + return {}, [] from specfile import Specfile sources = [] + metadata = {} try: chroot_spec_path = self.buildroot.from_chroot_path(spec_path) # Use rpmspec --parse inside the build chroot to ensure macro expansion @@ -1419,11 +1681,39 @@ def parse_spec_file(self, spec_path): ) if not result: - return [] + return {}, [] # Use specfile to parse the expanded content spec = Specfile(content=result, sourcedir=os.path.dirname(spec_path)) + + import rpm + # Extract canonical metadata + metadata = { + "name": spec.expanded_name, + "version": spec.expanded_version, + "release": spec.expanded_release, + "license": spec.expanded_license, + } + # Extract BuildRequires and Requires from headers + try: + br = spec.rpm_spec.sourceHeader[rpm.RPMTAG_REQUIRENAME] + metadata["build_requires"] = [ + r.decode('utf-8', 'replace') if isinstance(r, bytes) else str(r) + for r in br + ] if br else [] + except (AttributeError, KeyError): + metadata["build_requires"] = [] + + try: + r = spec.rpm_spec.packages[0].header[rpm.RPMTAG_REQUIRENAME] + metadata["requires"] = [ + req.decode('utf-8', 'replace') if isinstance(req, bytes) else str(req) + for req in r + ] if r else [] + except (AttributeError, KeyError, IndexError): + metadata["requires"] = [] + # Extract both sources and patches from the spec object model all_locs = [] with spec.sources() as spec_sources: @@ 
-1434,7 +1724,7 @@ def parse_spec_file(self, spec_path): for loc in all_locs: # Extract hash if present in Source (format: filename#hash) filename, _, hash_value = loc.partition('#') - + # Extract actual filename from URL or path actual_filename = os.path.basename(filename) @@ -1459,12 +1749,12 @@ def parse_spec_file(self, spec_path): "digital_signature": signature }) - self.buildroot.root_log.debug(f"Extracted {len(sources)} source/patch files from spec") + self.buildroot.root_log.debug(f"Extracted metadata {metadata} and {len(sources)} source/patch files from spec") except Exception as e: self.buildroot.root_log.debug(f"Failed to parse spec file {spec_path}: {e}") self.buildroot.root_log.debug(traceback.format_exc()) - - return sources + + return metadata, sources def get_file_signature(self, file_path): """Attempts to detect if a file has a digital signature.""" @@ -1563,25 +1853,57 @@ def detect_chroot_distribution(self): def get_build_toolchain_packages(self): """Returns the list of packages installed in the build toolchain - with detailed signature information.""" + with detailed signature information collected in a single batch query.""" try: - # First get basic package info - query = "%{NAME}|%{VERSION}-%{RELEASE}.%{ARCH}|%{LICENSE}|%{BUILDTIME}\n" + # Get detailed package info including signature data in one batch query + # Tags: Name, EVR, License, BuildTime, Signature data (RSA, DSA, GPG, PGP) + fields = [ + "%{NAME}", + "%{VERSION}-%{RELEASE}.%{ARCH}", + "%{LICENSE}", + "%{BUILDTIME}", + "%{RSAHEADER:pgpsig}", + "%{DSAHEADER:pgpsig}", + "%{SIGGPG:pgpsig}", + "%{SIGPGP:pgpsig}", + "%{SHA256HEADER}", + "%{SOURCERPM}" + ] + query = "|".join(fields) + "\n" cmd = ["rpm", "-qa", "--qf", query] output, _ = self.buildroot.doChroot( cmd, shell=False, returnOutput=True, printOutput=False ) + packages = [] cpe_vendor_default = self.detect_chroot_distribution() or "unknown" for line in output.splitlines(): - parts = line.split("|", 3) - if len(parts) < 3: + parts = 
line.split("|") + if len(parts) < 5: continue + package_name = parts[0].strip() package_version = parts[1].strip() package_license = parts[2].strip() - build_time = parts[3].strip() if len(parts) > 3 else None + build_time = parts[3].strip() + + # Signature data is in the middle parts + raw_sig = None + for sig_candidate in parts[4:8]: + sig_candidate = sig_candidate.strip() + if sig_candidate and sig_candidate != "(none)": + raw_sig = sig_candidate + break + + # Checksum is part 8, SOURCERPM is part 9 + package_checksum = parts[8].strip() if len(parts) > 8 else None + if package_checksum == "(none)": + package_checksum = None + + source_rpm = parts[9].strip() if len(parts) > 9 else None + if source_rpm == "(none)": + source_rpm = None # Skip GPG keys and other non-package entries if ( @@ -1591,31 +1913,41 @@ def get_build_toolchain_packages(self): ): continue - # Get detailed signature info for this package - digital_signature = self.get_package_signature_from_chroot(package_name) + # Prepare signature info structure + digital_signature = { + "signature_type": "unsigned", + "signature_key": None, + "signature_date": None, + "signature_algorithm": None, + "signature_valid": False, + "raw_signature_data": raw_sig, + "build_date": None + } + + if raw_sig: + self._parse_signature_data(raw_sig, digital_signature) - # Build date + # Build date from metadata if build_time and build_time.isdigit(): try: dt = datetime.fromtimestamp(int(build_time), tz=timezone.utc) digital_signature["build_date"] = dt.isoformat() except (ValueError, TypeError, OverflowError): - digital_signature["build_date"] = None + pass cpe = self.generate_cpe(package_name, package_version, vendor=cpe_vendor_default) - # Get package checksum (SHA-256 of the RPM file) - package_checksum = self.get_package_checksum_from_chroot(package_name) - packages.append({ "name": package_name, "version": package_version, "licenseDeclared": package_license, "digital_signature": digital_signature, + "sourcerpm": 
source_rpm, "cpe": cpe, "checksum": package_checksum }) - self.buildroot.root_log.debug(f"Found {len(packages)} build toolchain packages") + + self.buildroot.root_log.debug(f"Found {len(packages)} build toolchain packages with integrated signature metadata") return packages # pylint: disable=broad-exception-caught except Exception as e: @@ -1699,177 +2031,83 @@ def _parse_signature_data(self, sig_data, signature_info): signature_info["signature_type"] = "unsigned" signature_info["signature_valid"] = False - def get_package_signature_from_chroot(self, package_name): - """Gets detailed signature information for a specific package from inside the chroot.""" - try: - cmd = ["rpm", "-qi", package_name] - output, _ = self.buildroot.doChroot( - cmd, shell=False, returnOutput=True, printOutput=False - ) - - signature_info = { - "signature_type": "unsigned", - "signature_key": None, - "signature_date": None, - "signature_algorithm": None, - "signature_valid": False, - "raw_signature_data": None, - "build_date": None - } - - for line in output.splitlines(): - line = line.strip() - if line.startswith("Signature"): - # Extract the signature data after the colon - sig_data = line.split(":", 1)[1].strip() if ":" in line else "" - signature_info["raw_signature_data"] = sig_data - self._parse_signature_data(sig_data, signature_info) - break - - return signature_info - - # pylint: disable=broad-exception-caught - except Exception as e: - self.buildroot.root_log.debug( - f"Failed to get signature for package {package_name}: {e}" - ) - return { - "signature_type": "unknown", - "signature_valid": False, - "error": str(e) - } - - def get_package_detailed_signature(self, package_name): - """Gets detailed signature information for a specific package.""" - try: - # Try to use rpm --root to query from outside the chroot first - # If that fails, fall back to running inside the chroot - root_path = self.buildroot.rootdir - cmd = f"rpm --root {shlex.quote(root_path)} -qi 
{shlex.quote(package_name)}" - result = subprocess.run( - cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - text=True, check=False - ) - output = result.stdout - - # If host rpm command failed (empty output), try running inside chroot - if not output.strip(): - self.buildroot.root_log.debug( - f"Host RPM command failed for {package_name}, trying inside chroot..." - ) - # Use buildroot's doChroot method to run the command inside the chroot - cmd = ["rpm", "-qi", package_name] - output, _ = self.buildroot.doChroot( - cmd, shell=False, returnOutput=True, printOutput=False - ) - self.buildroot.root_log.debug( - f"Chroot RPM output for {package_name}: {output[:200]}..." - ) # Debug output - - signature_info = { - "signature_type": None, - "signature_key": None, - "signature_date": None, - "signature_algorithm": None, - "signature_valid": None, - "raw_signature_data": None, - "build_date": None - } - - output_lines = output.splitlines() - i = 0 - signature_found = False - self.buildroot.root_log.debug( - f"DEBUG: Processing {len(output_lines)} lines for package {package_name}" - ) - while i < len(output_lines): - line = output_lines[i].strip() - self.buildroot.root_log.debug(f"DEBUG: Line {i}: '{line}'") - if line.startswith("Signature"): - signature_found = True - self.buildroot.root_log.debug(f"DEBUG: Found signature line: '{line}'") - # Extract the signature data after the colon - sig_data = line.split(":", 1)[1].strip() if ":" in line else "" - signature_info["raw_signature_data"] = sig_data - self.buildroot.root_log.debug(f"DEBUG: Extracted signature data: '{sig_data}'") - self._parse_signature_data(sig_data, signature_info) - i += 1 - continue - - if line.startswith("Build Date"): - # This can help verify the package build time - build_date = line.split(":", 1)[1].strip() if ":" in line else None - if build_date: - signature_info["build_date"] = build_date - i += 1 - - # If no signature line was found, mark as unsigned - if not signature_found: - 
signature_info["signature_type"] = "unsigned" - signature_info["signature_valid"] = False - - return signature_info - - # pylint: disable=broad-exception-caught - except Exception as e: - self.buildroot.root_log.debug( - f"Failed to get detailed signature for package {package_name}: {e}" - ) - return { - "signature_type": "unknown", - "signature_valid": False, - "error": str(e) - } - def get_rpm_metadata(self, rpm_path): - """Extracts metadata from an RPM file.""" + """Extracts metadata from an RPM file using python-rpm bindings.""" if not os.path.isfile(rpm_path): self.buildroot.root_log.debug(f"RPM file not found: {rpm_path}") return {} - # Use individual rpm queries instead of trying to output JSON directly try: - metadata = {} + import rpm + ts = rpm.TransactionSet() + with open(rpm_path, "rb") as f: + hdr = ts.hdrFromFdno(f.fileno()) - # Get each field individually - fields = { - "name": "%{NAME}", - "version": "%{VERSION}", - "release": "%{RELEASE}", - "arch": "%{ARCH}", - "epoch": "%{EPOCH}", - "summary": "%{SUMMARY}", - "license": "%{LICENSE}", - "vendor": "%{VENDOR}", - "url": "%{URL}", - "packager": "%{PACKAGER}", - "buildtime": "%{BUILDTIME}", - "buildhost": "%{BUILDHOST}", - "sourcerpm": "%{SOURCERPM}", - "group": "%{GROUP}", - "distribution": "%{DISTRIBUTION}" + # Map of internal names to RPM tags + tag_map = { + "name": rpm.RPMTAG_NAME, + "version": rpm.RPMTAG_VERSION, + "release": rpm.RPMTAG_RELEASE, + "arch": rpm.RPMTAG_ARCH, + "epoch": rpm.RPMTAG_EPOCH, + "summary": rpm.RPMTAG_SUMMARY, + "license": rpm.RPMTAG_LICENSE, + "vendor": rpm.RPMTAG_VENDOR, + "url": rpm.RPMTAG_URL, + "packager": rpm.RPMTAG_PACKAGER, + "buildtime": rpm.RPMTAG_BUILDTIME, + "buildhost": rpm.RPMTAG_BUILDHOST, + "sourcerpm": rpm.RPMTAG_SOURCERPM, + "group": rpm.RPMTAG_GROUP, + "distribution": rpm.RPMTAG_DISTRIBUTION, + "sha256": rpm.RPMTAG_SHA256HEADER } + metadata = {} + for field_name, tag in tag_map.items(): + value = hdr[tag] + + # Special handling for certain types + if 
field_name == "epoch" and value is None: + value = "(none)" + elif value is None: + value = "" + elif isinstance(value, bytes): + value = value.decode('utf-8', errors='replace') + + metadata[field_name] = str(value) + + self.buildroot.root_log.debug(f"RPM metadata extracted natively: {metadata['name']}-{metadata['version']}") + return metadata + + except Exception as e: + self.buildroot.root_log.debug(f"Failed to extract RPM metadata from {rpm_path} natively: {e}") + # Fallback to subprocess if native method fails (should be rare) + return self._get_rpm_metadata_subprocess(rpm_path) + + def _get_rpm_metadata_subprocess(self, rpm_path): + """Fallback method to extract metadata using rpm command-line.""" + metadata = {} + fields = { + "name": "%{NAME}", "version": "%{VERSION}", "release": "%{RELEASE}", + "arch": "%{ARCH}", "epoch": "%{EPOCH}", "summary": "%{SUMMARY}", + "license": "%{LICENSE}", "vendor": "%{VENDOR}", "url": "%{URL}", + "packager": "%{PACKAGER}", "buildtime": "%{BUILDTIME}", + "buildhost": "%{BUILDHOST}", "sourcerpm": "%{SOURCERPM}", + "group": "%{GROUP}", "distribution": "%{DISTRIBUTION}" + } + try: for field_name, field_format in fields.items(): cmd = ["rpm", "-qp", rpm_path, "--queryformat", field_format] - result = subprocess.run( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True - ) + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) value = result.stdout.strip() - # Handle empty epoch (rpm returns empty string for no epoch) if field_name == "epoch" and not value: value = "(none)" metadata[field_name] = value - - self.buildroot.root_log.debug(f"RPM metadata extracted: {metadata}") return metadata - - except subprocess.CalledProcessError as e: - self.buildroot.root_log.debug(f"RPM command failed for {rpm_path}: {e.stderr}") - return {} # pylint: disable=broad-exception-caught except Exception as e: - self.buildroot.root_log.debug(f"Failed to extract RPM metadata: {e}") + 
self.buildroot.root_log.debug(f"Failed to extract RPM metadata via subprocess for {rpm_path}: {e}") return {} def get_rpm_file_list(self, rpm_path): @@ -1907,6 +2145,12 @@ def get_rpm_file_info(self, rpm_path): filemodes = hdr[rpm.RPMTAG_FILEMODES] fileusernames = hdr[rpm.RPMTAG_FILEUSERNAME] filegroupnames = hdr[rpm.RPMTAG_FILEGROUPNAME] + + # Detect digest algorithm + try: + algo = hdr[rpm.RPMTAG_FILEDIGESTALGO] + except (KeyError, IndexError): + algo = 8 # Default to SHA256 for i, basename in enumerate(basenames): dirname = dirnames[dirindexes[i]] @@ -1928,7 +2172,7 @@ def get_rpm_file_info(self, rpm_path): digest = None mode = filemodes[i] - # Format permissions as octal string (e.g., 0100755) to match rpm --dump format + # Format permissions as octal string permissions = f"0{mode:o}" owner = fileusernames[i] @@ -1940,14 +2184,15 @@ def get_rpm_file_info(self, rpm_path): group = group.decode('utf-8', 'replace') file_info[filename] = { - "sha256": digest, + "hash": digest, + "algo": algo, "permissions": permissions, "owner": owner, "group": group } self.buildroot.root_log.debug( - f"File info for RPM {rpm_path}: {len(file_info)} files processed" + f"File info for RPM {rpm_path}: {len(file_info)} files processed (Algo: {algo})" ) return file_info @@ -1958,35 +2203,55 @@ def get_rpm_file_info(self, rpm_path): return {} def get_rpm_dependencies(self, rpm_path): - """Extracts the list of dependencies from an RPM file.""" - cmd = ["rpm", "-qpR", rpm_path] + """Extracts the list of dependencies from an RPM file natively.""" try: - result = subprocess.run( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True - ) - dependencies = result.stdout.splitlines() - self.buildroot.root_log.debug(f"Dependencies for RPM {rpm_path}: {dependencies}") - return dependencies - except subprocess.CalledProcessError as e: - self.buildroot.root_log.debug(f"Failed to get dependencies for {rpm_path}: {e.stderr}") - return [] + import rpm + ts = rpm.TransactionSet() + 
with open(rpm_path, "rb") as f: + hdr = ts.hdrFromFdno(f.fileno()) + + # Use rpm.labelCompare etc to format if needed, but for now + # we just extract the requirement names. + requirements = hdr[rpm.RPMTAG_REQUIRENAME] + if not requirements: + return [] + + # Convert bytes to strings + return [r.decode('utf-8', 'replace') if isinstance(r, bytes) else str(r) for r in requirements] + except Exception as e: + self.buildroot.root_log.debug(f"Failed to extract dependencies natively for {rpm_path}: {e}") + try: + cmd = ["rpm", "-qpR", rpm_path] + result = subprocess.run( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True + ) + return result.stdout.splitlines() + except Exception: + return [] def get_rpm_signature(self, rpm_path): """Extracts the GPG signature of an RPM file.""" - cmd = ["rpm", "-qpi", rpm_path] + # Try subprocess first as it's more reliable for getting the formatted string try: - result = subprocess.run( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True - ) + # Try to get it via queryformat first (most machine-readable if successful) + cmd = ["rpm", "-qp", "--queryformat", "%{SIGPGP:pgpsig} %{SIGGPG:pgpsig}", rpm_path] + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + sig = result.stdout.strip() + if sig and sig != "(none) (none)" and sig != "(none)": + return sig.replace("(none)", "").strip() + + # Fallback to parsing rpm -qip output (always works for human-readable) + cmd = ["rpm", "-qip", rpm_path] + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) for line in result.stdout.splitlines(): - if line.startswith("Signature"): - # Extract the signature data after the colon - sig_data = line.split(":", 1)[1].strip() if ":" in line else "" - self.buildroot.root_log.debug(f"GPG Signature for {rpm_path}: {sig_data}") - return sig_data + if "Signature" in line and ":" in line: + sig_val = line.split(":", 
1)[1].strip() + if sig_val and sig_val != "(none)": + return sig_val + return None - except subprocess.CalledProcessError as e: - self.buildroot.root_log.debug(f"Failed to get GPG signature for {rpm_path}: {e.stderr}") + except Exception as e: + self.buildroot.root_log.debug(f"Failed to extract signature for {rpm_path}: {e}") return None def hash_file(self, file_path): @@ -2003,68 +2268,49 @@ def hash_file(self, file_path): return None def extract_source_files_from_srpm(self, src_rpm_path): - """Extracts source files from a source RPM.""" - - self.buildroot.root_log.debug(f"Extracting source files from source RPM: {src_rpm_path}") + """Extracts metadata for source files from a source RPM without full extraction.""" + self.buildroot.root_log.debug(f"Extracting source metadata from source RPM: {src_rpm_path}") source_files = [] + if not os.path.isfile(src_rpm_path): + return source_files try: - temp_dir = tempfile.mkdtemp(prefix="sbom-srpm-") - try: - # Use rpm2archive instead of rpm2cpio to handle large files (>4GB) - # rpm2archive creates a .tgz file in the current directory - extract_cmd = ["rpm2archive", src_rpm_path] - subprocess.run( - extract_cmd, cwd=temp_dir, check=True, stderr=subprocess.PIPE, text=True - ) + import rpm + ts = rpm.TransactionSet() + with open(src_rpm_path, "rb") as f: + hdr = ts.hdrFromFdno(f.fileno()) - # Find the generated archive (should be only one file ending in .tgz or .tar.gz) - archive_file = None - for f in os.listdir(temp_dir): - if f.endswith(".tgz") or f.endswith(".tar.gz"): - archive_file = os.path.join(temp_dir, f) - break + basenames = hdr[rpm.RPMTAG_BASENAMES] + digests = hdr[rpm.RPMTAG_FILEDIGESTS] - if archive_file: - tar_cmd = ["tar", "-xf", archive_file] - subprocess.run(tar_cmd, cwd=temp_dir, check=True) - os.remove(archive_file) - else: - self.buildroot.root_log.debug( - f"rpm2archive did not produce expected output for {src_rpm_path}" - ) + # Create a set for quick lookup of signature files + file_set = set(basenames) 
- except (subprocess.CalledProcessError, OSError) as e: - self.buildroot.root_log.debug(f"Failed to unpack source RPM {src_rpm_path}: {e}") - mockbuild.file_util.rmtree(temp_dir) - return source_files + for filename, sha256 in zip(basenames, digests): + if filename.endswith(".spec"): + continue - for root_dir, _, files in os.walk(temp_dir): - for file_name in files: - if file_name.endswith(".spec"): - continue - file_path = os.path.join(root_dir, file_name) - sha256 = self.hash_file(file_path) - signature = self.get_file_signature(file_path) - source_files.append({ - "filename": file_name, - "sha256": sha256, - "digital_signature": signature - }) - try: - mockbuild.file_util.rmtree(temp_dir) - except OSError: - pass + signature = None + if filename.endswith(".asc") or filename.endswith(".sig"): + signature = "File is a signature file" + else: + for ext in [".asc", ".sig"]: + if filename + ext in file_set: + signature = f"GPG signature file exists: {filename}{ext}" + break - print(f"Extracted source files from source RPM: {source_files}") - # pylint: disable=broad-exception-caught + source_files.append({ + "filename": filename, + "sha256": sha256, + "digital_signature": signature + }) except Exception as e: - print(f"Failed to extract source files from source RPM {src_rpm_path}: {e}") + self.buildroot.root_log.debug(f"Failed to extract source metadata from {src_rpm_path}: {e}") return source_files def _generate_spdx_document(self, name, version, release, build_dir, rpm_files, - source_files, toolchain_components, distro_id): + source_files, toolchain_components, distro_id, spec_metadata=None): """Generates the full SPDX document.""" doc_spdx_id = "SPDXRef-DOCUMENT" creation_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") @@ -2109,6 +2355,17 @@ def _generate_spdx_document(self, name, version, release, build_dir, rpm_files, "relationshipType": "CONTAINS" }) + # Prepare toolchain name to SPDXID map for relationships + tc_name_to_id = {} + if 
spec_metadata and toolchain_components: + for tc in toolchain_components: + pkg_name = tc.get("name") + pkg_version = tc.get("version") + if pkg_name and pkg_version: + safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', pkg_name) + safe_ver = re.sub(r'[^a-zA-Z0-9.-]', '-', pkg_version) + tc_name_to_id[pkg_name.lower()] = f"SPDXRef-Package-{safe_name}-{safe_ver}" + # Add Build Artifacts (RPMs) for rpm_file in rpm_files: rpm_path = os.path.join(build_dir, rpm_file) @@ -2122,6 +2379,18 @@ def _generate_spdx_document(self, name, version, release, build_dir, rpm_files, "relationshipType": "DESCRIBES" }) + # Add BUILD_DEPENDENCY_OF relationships + if spec_metadata: + build_reqs = spec_metadata.get("build_requires", []) + for req in build_reqs: + req_name = req.split()[0].lower() + if req_name in tc_name_to_id: + document["relationships"].append({ + "spdxElementId": tc_name_to_id[req_name], + "relatedSpdxElement": spdx_pkg["SPDXID"], + "relationshipType": "BUILD_DEPENDENCY_OF" + }) + return document def _create_spdx_package_from_rpm(self, rpm_path, distro_obj): @@ -2163,7 +2432,10 @@ def _create_spdx_package_from_rpm(self, rpm_path, distro_obj): package["supplier"] = f"Person: {packager}" # Checksums - rpm_hash = self.hash_file(rpm_path) + rpm_hash = pkg_data.get("sha256") + if not rpm_hash or rpm_hash == "(none)": + rpm_hash = self.hash_file(rpm_path) + if rpm_hash: package["checksums"] = [{ "algorithm": "SHA256", @@ -2180,7 +2452,8 @@ def _create_spdx_package_from_rpm(self, rpm_path, distro_obj): "referenceLocator": purl }) - cpe = self.generate_cpe(name, version) + vendor = pkg_data.get("vendor") + cpe = self.generate_cpe(name, version, vendor=vendor) if cpe: external_refs.append({ "referenceCategory": "SECURITY", From e47195177333a179f8f899b45b8254be97bcfdac Mon Sep 17 00:00:00 2001 From: "Scott R. 
Shinn" Date: Sat, 14 Mar 2026 11:55:06 -0400 Subject: [PATCH 21/25] QA fixes: - Use chroot-native RPM tools via doChroot for internal artifacts - Fix path mapping to bypass chroot for host-resident result files - Add regex-based fallback for spec parsing to handle legacy syntax - Consolidate imports and standardized toolchain PURLs Signed-off-by: Scott R. Shinn --- mock/py/mockbuild/plugins/sbom_generator.py | 497 ++++++++++++-------- 1 file changed, 311 insertions(+), 186 deletions(-) diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index 991c8ce49..da37f4f4c 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -12,8 +12,6 @@ import re import socket import uuid -import tempfile -import shlex import traceback from datetime import datetime, timezone @@ -22,7 +20,7 @@ -import mockbuild.file_util + from mockbuild.trace_decorator import traceLog # pylint: disable=invalid-name @@ -1016,7 +1014,6 @@ def _create_built_package_component( ] # Add hierarchical grouping for "RPM Contents" - outputs_ref = "build:outputs" # This will be the "RPM Contents" group # Add hash of RPM file - REMOVED per user request to only have hashes for files contained in RPM # or if needed for PURL integrity, but we'll prioritize the "only" constraint. 
@@ -1152,7 +1149,7 @@ def _create_toolchain_component(self, toolchain_pkg, distro_obj): return None # Generate PURL and bom-ref - purl = self._generate_purl(package_name, version, distro_obj) + purl = self._generate_purl(package_name, version, distro_obj, arch=toolchain_pkg.get("arch")) bom_ref = purl component = { @@ -1373,8 +1370,8 @@ def _create_file_components(self, rpm_path, package_name, package_version, if not self.include_file_components: return [] - file_list = self.get_rpm_file_list(rpm_path) file_info = self.get_rpm_file_info(rpm_path) + file_list = sorted(file_info.keys()) file_components = [] for file_path in file_list: @@ -1663,16 +1660,22 @@ def _signature_info_to_properties(self, signature_info): def parse_spec_file(self, spec_path): """Parses a spec file to extract metadata and source/patch files using the specfile library.""" self.buildroot.root_log.debug("Parsing spec file using specfile library") + + sources = [] + metadata = { + "name": "", + "version": "", + "release": "", + "license": "", + "build_requires": [], + "requires": [] + } + if not os.path.isfile(spec_path): self.buildroot.root_log.debug(f"Spec file not found: {spec_path}") - return {}, [] - - from specfile import Specfile - - sources = [] - metadata = {} + return metadata, sources try: - chroot_spec_path = self.buildroot.from_chroot_path(spec_path) + chroot_spec_path = self.buildroot.from_chroot_path(spec_path) or spec_path # Use rpmspec --parse inside the build chroot to ensure macro expansion # matches the build environment exactly. 
cmd = ["rpmspec", "--parse", chroot_spec_path] @@ -1681,75 +1684,114 @@ def parse_spec_file(self, spec_path): ) if not result: - return {}, [] - - # Use specfile to parse the expanded content - spec = Specfile(content=result, sourcedir=os.path.dirname(spec_path)) - - import rpm - # Extract canonical metadata - metadata = { - "name": spec.expanded_name, - "version": spec.expanded_version, - "release": spec.expanded_release, - "license": spec.expanded_license, - } - - # Extract BuildRequires and Requires from headers - try: - br = spec.rpm_spec.sourceHeader[rpm.RPMTAG_REQUIRENAME] - metadata["build_requires"] = [ - r.decode('utf-8', 'replace') if isinstance(r, bytes) else str(r) - for r in br - ] if br else [] - except (AttributeError, KeyError): - metadata["build_requires"] = [] - + # If doChroot returned empty, try reading local spec as fallback + try: + with open(spec_path, 'r', encoding='utf-8') as f: + result = f.read() + except Exception: + return metadata, sources + try: - r = spec.rpm_spec.packages[0].header[rpm.RPMTAG_REQUIRENAME] - metadata["requires"] = [ - req.decode('utf-8', 'replace') if isinstance(req, bytes) else str(req) - for req in r - ] if r else [] - except (AttributeError, KeyError, IndexError): - metadata["requires"] = [] - - # Extract both sources and patches from the spec object model - all_locs = [] - with spec.sources() as spec_sources: - all_locs.extend(s.location for s in spec_sources if s.location) - with spec.patches() as spec_patches: - all_locs.extend(p.location for p in spec_patches if p.location) - - for loc in all_locs: - # Extract hash if present in Source (format: filename#hash) - filename, _, hash_value = loc.partition('#') - - # Extract actual filename from URL or path - actual_filename = os.path.basename(filename) - - # Locate the file in the SOURCES directory - build_dir = os.path.dirname(spec_path) - sources_dir = os.path.join(os.path.dirname(build_dir), "SOURCES") - file_path = os.path.join(sources_dir, actual_filename) - - 
actual_hash = None - if os.path.isfile(file_path): - actual_hash = self.hash_file(file_path) - elif hash_value: - actual_hash = hash_value - - signature = ( - self.get_file_signature(file_path) if os.path.isfile(file_path) else None - ) + from specfile import Specfile + # Use specfile to parse the expanded content + spec = Specfile(content=result, sourcedir=os.path.dirname(spec_path)) - sources.append({ - "filename": actual_filename, - "sha256": actual_hash, - "digital_signature": signature + + # Extract canonical metadata + metadata.update({ + "name": spec.expanded_name, + "version": spec.expanded_version, + "release": spec.expanded_release, + "license": spec.expanded_license, }) + + # Extract BuildRequires and Requires from headers + try: + br = spec.rpm_spec.sourceHeader[rpm.RPMTAG_REQUIRENAME] + metadata["build_requires"] = [ + r.decode('utf-8', 'replace') if isinstance(r, bytes) else str(r) + for r in br + ] if br else [] + except (AttributeError, KeyError): + metadata["build_requires"] = [] + + try: + r = spec.rpm_spec.packages[0].header[rpm.RPMTAG_REQUIRENAME] + metadata["requires"] = [ + req.decode('utf-8', 'replace') if isinstance(req, bytes) else str(req) + for req in r + ] if r else [] + except (AttributeError, KeyError, IndexError): + metadata["requires"] = [] + + # Extract both sources and patches from the spec object model + all_locs = [] + with spec.sources() as spec_sources: + all_locs.extend(s.location for s in spec_sources if s.location) + with spec.patches() as spec_patches: + all_locs.extend(p.location for p in spec_patches if p.location) + + for loc in all_locs: + filename, _, hash_value = loc.partition('#') + actual_filename = os.path.basename(filename) + build_dir = os.path.dirname(spec_path) + sources_dir = os.path.join(os.path.dirname(build_dir), "SOURCES") + file_path = os.path.join(sources_dir, actual_filename) + + actual_hash = None + if os.path.isfile(file_path): + actual_hash = self.hash_file(file_path) + elif hash_value: + actual_hash 
= hash_value + + signature = ( + self.get_file_signature(file_path) if os.path.isfile(file_path) else None + ) - self.buildroot.root_log.debug(f"Extracted metadata {metadata} and {len(sources)} source/patch files from spec") + sources.append({ + "filename": actual_filename, + "sha256": actual_hash, + "digital_signature": signature + }) + self.buildroot.root_log.debug(f"Extracted metadata {metadata} and {len(sources)} source/patch files from spec") + + # Double check we actually got metadata + if not metadata.get("name"): + raise ValueError("Empty metadata from Specfile") + + except Exception as e: + self.buildroot.root_log.debug(f"Specfile parsing failed, falling back to regex: {e}") + + # Ensure result is a string for regex + content = str(result) if result else "" + + # Fallback to simple regex parsing of the expanded result + name_match = (re.search(r'^Name:\s+(.+)$', content, re.MULTILINE) or + re.search(r'^name\s*:\s*(.+)$', content, re.IGNORECASE | re.MULTILINE)) + version_match = (re.search(r'^Version:\s+(.+)$', content, re.MULTILINE) or + re.search(r'^version\s*:\s*(.+)$', content, re.IGNORECASE | re.MULTILINE)) + release_match = (re.search(r'^Release:\s+(.+)$', content, re.MULTILINE) or + re.search(r'^release\s*:\s*(.+)$', content, re.IGNORECASE | re.MULTILINE)) + license_match = (re.search(r'^License:\s+(.+)$', content, re.MULTILINE) or + re.search(r'^license\s*:\s*(.+)$', content, re.IGNORECASE | re.MULTILINE)) + + metadata["name"] = name_match.group(1).strip() if name_match else "" + metadata["version"] = version_match.group(1).strip() if version_match else "" + metadata["release"] = release_match.group(1).strip() if release_match else "" + metadata["license"] = license_match.group(1).strip() if license_match else "" + + # Simple source/patch extraction from expanded spec + source_matches = re.finditer(r'^(Source|Patch)\d*:\s+(.+)$', content, re.MULTILINE) + for sm in source_matches: + loc = sm.group(2).strip() + filename = 
os.path.basename(loc.partition('#')[0]) + # Avoid duplicates + if not any(s['filename'] == filename for s in sources): + sources.append({ + "filename": filename, + "sha256": None, + "digital_signature": None + }) except Exception as e: self.buildroot.root_log.debug(f"Failed to parse spec file {spec_path}: {e}") self.buildroot.root_log.debug(traceback.format_exc()) @@ -1859,7 +1901,8 @@ def get_build_toolchain_packages(self): # Tags: Name, EVR, License, BuildTime, Signature data (RSA, DSA, GPG, PGP) fields = [ "%{NAME}", - "%{VERSION}-%{RELEASE}.%{ARCH}", + "%{VERSION}-%{RELEASE}", + "%{ARCH}", "%{LICENSE}", "%{BUILDTIME}", "%{RSAHEADER:pgpsig}", @@ -1880,28 +1923,29 @@ def get_build_toolchain_packages(self): for line in output.splitlines(): parts = line.split("|") - if len(parts) < 5: + if len(parts) < 6: continue package_name = parts[0].strip() package_version = parts[1].strip() - package_license = parts[2].strip() - build_time = parts[3].strip() + package_arch = parts[2].strip() + package_license = parts[3].strip() + build_time = parts[4].strip() - # Signature data is in the middle parts + # Signature data is in the middle parts (parts 5-8) raw_sig = None - for sig_candidate in parts[4:8]: + for sig_candidate in parts[5:9]: sig_candidate = sig_candidate.strip() if sig_candidate and sig_candidate != "(none)": raw_sig = sig_candidate break - # Checksum is part 8, SOURCERPM is part 9 - package_checksum = parts[8].strip() if len(parts) > 8 else None + # Checksum is part 9, SOURCERPM is part 10 + package_checksum = parts[9].strip() if len(parts) > 9 else None if package_checksum == "(none)": package_checksum = None - source_rpm = parts[9].strip() if len(parts) > 9 else None + source_rpm = parts[10].strip() if len(parts) > 10 else None if source_rpm == "(none)": source_rpm = None @@ -1940,6 +1984,7 @@ def get_build_toolchain_packages(self): packages.append({ "name": package_name, "version": package_version, + "arch": package_arch, "licenseDeclared": package_license, 
"digital_signature": digital_signature, "sourcerpm": source_rpm, @@ -2032,61 +2077,89 @@ def _parse_signature_data(self, sig_data, signature_info): signature_info["signature_valid"] = False def get_rpm_metadata(self, rpm_path): - """Extracts metadata from an RPM file using python-rpm bindings.""" + """Extracts metadata from an RPM file. + Uses doChroot if the file is within the chroot to ensure compatibility.""" if not os.path.isfile(rpm_path): self.buildroot.root_log.debug(f"RPM file not found: {rpm_path}") return {} + # If the file is in the chroot, and NOT in the result directory, use doChroot + # Note: resultdir is on the host, doChroot can't see it easily. + chroot_path = self.buildroot.from_chroot_path(rpm_path) + if chroot_path and not rpm_path.startswith(self.buildroot.resultdir): + return self._get_rpm_metadata_chroot(chroot_path) + + # Fallback to host-native or host-subprocess for non-chroot files + return self._get_rpm_metadata_native(rpm_path) + + def _get_rpm_metadata_chroot(self, chroot_rpm_path): + """Extracts metadata using rpm -qp inside the chroot.""" + fields = { + "name": "%{NAME}", "version": "%{VERSION}", "release": "%{RELEASE}", + "arch": "%{ARCH}", "epoch": "%{EPOCH}", "summary": "%{SUMMARY}", + "license": "%{LICENSE}", "vendor": "%{VENDOR}", "url": "%{URL}", + "packager": "%{PACKAGER}", "buildtime": "%{BUILDTIME}", + "buildhost": "%{BUILDHOST}", "sourcerpm": "%{SOURCERPM}", + "group": "%{GROUP}", "distribution": "%{DISTRIBUTION}", + "sha256": "%{SHA256HEADER}" + } + + metadata = {} + try: + query = "|".join(fields.values()) + cmd = ["rpm", "-qp", "--queryformat", query, chroot_rpm_path] + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + + if output: + parts = output.split("|") + for i, field_name in enumerate(fields.keys()): + if i < len(parts): + val = parts[i].strip() + if field_name == "epoch" and (not val or val == "(none)"): + val = "0" + metadata[field_name] = val + return metadata + 
except Exception as e: + self.buildroot.root_log.debug(f"Failed to extract metadata via doChroot for {chroot_rpm_path}: {e}") + return {} + + def _get_rpm_metadata_native(self, rpm_path): + """Extracts metadata using native host bindings (fallback).""" + # pylint: disable=no-member try: - import rpm ts = rpm.TransactionSet() with open(rpm_path, "rb") as f: hdr = ts.hdrFromFdno(f.fileno()) - # Map of internal names to RPM tags tag_map = { - "name": rpm.RPMTAG_NAME, - "version": rpm.RPMTAG_VERSION, - "release": rpm.RPMTAG_RELEASE, - "arch": rpm.RPMTAG_ARCH, - "epoch": rpm.RPMTAG_EPOCH, - "summary": rpm.RPMTAG_SUMMARY, - "license": rpm.RPMTAG_LICENSE, - "vendor": rpm.RPMTAG_VENDOR, - "url": rpm.RPMTAG_URL, - "packager": rpm.RPMTAG_PACKAGER, - "buildtime": rpm.RPMTAG_BUILDTIME, - "buildhost": rpm.RPMTAG_BUILDHOST, - "sourcerpm": rpm.RPMTAG_SOURCERPM, - "group": rpm.RPMTAG_GROUP, - "distribution": rpm.RPMTAG_DISTRIBUTION, - "sha256": rpm.RPMTAG_SHA256HEADER + "name": rpm.RPMTAG_NAME, "version": rpm.RPMTAG_VERSION, + "release": rpm.RPMTAG_RELEASE, "arch": rpm.RPMTAG_ARCH, + "epoch": rpm.RPMTAG_EPOCH, "summary": rpm.RPMTAG_SUMMARY, + "license": rpm.RPMTAG_LICENSE, "vendor": rpm.RPMTAG_VENDOR, + "url": rpm.RPMTAG_URL, "packager": rpm.RPMTAG_PACKAGER, + "buildtime": rpm.RPMTAG_BUILDTIME, "buildhost": rpm.RPMTAG_BUILDHOST, + "sourcerpm": rpm.RPMTAG_SOURCERPM, "group": rpm.RPMTAG_GROUP, + "distribution": rpm.RPMTAG_DISTRIBUTION, "sha256": rpm.RPMTAG_SHA256HEADER } metadata = {} for field_name, tag in tag_map.items(): value = hdr[tag] - - # Special handling for certain types if field_name == "epoch" and value is None: - value = "(none)" + value = "0" elif value is None: value = "" elif isinstance(value, bytes): value = value.decode('utf-8', errors='replace') - metadata[field_name] = str(value) - - self.buildroot.root_log.debug(f"RPM metadata extracted natively: {metadata['name']}-{metadata['version']}") return metadata - - except Exception as e: - 
self.buildroot.root_log.debug(f"Failed to extract RPM metadata from {rpm_path} natively: {e}") - # Fallback to subprocess if native method fails (should be rare) + except Exception: return self._get_rpm_metadata_subprocess(rpm_path) def _get_rpm_metadata_subprocess(self, rpm_path): - """Fallback method to extract metadata using rpm command-line.""" + """Extracts metadata using host subprocess (last resort fallback).""" metadata = {} fields = { "name": "%{NAME}", "version": "%{VERSION}", "release": "%{RELEASE}", @@ -2102,124 +2175,144 @@ def _get_rpm_metadata_subprocess(self, rpm_path): result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) value = result.stdout.strip() if field_name == "epoch" and not value: - value = "(none)" + value = "0" metadata[field_name] = value return metadata - # pylint: disable=broad-exception-caught except Exception as e: self.buildroot.root_log.debug(f"Failed to extract RPM metadata via subprocess for {rpm_path}: {e}") return {} - def get_rpm_file_list(self, rpm_path): - """Extracts the list of files from an RPM file.""" - cmd = ["rpm", "-qpl", rpm_path] + + def get_rpm_file_info(self, rpm_path): + """Extracts file hashes, ownership, and permissions from an RPM file. 
+ Uses doChroot for files within the chroot (and not in resultdir).""" + + chroot_path = self.buildroot.from_chroot_path(rpm_path) + if chroot_path and not rpm_path.startswith(self.buildroot.resultdir): + return self._get_rpm_file_info_chroot(chroot_path) + + return self._get_rpm_file_info_native(rpm_path) + + def _get_rpm_file_info_chroot(self, chroot_rpm_path): + """Extracts file info using rpm -qp inside the chroot.""" + file_info = {} try: - result = subprocess.run( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True + # Query format for files: path|hash|mode|user|group + qf = "[%{FILENAMES}|%{FILEDIGESTS}|%{FILEMODES:octal}|%{FILEUSERNAME}|%{FILEGROUPNAME}\\n]" + cmd = ["rpm", "-qp", "--queryformat", qf, chroot_rpm_path] + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False ) - files = result.stdout.splitlines() - self.buildroot.root_log.debug(f"Files in RPM {rpm_path}: {files}") - return files - except subprocess.CalledProcessError as e: - self.buildroot.root_log.debug(f"Failed to get file list for {rpm_path}: {e.stderr}") - return [] + + # Detect digest algorithm from header + cmd_algo = ["rpm", "-qp", "--queryformat", "%{FILEDIGESTALGO}", chroot_rpm_path] + algo_out, _ = self.buildroot.doChroot( + cmd_algo, shell=False, returnOutput=True, printOutput=False + ) + try: + algo = int(algo_out.strip()) if algo_out and algo_out.strip() else 8 + except ValueError: + algo = 8 - def get_rpm_file_info(self, rpm_path): - """Extracts file hashes, ownership, and permissions from an RPM file using rpm-python.""" + for line in output.splitlines(): + parts = line.split("|") + if len(parts) >= 5: + filename = parts[0] + file_info[filename] = { + "hash": parts[1] if parts[1] and parts[1] != "(none)" else None, + "algo": algo, + "permissions": parts[2], + "owner": parts[3], + "group": parts[4] + } + return file_info + except Exception as e: + self.buildroot.root_log.debug(f"Failed to get file info via doChroot 
for {chroot_rpm_path}: {e}") + return {} + + def _get_rpm_file_info_native(self, rpm_path): + """Extracts file information using native host bindings (fallback).""" # pylint: disable=no-member file_info = {} try: ts = rpm.TransactionSet() # pylint: disable=protected-access ts.setVSFlags(rpm._RPMVSF_NOSIGNATURES | rpm._RPMVSF_NODIGESTS) - # pylint: enable=protected-access with open(rpm_path, "rb") as f: hdr = ts.hdrFromFdno(f.fileno()) - # Use dirnames/basenames/dirindexes to construct paths reliably - dirnames = hdr[rpm.RPMTAG_DIRNAMES] basenames = hdr[rpm.RPMTAG_BASENAMES] + dirnames = hdr[rpm.RPMTAG_DIRNAMES] dirindexes = hdr[rpm.RPMTAG_DIRINDEXES] - filedigests = hdr[rpm.RPMTAG_FILEDIGESTS] filemodes = hdr[rpm.RPMTAG_FILEMODES] fileusernames = hdr[rpm.RPMTAG_FILEUSERNAME] filegroupnames = hdr[rpm.RPMTAG_FILEGROUPNAME] - - # Detect digest algorithm + try: algo = hdr[rpm.RPMTAG_FILEDIGESTALGO] except (KeyError, IndexError): - algo = 8 # Default to SHA256 + algo = 8 + file_info = {} for i, basename in enumerate(basenames): dirname = dirnames[dirindexes[i]] - - # Decode bytes to strings if needed if isinstance(dirname, bytes): dirname = dirname.decode('utf-8', 'replace') if isinstance(basename, bytes): basename = basename.decode('utf-8', 'replace') - filename = os.path.join(dirname, basename) digest = filedigests[i] if isinstance(digest, bytes): digest = digest.decode('utf-8') - # Empty digest usually means empty string or all zeros - if not digest: - digest = None - - mode = filemodes[i] - # Format permissions as octal string - permissions = f"0{mode:o}" - - owner = fileusernames[i] - if isinstance(owner, bytes): - owner = owner.decode('utf-8', 'replace') - - group = filegroupnames[i] - if isinstance(group, bytes): - group = group.decode('utf-8', 'replace') - file_info[filename] = { - "hash": digest, + "hash": digest if digest else None, "algo": algo, - "permissions": permissions, - "owner": owner, - "group": group + "permissions": f"0{filemodes[i]:o}", + "owner": 
fileusernames[i].decode('utf-8', 'replace') if isinstance(fileusernames[i], bytes) else fileusernames[i], + "group": filegroupnames[i].decode('utf-8', 'replace') if isinstance(filegroupnames[i], bytes) else filegroupnames[i] } - - self.buildroot.root_log.debug( - f"File info for RPM {rpm_path}: {len(file_info)} files processed (Algo: {algo})" - ) return file_info - - # pylint: disable=broad-exception-caught - except Exception as e: - self.buildroot.root_log.debug(f"Failed to get file info for {rpm_path}: {e}") - self.buildroot.root_log.debug(traceback.format_exc()) + except Exception: return {} def get_rpm_dependencies(self, rpm_path): - """Extracts the list of dependencies from an RPM file natively.""" + """Extracts the list of dependencies from an RPM file. + Uses doChroot for files within the chroot (and not in resultdir).""" + + chroot_path = self.buildroot.from_chroot_path(rpm_path) + if chroot_path and not rpm_path.startswith(self.buildroot.resultdir): + return self._get_rpm_dependencies_chroot(chroot_path) + + return self._get_rpm_dependencies_native(rpm_path) + + def _get_rpm_dependencies_chroot(self, chroot_rpm_path): + """Extracts dependencies using rpm -qpR inside the chroot.""" + try: + cmd = ["rpm", "-qpR", chroot_rpm_path] + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + return output.splitlines() if output else [] + except Exception: + return [] + + def _get_rpm_dependencies_native(self, rpm_path): + """Extracts dependencies using native host bindings (fallback).""" + # pylint: disable=no-member try: - import rpm ts = rpm.TransactionSet() with open(rpm_path, "rb") as f: hdr = ts.hdrFromFdno(f.fileno()) - # Use rpm.labelCompare etc to format if needed, but for now - # we just extract the requirement names. 
requirements = hdr[rpm.RPMTAG_REQUIRENAME] if not requirements: return [] - # Convert bytes to strings return [r.decode('utf-8', 'replace') if isinstance(r, bytes) else str(r) for r in requirements] - except Exception as e: - self.buildroot.root_log.debug(f"Failed to extract dependencies natively for {rpm_path}: {e}") + except Exception: # pylint: disable=broad-exception-caught try: cmd = ["rpm", "-qpR", rpm_path] result = subprocess.run( @@ -2230,17 +2323,51 @@ def get_rpm_dependencies(self, rpm_path): return [] def get_rpm_signature(self, rpm_path): - """Extracts the GPG signature of an RPM file.""" - # Try subprocess first as it's more reliable for getting the formatted string + """Extracts the GPG signature of an RPM file. + Uses doChroot for files within the chroot (and not in resultdir).""" + + chroot_path = self.buildroot.from_chroot_path(rpm_path) + if chroot_path and not rpm_path.startswith(self.buildroot.resultdir): + return self._get_rpm_signature_chroot(chroot_path) + + return self._get_rpm_signature_host(rpm_path) + + def _get_rpm_signature_chroot(self, chroot_rpm_path): + """Extracts signature using rpm inside the chroot.""" + try: + # Try to get it via queryformat first + cmd = ["rpm", "-qp", "--queryformat", "%{SIGPGP:pgpsig} %{SIGGPG:pgpsig}", chroot_rpm_path] + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + sig = output.strip() if output else "" + if sig and sig != "(none) (none)" and sig != "(none)": + return sig.replace("(none)", "").strip() + + # Fallback to rpm -qip + cmd = ["rpm", "-qip", chroot_rpm_path] + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + if output: + for line in output.splitlines(): + if "Signature" in line and ":" in line: + sig_val = line.split(":", 1)[1].strip() + if sig_val and sig_val != "(none)": + return sig_val + return None + except Exception: + return None + + def _get_rpm_signature_host(self, rpm_path): + 
"""Extracts signature using host tools (fallback).""" try: - # Try to get it via queryformat first (most machine-readable if successful) cmd = ["rpm", "-qp", "--queryformat", "%{SIGPGP:pgpsig} %{SIGGPG:pgpsig}", rpm_path] result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) sig = result.stdout.strip() if sig and sig != "(none) (none)" and sig != "(none)": return sig.replace("(none)", "").strip() - # Fallback to parsing rpm -qip output (always works for human-readable) cmd = ["rpm", "-qip", rpm_path] result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) for line in result.stdout.splitlines(): @@ -2248,10 +2375,8 @@ def get_rpm_signature(self, rpm_path): sig_val = line.split(":", 1)[1].strip() if sig_val and sig_val != "(none)": return sig_val - return None - except Exception as e: - self.buildroot.root_log.debug(f"Failed to extract signature for {rpm_path}: {e}") + except Exception: return None def hash_file(self, file_path): @@ -2269,12 +2394,12 @@ def hash_file(self, file_path): def extract_source_files_from_srpm(self, src_rpm_path): """Extracts metadata for source files from a source RPM without full extraction.""" + # pylint: disable=no-member self.buildroot.root_log.debug(f"Extracting source metadata from source RPM: {src_rpm_path}") source_files = [] if not os.path.isfile(src_rpm_path): return source_files try: - import rpm ts = rpm.TransactionSet() with open(src_rpm_path, "rb") as f: hdr = ts.hdrFromFdno(f.fileno()) From fee9a68f0ea1d5a1cf3b4379a3364cfa889da377 Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Sat, 14 Mar 2026 12:00:55 -0400 Subject: [PATCH 22/25] Update SBOM docs - move internal tests out of the project Signed-off-by: Scott R. 
Shinn --- docs/Plugin-SBOM.md | 59 ++++++++-------- mock/tests/plugins/test_sbom_generator.py | 82 ----------------------- 2 files changed, 27 insertions(+), 114 deletions(-) delete mode 100644 mock/tests/plugins/test_sbom_generator.py diff --git a/docs/Plugin-SBOM.md b/docs/Plugin-SBOM.md index ce0e5f301..680f0cf70 100644 --- a/docs/Plugin-SBOM.md +++ b/docs/Plugin-SBOM.md @@ -7,16 +7,19 @@ This plugin generates a Software Bill of Materials (SBOM) in CycloneDX format fo ## Features -* Generates SBOM in CycloneDX 1.5 format (JSON) -* Captures information about: - * Source files and patches from spec files - * Binary RPM metadata with PURL and CPE identifiers - * Complete build toolchain packages - * Runtime dependencies - * File hashes (SHA-256) - * GPG signatures with detailed metadata -* Outputs SBOM as JSON file in the build results directory -* Compatible with security scanners (Grype, Trivy, Snyk) +* Generates SBOM in CycloneDX 1.5 format (JSON) and SPDX 2.3 format +* Deep Chroot Integration: + * Uses the target distribution's own `rpm` binary via `doChroot` for metadata extraction, ensuring 100% version compatibility across different distributions. + * Correctly handles path mapping between chroot and host environments. +* Captures detailed information about: + * Source files and patches from spec files with a resilient regex-based fallback for legacy/strict syntax errors. + * Binary RPM metadata with standard PURL and CPE identifiers. + * Complete build toolchain packages with per-package GPG signature metadata. + * Runtime dependencies. + * File hashes (SHA-256). +* Optimized Performance: Consolidated file listing and metadata extraction into a single pass. +* Outputs SBOM in the build results directory. +* Compatible with security scanners (Grype, Trivy, Snyk). 
## Usage @@ -35,18 +38,7 @@ mock --enable-plugin=sbom_generator --rebuild ~/rpmbuild/SRPMS/package-1.0-1.fc4 mock --enable-plugin=sbom_generator --rebuild package.src.rpm -r rocky-9-x86_64 ``` -After the build completes, the SBOM will be available in the build results directory: - -```bash -# Find the SBOM file -ls /var/lib/mock/*/result/sbom.cyclonedx.json - -# View the SBOM -cat /var/lib/mock/rocky-9-x86_64/result/sbom.cyclonedx.json | jq . - -# Get build results directory path -mock --resultdir package.src.rpm -``` +After the build completes, the SBOM will be available in the build results directory. ### Viewing and Analyzing the SBOM @@ -73,6 +65,10 @@ jq '.dependencies[] | select(.ref | contains("httpd"))' sbom.cyclonedx.json The SBOM can be directly used with security vulnerability scanners: ```bash + +# Scan with SBOM Auditor +sbom-auditor sbom.cyclonedx.json + # Scan with Grype grype sbom:./sbom.cyclonedx.json @@ -138,9 +134,9 @@ config_opts['plugin_conf']['sbom_generator_opts'] = { ## Output -The plugin generates a file named `<name>-<version>-<release>.sbom` in the build results directory (typically `/var/lib/mock/fedora-42-x86_64/result/`). The SBOM includes: +The plugin generates a file named `<name>-<version>-<release>.sbom` (for CycloneDX) or `<name>-<version>-<release>.spdx.json` (for SPDX) in the build results directory. 
The SBOM includes: -* CycloneDX document metadata +* CycloneDX/SPDX document metadata * Build timestamp * Tool information (Mock SBOM Generator) * Mock-specific build properties (host, distribution, chroot, config) @@ -300,18 +296,17 @@ The SBOM includes PURL (Package URL) and CPE identifiers for accurate vulnerabil ## Requirements * Python 3.x -* RPM tools for package metadata extraction * Access to build environment for package information +* Native `rpm` and `specfile` libraries (recommended) ## Notes -* The plugin runs in the `postbuild` hook, after the build completes -* SBOM generation is skipped if no RPM, source RPM, or spec file is found -* The plugin is designed to work with both source and binary RPM builds -* Build environment information is collected using `rpm -qa` command -* All build toolchain packages are captured, providing complete build provenance -* PURL format: `pkg:rpm/{distro}/{package}@{version}?arch={arch}` -* Mock-specific metadata is stored in component and metadata properties with `mock:` prefix +* The plugin runs in the `postbuild` hook, after the build completes. +* SBOM generation is skipped if no RPM, source RPM, or spec file is found. +* **Hybrid Analysis**: Uses `doChroot` to analyze artifacts within the buildroot (ensuring compatibility with target RPM versions) and host tools for artifacts already exported to the `result/` directory. +* **Resilient Parsing**: Includes a regex-based fallback for spec files that fail strict parsing by the `specfile` library (e.g., legacy `%patchN` syntax). +* **PURL format**: `pkg:rpm/{distro}/{package}@{version}?arch={arch}`. Architecture is always separated into a qualifier, never baked into the version string. +* Mock-specific metadata is stored in properties with the `mock:` prefix. 
## Competitive Advantages diff --git a/mock/tests/plugins/test_sbom_generator.py b/mock/tests/plugins/test_sbom_generator.py deleted file mode 100644 index 20b3f3dd8..000000000 --- a/mock/tests/plugins/test_sbom_generator.py +++ /dev/null @@ -1,82 +0,0 @@ -import os -import unittest -from unittest.mock import MagicMock, patch -import sys -from pathlib import Path - -# Add mock/py to sys.path so we can import mockbuild -sys.path.insert(0, str(Path(__file__).parents[2] / "mock" / "py")) - -from mockbuild.plugins.sbom_generator import SBOMGenerator - -class TestSBOMGenerator(unittest.TestCase): - def setUp(self): - self.plugins = MagicMock() - self.conf = {} - self.buildroot = MagicMock() - self.buildroot.rootdir = "/var/lib/mock/fedora-rawhide-x86_64/root" - self.buildroot.builddir = "/builddir" - self.buildroot.from_chroot_path = MagicMock(side_effect=lambda x: x.replace(self.buildroot.rootdir, "")) - - # Mocking root_log - self.buildroot.root_log = MagicMock() - - self.generator = SBOMGenerator(self.plugins, self.conf, self.buildroot) - - @patch('os.path.isfile') - @patch('os.path.isdir') - @patch('os.path.exists') - @patch('os.path.getsize') - def test_parse_spec_file_with_specfile_library(self, mock_getsize, mock_exists, mock_isdir, mock_isfile): - # We need to mock isfile for the spec file itself - def side_effect_isfile(path): - if path == "/builddir/SPECS/test.spec": - return True - return False - - mock_isfile.side_effect = side_effect_isfile - mock_exists.return_value = True - - spec_content = """ -Name: test-package -Version: 1.0.0 -Release: 1 -Summary: A test package -License: MIT - -Source0: https://example.com/source1.tar.gz -Source1: source2.tar.gz#sha256:1234567890abcdef -Patch0: patch1.diff - -%description -A test package for unit testing SBOM generator. 
- -%files -""" - # Mock doChroot to return the expanded spec content - self.buildroot.doChroot.return_value = (spec_content, 0) - - # Mock hash_file to return a dummy hash - with patch.object(SBOMGenerator, 'hash_file', return_value="deadbeef"): - # Mock get_file_signature - with patch.object(SBOMGenerator, 'get_file_signature', return_value=None): - # We also need to mock os.path.dirname and os.path.join if they behave differently on host - # but standard ones should be fine. - - sources = self.generator.parse_spec_file("/builddir/SPECS/test.spec") - - # Should have 3 items: source1, source2, and patch1 - self.assertEqual(len(sources), 3) - - # Verify source 0 (from URL) - self.assertEqual(sources[0]['filename'], "source1.tar.gz") - - # Verify source 1 (with inline hash) - self.assertEqual(sources[1]['filename'], "source2.tar.gz") - self.assertEqual(sources[1]['sha256'], "sha256:1234567890abcdef") - - # Verify patch 0 - self.assertEqual(sources[2]['filename'], "patch1.diff") - -if __name__ == '__main__': - unittest.main() From e45e593a71e6011b75be0153354bf4ba994fe1ee Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Sat, 14 Mar 2026 12:10:41 -0400 Subject: [PATCH 23/25] Update the Feature request Signed-off-by: Scott R. Shinn --- releng/release-notes-next/sbom-generator.feature | 6 ------ releng/release-notes-next/sbom-generator.feature.md | 8 ++++++++ 2 files changed, 8 insertions(+), 6 deletions(-) delete mode 100644 releng/release-notes-next/sbom-generator.feature create mode 100644 releng/release-notes-next/sbom-generator.feature.md diff --git a/releng/release-notes-next/sbom-generator.feature b/releng/release-notes-next/sbom-generator.feature deleted file mode 100644 index 280a7b702..000000000 --- a/releng/release-notes-next/sbom-generator.feature +++ /dev/null @@ -1,6 +0,0 @@ -[cyclonedx] -Add support for generating Software Bill of Materials (SBOM) for built packages. 
-This plugin generates an SBOM in CycloneDX format, including detailed information -about the build environment, source files, and dependencies. -The plugin can be enabled via configuration or command line arguments. -This feature is available as an experimental plugin. diff --git a/releng/release-notes-next/sbom-generator.feature.md b/releng/release-notes-next/sbom-generator.feature.md new file mode 100644 index 000000000..7c2206f78 --- /dev/null +++ b/releng/release-notes-next/sbom-generator.feature.md @@ -0,0 +1,8 @@ +The new SBOM generator plugin provides comprehensive visibility into the build +environment by capturing the **complete build toolchain** installed in the +chroot, including per-package GPG signatures and vendor metadata. It establishes +full audit traceability by linking built RPMs with their original source +tarballs and patches, including SHA-256 hashes. Supporting both CycloneDX 1.5 +and SPDX 2.3 formats, the plugin leverages a chroot-native analysis model to +ensure high metadata accuracy for cross-distribution builds and compatibility +with modern security scanners. From 58c437bbd679ae38a6986efb83d1b18a009f6b89 Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Fri, 27 Mar 2026 17:59:03 -0400 Subject: [PATCH 24/25] Reorganize into separate files Signed-off-by: Scott R. 
Shinn --- mock/py/mockbuild/plugins/sbom_cyclonedx.py | 941 ++++++++ mock/py/mockbuild/plugins/sbom_generator.py | 2009 +---------------- mock/py/mockbuild/plugins/sbom_spdx.py | 409 ++++ mock/py/mockbuild/plugins/sbom_utils.py | 756 +++++++ ...ator.feature.md => sbom-generator.feature} | 3 +- 5 files changed, 2151 insertions(+), 1967 deletions(-) create mode 100644 mock/py/mockbuild/plugins/sbom_cyclonedx.py create mode 100644 mock/py/mockbuild/plugins/sbom_spdx.py create mode 100644 mock/py/mockbuild/plugins/sbom_utils.py rename releng/release-notes-next/{sbom-generator.feature.md => sbom-generator.feature} (84%) diff --git a/mock/py/mockbuild/plugins/sbom_cyclonedx.py b/mock/py/mockbuild/plugins/sbom_cyclonedx.py new file mode 100644 index 000000000..f8366a18b --- /dev/null +++ b/mock/py/mockbuild/plugins/sbom_cyclonedx.py @@ -0,0 +1,941 @@ +# -*- coding: utf-8 -*- +# vim:expandtab:autoindent:tabstop=4:shiftwidth=4:filetype=python:textwidth=0: +# License: GPL2 or later see COPYING +# Written by Scott R. Shinn +# Copyright (C) 2026, Atomicorp, Inc. + +import os +import re +import uuid +from datetime import datetime, timezone + +""" +CycloneDX generation functions for the SBOM generator plugin. 
+""" + + +class CycloneDxGenerator: + """Helper class for generating CycloneDX documents.""" + + def __init__(self, rpm_helper, buildroot, conf=None): + self.rpm_helper = rpm_helper + self.buildroot = buildroot + self.conf = conf or {} + + # Configuration options for file-level dependencies and filtering + self.include_file_dependencies = self.conf.get("include_file_dependencies", False) + self.include_file_components = self.conf.get("include_file_components", True) + self.include_debug_files = self.conf.get("include_debug_files", False) + self.include_man_pages = self.conf.get("include_man_pages", True) + self.include_toolchain_dependencies = self.conf.get( + "include_toolchain_dependencies", False + ) + + def create_built_package_component( + self, rpm_path, distro_obj, _source_components=None + ): + """Creates a CycloneDX component for a built RPM package.""" + package_data = self.rpm_helper.get_rpm_metadata(rpm_path) + if not package_data: + self.buildroot.root_log.debug(f"[SBOM] FAILED to get metadata for {rpm_path}, skipping component") + return None + + package_name = package_data.get("name") + version = package_data.get("version") + release = package_data.get("release") + arch = package_data.get("arch") + + # Combine version and release + full_version = f"{version}-{release}" if release else version + + # Generate PURL and bom-ref + purl = self.rpm_helper.generate_purl(package_name, full_version, distro_obj, arch) + bom_ref = purl + + # Determine component type (application vs library) + component_type = "library" + + component = { + "type": component_type, + "bom-ref": bom_ref, + "name": package_name, + "version": full_version, + "purl": purl + } + + # Add external references (CPE) + vendor = package_data.get("vendor") + cpe = self.rpm_helper.generate_cpe(package_name, version, vendor=vendor) + if cpe: + component["externalReferences"] = [ + { + "type": "other", + "comment": "CPE 2.3", + "url": cpe + } + ] + + # Add license information + license_str = 
package_data.get("license") + if license_str and license_str != "(none)": + component["licenses"] = [{"expression": license_str}] + + # Add supplier information (from Packager field) + packager = package_data.get("packager") + if packager and packager != "(none)": + component["supplier"] = {"name": packager} + + # Add properties for RPM metadata + properties = [] + + properties.append({ + "name": "mock:rpm:filename", + "value": os.path.basename(rpm_path) + }) + + vendor = package_data.get("vendor") + if vendor and vendor != "(none)": + properties.append({"name": "mock:rpm:vendor", "value": vendor}) + + packager = package_data.get("packager") + if packager and packager != "(none)": + properties.append({"name": "mock:rpm:packager", "value": packager}) + + buildhost = package_data.get("buildhost") + if buildhost and buildhost != "(none)": + properties.append({"name": "mock:rpm:buildhost", "value": buildhost}) + + buildtime_iso = self.format_epoch_timestamp(package_data.get("buildtime")) + if buildtime_iso: + properties.append({"name": "mock:rpm:buildtime", "value": buildtime_iso}) + + group = package_data.get("group") + if group and group != "(none)": + properties.append({"name": "mock:rpm:group", "value": group}) + + epoch_val = package_data.get("epoch") + if epoch_val and epoch_val != "(none)": + properties.append({"name": "mock:rpm:epoch", "value": epoch_val}) + + distribution = package_data.get("distribution") + if distribution and distribution != "(none)": + properties.append({"name": "mock:rpm:distribution", "value": distribution}) + + url = package_data.get("url") + if url and url != "(none)": + component["externalReferences"] = component.get("externalReferences", []) + component["externalReferences"].append({"type": "website", "url": url}) + + summary = package_data.get("summary") + if summary and summary != "(none)": + component["description"] = summary + + # Add GPG signature information if available + signature = self.rpm_helper.get_rpm_signature(rpm_path) 
+ if signature: + # Parse signature info + sig_props = self.parse_signature_to_properties(signature) + properties.extend(sig_props) + + if properties: + component["properties"] = properties + + return component + + def parse_signature_to_properties(self, signature_string): + """Parses RPM signature string into CycloneDX properties.""" + properties = [] + if not signature_string or signature_string == "(none)": + return properties + + properties.append({"name": "mock:signature:type", "value": "GPG"}) + + if "RSA/SHA256" in signature_string: + properties.append({"name": "mock:signature:algorithm", "value": "RSA/SHA256"}) + elif "DSA/SHA1" in signature_string: + properties.append({"name": "mock:signature:algorithm", "value": "DSA/SHA1"}) + elif "ECDSA/SHA256" in signature_string: + properties.append({"name": "mock:signature:algorithm", "value": "ECDSA/SHA256"}) + elif "Ed25519/SHA256" in signature_string: + properties.append({"name": "mock:signature:algorithm", "value": "Ed25519/SHA256"}) + + key_id_match = re.search(r'Key ID ([0-9a-fA-F]+)', signature_string) + if key_id_match: + properties.append({"name": "mock:signature:key", "value": key_id_match.group(1)}) + + date_match = re.search( + r'([A-Za-z]{3} [A-Za-z]{3}\s+\d{1,2} \d{2}:\d{2}:\d{2} \d{4})', signature_string + ) + if date_match: + properties.append({"name": "mock:signature:date", "value": date_match.group(1)}) + + properties.append({"name": "mock:signature:raw", "value": signature_string}) + return properties + + def signature_info_to_properties(self, signature_info): + """Converts signature info dict to CycloneDX properties.""" + properties = [] + sig_type = signature_info.get("signature_type", "unsigned") + properties.append({"name": "mock:signature:type", "value": sig_type}) + + if ( + sig_type not in ('unsigned', 'unknown') and + 'missing key' not in sig_type and + 'BAD' not in sig_type + ): + algorithm = signature_info.get("signature_algorithm") + if algorithm: + properties.append({"name": 
"mock:signature:algorithm", "value": algorithm}) + + key_id = signature_info.get("signature_key") + if key_id: + properties.append({"name": "mock:signature:key", "value": key_id}) + + sig_date = signature_info.get("signature_date") + if sig_date: + properties.append({"name": "mock:signature:date", "value": sig_date}) + + sig_valid = signature_info.get("signature_valid", False) + properties.append({"name": "mock:signature:valid", "value": str(sig_valid).lower()}) + + raw_data = signature_info.get("raw_signature_data") + if raw_data: + properties.append({"name": "mock:signature:raw", "value": raw_data}) + + return properties + + def create_cyclonedx_document(self): + """Initializes the base CycloneDX JSON structure.""" + return { + "bomFormat": "CycloneDX", + "specVersion": "1.5", + "serialNumber": f"urn:uuid:{uuid.uuid4()}", + "version": 1, + "metadata": {}, + "components": [], + "dependencies": [] + } + + def generate_bom_ref(self, package_name, version, _component_type="package"): + """Generates a stable bom-ref ID based on package name and version.""" + safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', package_name) + safe_version = re.sub(r'[^a-zA-Z0-9.-]', '-', version) + return f"build-output:{safe_name}-{safe_version}" + + def generate_file_bom_ref(self, package_name, package_version, file_path): + """Generates a unique but stable bom-ref for a file.""" + safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', package_name) + safe_version = re.sub(r'[^a-zA-Z0-9.-]', '-', package_version) + safe_path = re.sub(r'[^a-zA-Z0-9.-]', '-', file_path.lstrip('/')) + return f"file:{safe_name}-{safe_version}:{safe_path}" + + def add_source_components(self, _bom, source_files): + """Adds source files (from spec) to the components list.""" + source_components = [] + source_component_entries = [] + for src_file in source_files: + file_comp = self.create_source_file_component(src_file) + _bom["components"].append(file_comp) + source_components.append(file_comp) + 
source_component_entries.append({ + "filename": src_file["filename"], + "bom-ref": file_comp["bom-ref"] + }) + return source_components, source_component_entries + + def create_source_file_component(self, source_file): + """Creates a CycloneDX component for a source file.""" + filename = source_file["filename"] + sha256 = source_file.get("sha256") + sig = source_file.get("digital_signature") + + safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', filename) + hash_suffix = sha256[:8] if sha256 else "unknown" + bom_ref = f"source-file:{safe_name}-{hash_suffix}" + + comp = { + "type": "file", + "bom-ref": bom_ref, + "name": filename, + "properties": [ + {"name": "mock:source:type", "value": "patch" if self.is_patch_file(filename) else "archive"} + ] + } + if sha256: + comp["hashes"] = [{"alg": "SHA-256", "content": sha256}] + if sig: + comp["properties"].append({"name": "mock:signature:status", "value": sig}) + + return comp + + def is_patch_file(self, filename): + """Determines if a file is a patch file based on common extensions.""" + patch_extensions = ['.patch', '.diff'] + return any(filename.lower().endswith(ext) for ext in patch_extensions) + + def format_epoch_timestamp(self, epoch_value): + """Converts an epoch integer to an ISO 8601 timestamp string.""" + try: + val_int = int(epoch_value) + dt = datetime.fromtimestamp(val_int, timezone.utc) + return dt.isoformat() + except (ValueError, TypeError): + return "" + + def append_source_properties(self, properties, source_entries): + """Appends source and patch references to a component's properties.""" + for i, src in enumerate(source_entries): + filename = src["filename"] + prop_name = f"mock:source:patch{i}" if self.is_patch_file(filename) else f"mock:source:file{i}" + properties.append({ + "name": prop_name, + "value": src["bom-ref"] + }) + + def get_source_file_bom_refs(self, _package_name, source_files): + """Returns a list of bom-refs for source files.""" + refs = [] + for src in source_files: + filename = 
src["filename"] + sha256 = src.get("sha256") + safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', filename) + hash_suffix = sha256[:8] if sha256 else "unknown" + bom_ref = f"source-file:{safe_name}-{hash_suffix}" + refs.append(bom_ref) + return refs + + def get_iso_timestamp(self): + """Returns the current UTC time in ISO 8601 format.""" + return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + def create_dependency(self, bom_ref, dependencies, component_map, distro_obj): + """Creates a dependency entry mapping raw requires to parsed bom-refs.""" + dep_entry = { + "ref": bom_ref, + "dependsOn": [] + } + for raw_dep in dependencies: + target_ref = self.dependency_to_bom_ref(raw_dep, component_map, distro_obj) + if target_ref and target_ref not in dep_entry["dependsOn"] and target_ref != bom_ref: + dep_entry["dependsOn"].append(target_ref) + + return dep_entry if dep_entry["dependsOn"] else None + + def dependency_to_bom_ref(self, dependency_string, component_map, _distro): + """ + Attempts to map a raw RPM dependency string (e.g., 'libc.so.6', 'bash >= 4.0') + to a concrete bom-ref in the component_map. 
+ """ + if not dependency_string: + return None + + clean_dep = dependency_string.strip() + + if " " in clean_dep: + # Handle forms like 'bash >= 5.0' -> just look for 'bash' + pkg_name = clean_dep.split()[0].strip() + # If the requirement is a package name we know about + if pkg_name in component_map: + return component_map[pkg_name] + + # Sometimes dependencies look like 'config(bash) = 5.0' + if clean_dep.startswith("config(") and ")" in clean_dep: + inner_name = clean_dep[7:clean_dep.find(")")] + if inner_name in component_map: + return component_map[inner_name] + + else: + # Handle raw names like 'bash' or 'libc.so.6' + if clean_dep in component_map: + return component_map[clean_dep] + + # Check if any component *provides* this string (this is an approximation, + # true resolution requires full RPM capability mapping which is slow) + # For now, we rely on the direct package name match which covers 80% of cases. + + return None + def process_built_packages(self, bom, rpm_files, build_dir, distro_id, + source_component_entries, build_subject_name, + build_toolchain_packages, toolchain_bom_refs): + """Processes binary RPMs and creates structured CycloneDX components and dependencies.""" + built_package_bom_refs = [] + all_built_components = [] + component_map = {} + primary_rpm_metadata = None + + # Build component map from toolchain packages + for toolchain_pkg in build_toolchain_packages: + pkg_name = toolchain_pkg.get("name") + pkg_version = toolchain_pkg.get("version") + if pkg_name and pkg_version: + purl = self.rpm_helper.generate_purl(pkg_name, pkg_version, distro_id) + component_map[pkg_name.lower()] = purl + + for rpm_file in rpm_files: + rpm_path = os.path.join(build_dir, rpm_file) + component = self.create_built_package_component( + rpm_path, distro_id, source_component_entries + ) + if not component: + continue + + bom_ref = component.get("bom-ref") + package_name = component.get("name") + package_version = component.get("version") + + if bom_ref: + 
built_package_bom_refs.append(bom_ref) + if package_name: + component_map[package_name.lower()] = bom_ref + + bom["components"].append(component) + + # Determine primary RPM metadata + if not primary_rpm_metadata: + if not package_name or 'debuginfo' not in package_name.lower(): + primary_rpm_metadata = self.rpm_helper.get_rpm_metadata(rpm_path) + else: + current_name = primary_rpm_metadata.get('name', '').lower() + is_current_debuginfo = 'debuginfo' in current_name + should_replace = False + if (is_current_debuginfo and package_name and + 'debuginfo' not in package_name.lower()): + should_replace = True + elif (build_subject_name and package_name and + package_name.lower() == build_subject_name.lower()): + should_replace = True + + if should_replace: + self.buildroot.root_log.debug(f"[SBOM] Selecting {package_name} as primary metadata source") + primary_rpm_metadata = self.rpm_helper.get_rpm_metadata(rpm_path) + + # File components + if package_name and package_version and self.include_file_components: + # Extract CPE and GPG info from the component to pass to files + rpm_cpe = None + for ext_ref in component.get("externalReferences", []): + if ext_ref.get("comment") == "CPE 2.3": + rpm_cpe = ext_ref.get("url") + + rpm_gpg = None + for prop in component.get("properties", []): + if prop.get("name") == "mock:signature:key": + rpm_gpg = prop.get("value") + + file_components = self.create_file_components( + rpm_path, package_name, package_version, + rpm_cpe=rpm_cpe, rpm_gpg=rpm_gpg + ) + + if file_components: + if "components" not in component: + component["components"] = [] + + for file_comp in file_components: + # Set scope to required for all files in the produced RPM + file_comp["scope"] = "required" + component["components"].append(file_comp) + + if self.should_include_file_dependency(file_comp.get("name", "")): + bom["dependencies"].append({ + "ref": file_comp["bom-ref"], + "dependsOn": [bom_ref] + }) + + # Sort file components alphabetically + 
component["components"].sort(key=lambda x: x.get("name", "")) + + # Dependencies + dependencies = self.rpm_helper.get_rpm_dependencies(rpm_path) or [] + runtime_dependency = self.create_dependency( + bom_ref, dependencies, component_map, distro_id + ) + + all_depends_on = [] + if runtime_dependency and runtime_dependency.get("dependsOn"): + all_depends_on.extend(runtime_dependency.get("dependsOn")) + + if self.include_toolchain_dependencies and toolchain_bom_refs: + for t_ref in toolchain_bom_refs: + if t_ref not in all_depends_on: + all_depends_on.append(t_ref) + + all_depends_on = sorted(list(set(all_depends_on))) + if all_depends_on: + bom["dependencies"].append({"ref": bom_ref, "dependsOn": all_depends_on}) + elif runtime_dependency: + bom["dependencies"].append(runtime_dependency) + + all_built_components.append(component) + + return built_package_bom_refs, primary_rpm_metadata, all_built_components + + # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements,too-many-positional-arguments + + def finalize_bom_metadata(self, bom, primary_rpm_metadata, built_package_bom_refs, + build_subject_name, build_subject_version, + build_subject_release, distro_id, spec_metadata=None): + """Finalizes BOM metadata, sets the primary component, and adds RPM properties.""" + # Add BuildRequires and Requires from spec if available + if spec_metadata: + metadata_props = [] + build_reqs = spec_metadata.get("build_requires", []) + if build_reqs: + metadata_props.append({ + "name": "mock:spec:build_requires", + "value": ",".join(build_reqs) + }) + + reqs = spec_metadata.get("requires", []) + if reqs: + metadata_props.append({ + "name": "mock:spec:requires", + "value": ",".join(reqs) + }) + + if metadata_props: + bom["metadata"]["properties"] = bom["metadata"].get("properties", []) + bom["metadata"]["properties"].extend(metadata_props) + + if primary_rpm_metadata: + if "properties" not in bom["metadata"]: + bom["metadata"]["properties"] = [] + 
rpm_props = bom["metadata"]["properties"] + for key, prop_name in [("buildhost", "mock:rpm:buildhost"), + ("buildtime", "mock:rpm:buildtime"), + ("group", "mock:rpm:group"), + ("epoch", "mock:rpm:epoch"), + ("distribution", "mock:rpm:distribution")]: + val = primary_rpm_metadata.get(key) + if val and val != "(none)" and (key != "epoch" or val.strip()): + rpm_props.append({"name": prop_name, "value": val}) + + vendor = primary_rpm_metadata.get("vendor") + if vendor and vendor != "(none)": + bom["metadata"]["manufacturer"] = {"name": vendor} + bom["metadata"]["authors"] = [{"name": vendor}] + + packager = primary_rpm_metadata.get("packager") + if packager and packager != "(none)": + bom["metadata"]["supplier"] = {"name": packager} + + if built_package_bom_refs: + if len(built_package_bom_refs) == 1: + primary_ref = built_package_bom_refs[0] + primary_component = next((c for c in bom["components"] + if c.get("bom-ref") == primary_ref), None) + if primary_component: + component_obj = { + "type": primary_component.get("type", "application"), + "name": primary_component.get("name"), + "version": primary_component.get("version"), + "bom-ref": primary_ref, + "purl": primary_component.get("purl") + } + if primary_component.get("description"): + component_obj["description"] = primary_component.get("description") + elif primary_rpm_metadata: + summary = primary_rpm_metadata.get("summary") + if summary and summary != "(none)": + component_obj["description"] = summary + + external_refs = [] + if primary_rpm_metadata: + sourcerpm = primary_rpm_metadata.get("sourcerpm") + if sourcerpm and sourcerpm != "(none)": + external_refs.append({"type": "distribution", "url": sourcerpm}) + url = primary_rpm_metadata.get("url") + if url and url != "(none)": + external_refs.append({"type": "website", "url": url}) + if external_refs: + component_obj["externalReferences"] = external_refs + + if primary_component.get("licenses"): + component_obj["licenses"] = primary_component.get("licenses") + 
elif primary_rpm_metadata: + lic = primary_rpm_metadata.get("license") + if lic and lic != "(none)": + component_obj["licenses"] = [{"expression": lic}] + bom["metadata"]["component"] = component_obj + else: + first_pkg = next((c for c in bom["components"] + if c.get("bom-ref") == built_package_bom_refs[0]), None) + if first_pkg: + aggregate_name = build_subject_name or first_pkg.get("name", "unknown") + aggregate_version = None + if build_subject_version and build_subject_release: + aggregate_version = f"{build_subject_version}-{build_subject_release}" + elif primary_rpm_metadata: + v = primary_rpm_metadata.get("version") + r = primary_rpm_metadata.get("release") + if v and r: + aggregate_version = f"{v}-{r}" + if not aggregate_version: + aggregate_version = first_pkg.get("version", "unknown") + + description = ( + f"Build output containing {len(built_package_bom_refs)} package(s)" + ) + if primary_rpm_metadata: + summary = primary_rpm_metadata.get("summary") + if summary and summary != "(none)": + description = f"{summary} ({description})" + + component_obj = { + "type": "application", + "name": aggregate_name, + "version": aggregate_version, + "bom-ref": f"build-output:{aggregate_name}", + "description": description + } + if primary_rpm_metadata: + lic = primary_rpm_metadata.get("license") + if lic and lic != "(none)": + component_obj["licenses"] = [{"expression": lic}] + elif spec_metadata and spec_metadata.get("license"): + component_obj["licenses"] = [{"expression": spec_metadata["license"]}] + + if aggregate_name and aggregate_version: + component_obj["purl"] = self.rpm_helper.generate_purl( + aggregate_name, aggregate_version, distro_id + ) + bom["metadata"]["component"] = component_obj + + # pylint: disable=too-many-locals,too-many-branches,too-many-statements + + def finalize_dependencies(self, bom, source_component_entries, + build_toolchain_packages, distro_id, + built_package_bom_refs, toolchain_bom_refs, + spec_metadata=None, + source_components=None, 
+ toolchain_components=None, + all_built_components=None): + """Finalizes BOM dependencies, linking primary package to hierarchical grouping components + and implementing nested component composition.""" + # Find primary component ref (metadata.component or first built package) + primary_ref = None + if bom.get("metadata") and bom["metadata"].get("component"): + primary_ref = bom["metadata"]["component"].get("bom-ref") + + if not primary_ref: + return + + # Create virtual grouping references + inputs_ref = "build:inputs" + toolchain_ref = "build:toolchain" + outputs_ref = "build:outputs" + + # Prepare grouping components + inputs_group = { + "type": "application", + "bom-ref": inputs_ref, + "name": "Build Inputs", + "description": "Source code and patches used for the build", + "properties": [{"name": "mock:type", "value": "grouping-node"}] + } + if source_components: + inputs_group["components"] = sorted(source_components, key=lambda x: x.get("name", "")) + + toolchain_group = { + "type": "application", + "bom-ref": toolchain_ref, + "name": "Build Toolchain", + "description": "Packages and tools used to perform the build", + "scope": "excluded", # Tools are not part of the runtime payload + "properties": [{"name": "mock:type", "value": "grouping-node"}] + } + if toolchain_components: + # Group toolchain components by their GPG Key ID + signer_groups = {} + pkg_map = {p.get("name"): p for p in build_toolchain_packages} + + for comp in toolchain_components: + comp["scope"] = "excluded" + pkg_info = pkg_map.get(comp.get("name")) + sig_info = pkg_info.get("digital_signature", {}) if pkg_info else {} + key_id = sig_info.get("signature_key", "unsigned") + + # Attach signature properties to the individual package component + if sig_info: + sig_props = self.signature_info_to_properties(sig_info) + comp["properties"] = comp.get("properties", []) + comp["properties"].extend([p for p in sig_props if p["name"] != "mock:signature:raw"]) + + if key_id not in signer_groups: + # 
Create group properties - common only to the signer + group_props = [ + {"name": "mock:role", "value": "build-toolchain"}, + {"name": "mock:type", "value": "signer-group"}, + {"name": "mock:signature:key", "value": key_id} + ] + + signer_groups[key_id] = { + "type": "application", + "bom-ref": f"signer:{key_id}", + "name": f"Packages signed by {key_id}" if key_id != "unsigned" else "Unsigned Packages", + "scope": "excluded", + "properties": group_props, + "components": [] + } + signer_groups[key_id]["components"].append(comp) + + # Add signer groups as children of toolchain_group + sorted_groups = sorted( + list(signer_groups.values()), + key=lambda x: x.get("name", "") + ) + for group in sorted_groups: + group["components"].sort(key=lambda x: x.get("name", "")) + + toolchain_group["components"] = sorted_groups + + outputs_group = { + "type": "application", + "bom-ref": outputs_ref, + "name": "RPM Contents", + "description": "RPM packages and their contained files produced by the build", + "scope": "required", + "properties": [{"name": "mock:type", "value": "grouping-node"}] + } + if all_built_components: + outputs_group["components"] = sorted(all_built_components, key=lambda x: x.get("name", "")) + + # Nest groups into the primary component + primary_comp = bom["metadata"]["component"] + primary_comp["components"] = [inputs_group, toolchain_group, outputs_group] + # Sort metadata components alphabetically + primary_comp["components"].sort(key=lambda x: x.get("name", "")) + + # 1. Primary component depends on the three groups + bom["dependencies"].append({ + "ref": primary_ref, + "dependsOn": sorted([inputs_ref, toolchain_ref, outputs_ref]) + }) + + # 2. Build Inputs Group -> Source components + input_deps = [] + for entry in source_component_entries: + if entry.get("bom-ref"): + input_deps.append(entry["bom-ref"]) + + if input_deps: + bom["dependencies"].append({ + "ref": inputs_ref, + "dependsOn": sorted(list(set(input_deps))) + }) + + # 3. 
Build Toolchain Group -> Signer Groups + signer_refs = [g["bom-ref"] for g in toolchain_group.get("components", [])] + if signer_refs: + bom["dependencies"].append({ + "ref": toolchain_ref, + "dependsOn": sorted(signer_refs) + }) + + # 3b. Signer Groups -> Individual packages + for group in toolchain_group["components"]: + pkg_refs = [c["bom-ref"] for c in group["components"]] + bom["dependencies"].append({ + "ref": group["bom-ref"], + "dependsOn": sorted(pkg_refs) + }) + + # 4. RPM Contents Group -> Built RPMs (Packages) + if built_package_bom_refs: + bom["dependencies"].append({ + "ref": outputs_ref, + "dependsOn": sorted(list(set(built_package_bom_refs))) + }) + + + + def create_toolchain_component(self, toolchain_pkg, distro_obj): + """Creates a CycloneDX component for a build toolchain package.""" + package_name = toolchain_pkg.get("name") + version = toolchain_pkg.get("version") + + if not package_name or not version: + return None + + # Generate PURL and bom-ref + purl = self.rpm_helper.generate_purl(package_name, version, distro_obj, arch=toolchain_pkg.get("arch")) + bom_ref = purl + + component = { + "type": "library", + "bom-ref": bom_ref, + "name": package_name, + "version": version, + "purl": purl + } + + # Add checksum - REMOVED per user request to only have hashes for files contained in RPM + # (This follows the rule that only the 'RPM Contents' section should have hashes) + # checksum = toolchain_pkg.get("checksum") + # if checksum and checksum != "error" and not checksum.startswith("error"): + # if len(checksum) == 64: + # alg = "SHA-256" + # elif len(checksum) == 40: + # alg = "SHA-1" + # else: + # alg = "SHA-256" + # component["hashes"] = [{"alg": alg, "content": checksum}] + + # Add CPE + cpe = toolchain_pkg.get("cpe") + if cpe: + component["externalReferences"] = [ + { + "type": "other", + "comment": "CPE 2.3", + "url": cpe + } + ] + + # Add license + license_str = toolchain_pkg.get("licenseDeclared") + if license_str and license_str != 
"(none)": + component["licenses"] = [ + { + "expression": license_str + } + ] + + # Add properties + properties = [] + + # Add build date if available + signature_info = toolchain_pkg.get("digital_signature", {}) + build_date = signature_info.get("build_date") + if build_date: + properties.append({ + "name": "mock:build:date", + "value": build_date + }) + + if properties: + component["properties"] = properties + + return component + + + def create_file_components(self, rpm_path, package_name, package_version, + rpm_cpe=None, rpm_gpg=None): + """Creates file components for all files in an RPM package.""" + if not self.include_file_components: + return [] + + file_info = self.rpm_helper.get_rpm_file_info(rpm_path) + if not file_info: + return [] + + file_list = sorted(file_info.keys()) + + file_components = [] + for file_path in file_list: + if not file_path or not file_path.strip(): + continue + + # Filtering logic + if not self.include_debug_files and ("/usr/lib/debug/" in file_path or "/usr/src/debug/" in file_path): + self.buildroot.root_log.debug(f"[SBOM] Filtering debug file: {file_path}") + continue + + if not self.include_man_pages and ("/usr/share/man/" in file_path): + self.buildroot.root_log.debug(f"[SBOM] Filtering man page: {file_path}") + continue + + # Filter files based on configuration + if not self.include_debug_files: + if '/usr/lib/debug/' in file_path or file_path.endswith('.debug'): + continue + + file_data = file_info.get(file_path, {}) + file_hash = file_data.get("hash") + algo_id = file_data.get("algo") + + bom_ref = self.generate_file_bom_ref(package_name, package_version, file_path) + component = { + "type": "file", + "bom-ref": bom_ref, + "name": file_path + } + + # Add hash if available with detected algorithm + if file_hash: + # Map RPM algo ID to CycloneDX algo name + # 8: SHA-256, 10: SHA-512, 1: MD5, 2: SHA-1 + algo_map = { + 8: "SHA-256", + 10: "SHA-512", + 1: "MD5", + 2: "SHA-1", + 9: "SHA-384", + 11: "SHA-224" + } + alg_name = 
algo_map.get(algo_id, "SHA-256") + + component["hashes"] = [ + { + "alg": alg_name, + "content": file_hash + } + ] + + # Add properties for file metadata + properties = [] + if file_data.get("permissions"): + properties.append({ + "name": "mock:file:permissions", + "value": file_data["permissions"] + }) + if file_data.get("owner"): + properties.append({ + "name": "mock:file:owner", + "value": file_data["owner"] + }) + if file_data.get("group"): + properties.append({ + "name": "mock:file:group", + "value": file_data["group"] + }) + + if rpm_cpe: + properties.append({ + "name": "mock:package:cpe", + "value": rpm_cpe + }) + if rpm_gpg: + properties.append({ + "name": "mock:package:gpg:key", + "value": rpm_gpg + }) + + if properties: + component["properties"] = properties + + file_components.append(component) + + return file_components + + + def should_include_file_dependency(self, file_path): + """Determine if a file should have a dependency entry.""" + if not self.include_file_dependencies: + return False + + # Filter out debug files if configured + if not self.include_debug_files: + if '/usr/lib/debug/' in file_path or file_path.endswith('.debug'): + return False + + # Filter out man pages if configured + if not self.include_man_pages: + if ( + '/usr/share/man/' in file_path or + (file_path.endswith('.gz') and '/man' in file_path) + ): + return False + + return True + diff --git a/mock/py/mockbuild/plugins/sbom_generator.py b/mock/py/mockbuild/plugins/sbom_generator.py index da37f4f4c..c506cb018 100644 --- a/mock/py/mockbuild/plugins/sbom_generator.py +++ b/mock/py/mockbuild/plugins/sbom_generator.py @@ -5,18 +5,16 @@ # Copyright (C) 2026, Atomicorp, Inc. 
"""Mock plugin for generating CycloneDX SBOMs from built RPM packages.""" +from mockbuild.plugins.sbom_utils import RpmQueryHelper +from mockbuild.plugins.sbom_spdx import SpdxGenerator +from mockbuild.plugins.sbom_cyclonedx import CycloneDxGenerator import os import json import subprocess -import hashlib -import re import socket -import uuid import traceback from datetime import datetime, timezone -import distro -import rpm # pylint: disable=no-member @@ -32,19 +30,14 @@ def init(plugins, conf, buildroot): """Initializes the SBOM generator plugin.""" # Ensure configuration exists for the plugin - if "sbom_generator_opts" not in conf: - conf["sbom_generator_opts"] = {} - - # Check for valid SBOM type configuration - opts = conf["sbom_generator_opts"] - if "type" in opts and opts["type"] not in ("cyclonedx", "spdx"): + if "type" in conf and conf["type"] not in ("cyclonedx", "spdx"): # We only support cyclonedx and spdx for now buildroot.root_log.warning( - f"SBOM generator type '{opts['type']}' not supported, defaulting to 'cyclonedx'" + f"SBOM generator type '{conf['type']}' not supported, defaulting to 'cyclonedx'" ) - opts["type"] = "cyclonedx" + conf["type"] = "cyclonedx" - SBOMGenerator(plugins, conf["sbom_generator_opts"], buildroot) + SBOMGenerator(plugins, conf, buildroot) class SBOMGenerator: """Generates SBOM for the built packages.""" @@ -53,10 +46,13 @@ class SBOMGenerator: def __init__(self, plugins, conf, buildroot): self.buildroot = buildroot + self.conf = conf + self.rpm_helper = RpmQueryHelper(self.buildroot) + self.spdx_gen = SpdxGenerator(self.rpm_helper, self.buildroot, conf=self.conf) + self.cdx_gen = CycloneDxGenerator(self.rpm_helper, self.buildroot, conf=self.conf) self.state = buildroot.state self.rootdir = buildroot.rootdir self.builddir = buildroot.builddir - self.conf = conf self.sbom_enabled = self.conf.get('generate_sbom', True) self.sbom_type = self.conf.get('type', 'cyclonedx') self.sbom_done = False @@ -89,7 +85,7 @@ def 
_capture_prebuild_state(self): if file.endswith('.spec'): spec_file = os.path.join(specs_dir, file) self.buildroot.root_log.debug(f"Parsing spec file for pre-build state: {spec_file}") - metadata, sources = self.parse_spec_file(spec_file) + metadata, sources = self.rpm_helper.parse_spec_file(spec_file) self.prebuild_spec_metadata = metadata self.prebuild_source_files = sources break @@ -98,18 +94,6 @@ def _capture_prebuild_state(self): except Exception as e: self.buildroot.root_log.debug(f"Failed to capture pre-build state: {e}") - def _create_cyclonedx_document(self): - """Creates the base CycloneDX document structure.""" - return { - "bomFormat": "CycloneDX", - "specVersion": "1.5", - "serialNumber": f"urn:uuid:{uuid.uuid4()}", - "version": 1, - "metadata": self._create_metadata(), - "components": [], - "dependencies": [] - } - def _create_metadata(self): """Creates CycloneDX metadata object with Mock-specific build information.""" metadata = { @@ -150,7 +134,7 @@ def _create_metadata(self): "value": socket.gethostname() }) - distro_name = self.get_distribution() + distro_name = self.rpm_helper.get_distribution() if distro_name: properties.append({ "name": "mock:build:distribution", @@ -338,25 +322,6 @@ def _contains_flag(flag): return properties - def _generate_purl(self, package_name, version, distro_obj=None, arch=None): - """Generate Package URL (PURL) for RPM package.""" - if not distro_obj: - distro_obj = self.detect_chroot_distribution() or "fedora" - - # Clean package name for PURL (lowercase, no special chars) - clean_name = re.sub(r'[^a-zA-Z0-9._-]', '-', package_name.lower()) - - purl = f"pkg:rpm/{distro_obj}/{clean_name}@{version}" - if arch: - purl += f"?arch={arch}" - return purl - - def _generate_bom_ref(self, package_name, version, _component_type="package"): - """Generate a unique bom-ref identifier for a component.""" - # Use PURL as bom-ref for consistency - distro_obj = self.detect_chroot_distribution() or "fedora" - return 
self._generate_purl(package_name, version, distro_obj) - def _find_build_artifacts(self, build_dir): """Locates RPMs, source RPMs, and spec files in the build directory.""" rpm_files = [] @@ -409,7 +374,7 @@ def _get_build_subject_metadata(self, spec_file, src_rpm_files, build_dir): build_subject_version = spec_metadata.get("version") build_subject_release = spec_metadata.get("release") elif spec_file: - spec_metadata, parsed_sources = self.parse_spec_file(spec_file) + spec_metadata, parsed_sources = self.rpm_helper.parse_spec_file(spec_file) if spec_metadata: build_subject_name = spec_metadata.get("name") build_subject_version = spec_metadata.get("version") @@ -419,7 +384,7 @@ def _get_build_subject_metadata(self, spec_file, src_rpm_files, build_dir): if src_rpm_files: srpm_path = os.path.join(build_dir, src_rpm_files[0]) - srpm_metadata = self.get_rpm_metadata(srpm_path) + srpm_metadata = self.rpm_helper.get_rpm_metadata(srpm_path) if srpm_metadata: if not build_subject_name: build_subject_name = srpm_metadata.get("name") @@ -430,14 +395,16 @@ def _get_build_subject_metadata(self, spec_file, src_rpm_files, build_dir): if not source_files: # Extract metadata for source files from source RPM without full extraction - source_files = self.extract_source_files_from_srpm(srpm_path) + source_files = self.rpm_helper.extract_source_files_from_srpm(srpm_path) # Record the source RPM itself as an input artifact srpm_name = src_rpm_files[0] - srpm_sig = self.get_rpm_signature(srpm_path) + srpm_sig = self.rpm_helper.get_rpm_signature(srpm_path) + srpm_hash = self.rpm_helper.hash_file(srpm_path) # Add to the beginning of the list for visibility source_files.insert(0, { "filename": srpm_name, + "sha256": srpm_hash, "digital_signature": srpm_sig, "source_type": "source_rpm" }) @@ -447,28 +414,12 @@ def _get_build_subject_metadata(self, spec_file, src_rpm_files, build_dir): build_subject_release, source_files ) - def _add_source_components(self, _bom, source_files): - """Converts 
source files to CycloneDX components and returns components and metadata entries.""" - source_components = [] - source_component_entries = [] - for source_file in source_files: - component = self._create_source_file_component(source_file) - if component: - source_components.append(component) - filename = source_file.get("filename") - source_component_entries.append({ - "filename": filename, - "bom-ref": component.get("bom-ref"), - "type": "patch" if self._is_patch_file(filename) else "source" - }) - return source_components, source_component_entries - def _add_toolchain_components(self, _bom, build_toolchain_packages, distro_id): """Adds toolchain components to the BOM and returns their components and bom-refs.""" toolchain_components = [] toolchain_bom_refs = [] for toolchain_pkg in build_toolchain_packages: - component = self._create_toolchain_component(toolchain_pkg, distro_id) + component = self.cdx_gen.create_toolchain_component(toolchain_pkg, distro_id) if component: bom_ref = component.get("bom-ref") if bom_ref: @@ -480,6 +431,7 @@ def _add_toolchain_components(self, _bom, build_toolchain_packages, distro_id): # pylint: disable=too-many-locals def _generate_sbom_post_build_hook(self): """Plugin hook called after the build is complete.""" + self.buildroot.root_log.debug("[SBOM] Starting post-build SBOM generation") if self.sbom_done or not self.sbom_enabled: return @@ -503,12 +455,12 @@ def _generate_sbom_post_build_hook(self): ) = self._get_build_subject_metadata(spec_file, src_rpm_files, build_dir) if not build_subject_name or not build_subject_version or not build_subject_release: - self.buildroot.root_log.debug("Cannot generate SBOM - build metadata incomplete") + self.buildroot.root_log.debug("[SBOM] Cannot generate SBOM - build metadata incomplete") return # Gather common data - distro_id = self.detect_chroot_distribution() or "fedora" - build_toolchain_packages = self.get_build_toolchain_packages() + distro_id = 
self.rpm_helper.detect_chroot_distribution() or "unknown" + build_toolchain_packages = self.rpm_helper.get_build_toolchain_packages() # Dispatch based on type if self.sbom_type == "spdx": @@ -517,10 +469,14 @@ def _generate_sbom_post_build_hook(self): ) out_file = os.path.join(self.buildroot.resultdir, sbom_filename) - doc = self._generate_spdx_document( + # Collect hardening flags + hardening_props = self._collect_build_hardening_properties() + + doc = self.spdx_gen.generate_spdx_document( build_subject_name, build_subject_version, build_subject_release, build_dir, rpm_files, source_files, - build_toolchain_packages, distro_id, spec_metadata=spec_metadata + build_toolchain_packages, distro_id, + spec_metadata=spec_metadata, hardening_props=hardening_props ) with open(out_file, "w", encoding="utf-8") as f: @@ -536,10 +492,10 @@ def _generate_sbom_post_build_hook(self): out_file = os.path.join(self.buildroot.resultdir, sbom_filename) # Create CycloneDX document - bom = self._create_cyclonedx_document() + bom = self.cdx_gen.create_cyclonedx_document() # Add source and toolchain components - source_components, source_component_entries = self._add_source_components(bom, source_files) + source_components, source_component_entries = self.cdx_gen.add_source_components(bom, source_files) toolchain_components, toolchain_bom_refs = self._add_toolchain_components( bom, build_toolchain_packages, distro_id ) @@ -547,17 +503,17 @@ def _generate_sbom_post_build_hook(self): # Process binary RPMs and convert to components ( built_package_bom_refs, primary_rpm_metadata, all_built_components - ) = self._process_built_packages( + ) = self.cdx_gen.process_built_packages( bom, rpm_files + src_rpm_files, build_dir, distro_id, source_component_entries, build_subject_name, build_toolchain_packages, toolchain_bom_refs ) # Add RPM-specific metadata and finalize dependencies - self._finalize_bom_metadata(bom, primary_rpm_metadata, built_package_bom_refs, + 
self.cdx_gen.finalize_bom_metadata(bom, primary_rpm_metadata, built_package_bom_refs, build_subject_name, build_subject_version, build_subject_release, distro_id, spec_metadata=spec_metadata) - self._finalize_dependencies(bom, source_component_entries, + self.cdx_gen.finalize_dependencies(bom, source_component_entries, build_toolchain_packages, distro_id, built_package_bom_refs, toolchain_bom_refs, spec_metadata=spec_metadata, @@ -573,407 +529,18 @@ def _generate_sbom_post_build_hook(self): # pylint: disable=broad-exception-caught except Exception as e: - self.buildroot.root_log.debug(f"An error occurred during SBOM generation: {e}") + self.buildroot.root_log.debug(f"[SBOM] FAILED: An error occurred during SBOM generation: {e}") traceback.print_exc() finally: self.sbom_done = True self.state.finish(state_text) # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements,too-many-positional-arguments - def _process_built_packages(self, bom, rpm_files, build_dir, distro_id, - source_component_entries, build_subject_name, - build_toolchain_packages, toolchain_bom_refs): - """Processes binary RPMs and creates structured CycloneDX components and dependencies.""" - built_package_bom_refs = [] - all_built_components = [] - component_map = {} - primary_rpm_metadata = None - - # Build component map from toolchain packages - for toolchain_pkg in build_toolchain_packages: - pkg_name = toolchain_pkg.get("name") - pkg_version = toolchain_pkg.get("version") - if pkg_name and pkg_version: - purl = self._generate_purl(pkg_name, pkg_version, distro_id) - component_map[pkg_name.lower()] = purl - - for rpm_file in rpm_files: - rpm_path = os.path.join(build_dir, rpm_file) - component = self._create_built_package_component( - rpm_path, distro_id, source_component_entries - ) - if not component: - continue - - bom_ref = component.get("bom-ref") - package_name = component.get("name") - package_version = component.get("version") - - if bom_ref: - 
built_package_bom_refs.append(bom_ref) - if package_name: - component_map[package_name.lower()] = bom_ref - - bom["components"].append(component) - - # Determine primary RPM metadata - if not primary_rpm_metadata: - if not package_name or 'debuginfo' not in package_name.lower(): - primary_rpm_metadata = self.get_rpm_metadata(rpm_path) - else: - current_name = primary_rpm_metadata.get('name', '').lower() - is_current_debuginfo = 'debuginfo' in current_name - should_replace = False - if (is_current_debuginfo and package_name and - 'debuginfo' not in package_name.lower()): - should_replace = True - elif (build_subject_name and package_name and - package_name.lower() == build_subject_name.lower()): - should_replace = True - - if should_replace: - primary_rpm_metadata = self.get_rpm_metadata(rpm_path) - - # File components - if package_name and package_version and self.include_file_components: - # Extract CPE and GPG info from the component to pass to files - rpm_cpe = None - for ext_ref in component.get("externalReferences", []): - if ext_ref.get("comment") == "CPE 2.3": - rpm_cpe = ext_ref.get("url") - - rpm_gpg = None - for prop in component.get("properties", []): - if prop.get("name") == "mock:signature:key": - rpm_gpg = prop.get("value") - - file_components = self._create_file_components( - rpm_path, package_name, package_version, - rpm_cpe=rpm_cpe, rpm_gpg=rpm_gpg - ) - - if file_components: - if "components" not in component: - component["components"] = [] - - for file_comp in file_components: - # Set scope to required for all files in the produced RPM - file_comp["scope"] = "required" - component["components"].append(file_comp) - - if self._should_include_file_dependency(file_comp.get("name", "")): - bom["dependencies"].append({ - "ref": file_comp["bom-ref"], - "dependsOn": [bom_ref] - }) - - # Sort file components alphabetically - component["components"].sort(key=lambda x: x.get("name", "")) - - # Dependencies - dependencies = 
self.get_rpm_dependencies(rpm_path) - runtime_dependency = self._create_dependency( - bom_ref, dependencies, component_map, distro_id - ) - - all_depends_on = [] - if runtime_dependency and runtime_dependency.get("dependsOn"): - all_depends_on.extend(runtime_dependency.get("dependsOn")) - - if self.include_toolchain_dependencies and toolchain_bom_refs: - for t_ref in toolchain_bom_refs: - if t_ref not in all_depends_on: - all_depends_on.append(t_ref) - - all_depends_on = sorted(list(set(all_depends_on))) - if all_depends_on: - bom["dependencies"].append({"ref": bom_ref, "dependsOn": all_depends_on}) - elif runtime_dependency: - bom["dependencies"].append(runtime_dependency) - - all_built_components.append(component) - - return built_package_bom_refs, primary_rpm_metadata, all_built_components - - # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements,too-many-positional-arguments - def _finalize_bom_metadata(self, bom, primary_rpm_metadata, built_package_bom_refs, - build_subject_name, build_subject_version, - build_subject_release, distro_id, spec_metadata=None): - """Finalizes BOM metadata, sets the primary component, and adds RPM properties.""" - # Add BuildRequires and Requires from spec if available - if spec_metadata: - metadata_props = [] - build_reqs = spec_metadata.get("build_requires", []) - if build_reqs: - metadata_props.append({ - "name": "mock:spec:build_requires", - "value": ",".join(build_reqs) - }) - - reqs = spec_metadata.get("requires", []) - if reqs: - metadata_props.append({ - "name": "mock:spec:requires", - "value": ",".join(reqs) - }) - - if metadata_props: - bom["metadata"]["properties"] = bom["metadata"].get("properties", []) - bom["metadata"]["properties"].extend(metadata_props) - - if primary_rpm_metadata: - rpm_props = bom["metadata"]["properties"] - for key, prop_name in [("buildhost", "mock:rpm:buildhost"), - ("buildtime", "mock:rpm:buildtime"), - ("group", "mock:rpm:group"), - ("epoch", 
"mock:rpm:epoch"), - ("distribution", "mock:rpm:distribution")]: - val = primary_rpm_metadata.get(key) - if val and val != "(none)" and (key != "epoch" or val.strip()): - rpm_props.append({"name": prop_name, "value": val}) - - vendor = primary_rpm_metadata.get("vendor") - if vendor and vendor != "(none)": - bom["metadata"]["manufacturer"] = {"name": vendor} - bom["metadata"]["authors"] = [{"name": vendor}] - - packager = primary_rpm_metadata.get("packager") - if packager and packager != "(none)": - bom["metadata"]["supplier"] = {"name": packager} - - if built_package_bom_refs: - if len(built_package_bom_refs) == 1: - primary_ref = built_package_bom_refs[0] - primary_component = next((c for c in bom["components"] - if c.get("bom-ref") == primary_ref), None) - if primary_component: - component_obj = { - "type": primary_component.get("type", "application"), - "name": primary_component.get("name"), - "version": primary_component.get("version"), - "bom-ref": primary_ref, - "purl": primary_component.get("purl") - } - if primary_component.get("description"): - component_obj["description"] = primary_component.get("description") - elif primary_rpm_metadata: - summary = primary_rpm_metadata.get("summary") - if summary and summary != "(none)": - component_obj["description"] = summary - - external_refs = [] - if primary_rpm_metadata: - sourcerpm = primary_rpm_metadata.get("sourcerpm") - if sourcerpm and sourcerpm != "(none)": - external_refs.append({"type": "distribution", "url": sourcerpm}) - url = primary_rpm_metadata.get("url") - if url and url != "(none)": - external_refs.append({"type": "website", "url": url}) - if external_refs: - component_obj["externalReferences"] = external_refs - - if primary_component.get("licenses"): - component_obj["licenses"] = primary_component.get("licenses") - elif primary_rpm_metadata: - lic = primary_rpm_metadata.get("license") - if lic and lic != "(none)": - component_obj["licenses"] = [{"expression": lic}] - bom["metadata"]["component"] = 
component_obj - else: - first_pkg = next((c for c in bom["components"] - if c.get("bom-ref") == built_package_bom_refs[0]), None) - if first_pkg: - aggregate_name = build_subject_name or first_pkg.get("name", "unknown") - aggregate_version = None - if build_subject_version and build_subject_release: - aggregate_version = f"{build_subject_version}-{build_subject_release}" - elif primary_rpm_metadata: - v = primary_rpm_metadata.get("version") - r = primary_rpm_metadata.get("release") - if v and r: - aggregate_version = f"{v}-{r}" - if not aggregate_version: - aggregate_version = first_pkg.get("version", "unknown") - - description = ( - f"Build output containing {len(built_package_bom_refs)} package(s)" - ) - if primary_rpm_metadata: - summary = primary_rpm_metadata.get("summary") - if summary and summary != "(none)": - description = f"{summary} ({description})" - - component_obj = { - "type": "application", - "name": aggregate_name, - "version": aggregate_version, - "bom-ref": f"build-output:{aggregate_name}", - "description": description - } - if primary_rpm_metadata: - lic = primary_rpm_metadata.get("license") - if lic and lic != "(none)": - component_obj["licenses"] = [{"expression": lic}] - elif spec_metadata and spec_metadata.get("license"): - component_obj["licenses"] = [{"expression": spec_metadata["license"]}] - - if aggregate_name and aggregate_version: - component_obj["purl"] = self._generate_purl( - aggregate_name, aggregate_version, distro_id - ) - bom["metadata"]["component"] = component_obj - - # pylint: disable=too-many-locals,too-many-branches,too-many-statements - def _finalize_dependencies(self, bom, source_component_entries, - build_toolchain_packages, distro_id, - built_package_bom_refs, toolchain_bom_refs, - spec_metadata=None, - source_components=None, - toolchain_components=None, - all_built_components=None): - """Finalizes BOM dependencies, linking primary package to hierarchical grouping components - and implementing nested component 
composition.""" - # Find primary component ref (metadata.component or first built package) - primary_ref = None - if bom.get("metadata") and bom["metadata"].get("component"): - primary_ref = bom["metadata"]["component"].get("bom-ref") - - if not primary_ref: - return - - # Create virtual grouping references - inputs_ref = "build:inputs" - toolchain_ref = "build:toolchain" - outputs_ref = "build:outputs" - - # Prepare grouping components - inputs_group = { - "type": "application", - "bom-ref": inputs_ref, - "name": "Build Inputs", - "description": "Source code and patches used for the build", - "properties": [{"name": "mock:type", "value": "grouping-node"}] - } - if source_components: - inputs_group["components"] = sorted(source_components, key=lambda x: x.get("name", "")) - - toolchain_group = { - "type": "application", - "bom-ref": toolchain_ref, - "name": "Build Toolchain", - "description": "Packages and tools used to perform the build", - "scope": "excluded", # Tools are not part of the runtime payload - "properties": [{"name": "mock:type", "value": "grouping-node"}] - } - if toolchain_components: - # Group toolchain components by their GPG Key ID - signer_groups = {} - pkg_map = {p.get("name"): p for p in build_toolchain_packages} - - for comp in toolchain_components: - comp["scope"] = "excluded" - pkg_info = pkg_map.get(comp.get("name")) - sig_info = pkg_info.get("digital_signature", {}) if pkg_info else {} - key_id = sig_info.get("signature_key", "unsigned") - - # Attach signature properties to the individual package component - if sig_info: - sig_props = self._signature_info_to_properties(sig_info) - comp["properties"] = comp.get("properties", []) - comp["properties"].extend([p for p in sig_props if p["name"] != "mock:signature:raw"]) - - if key_id not in signer_groups: - # Create group properties - common only to the signer - group_props = [ - {"name": "mock:role", "value": "build-toolchain"}, - {"name": "mock:type", "value": "signer-group"}, - {"name": 
"mock:signature:key", "value": key_id} - ] - - signer_groups[key_id] = { - "type": "application", - "bom-ref": f"signer:{key_id}", - "name": f"Packages signed by {key_id}" if key_id != "unsigned" else "Unsigned Packages", - "scope": "excluded", - "properties": group_props, - "components": [] - } - signer_groups[key_id]["components"].append(comp) - - # Add signer groups as children of toolchain_group - sorted_groups = sorted( - list(signer_groups.values()), - key=lambda x: x.get("name", "") - ) - for group in sorted_groups: - group["components"].sort(key=lambda x: x.get("name", "")) - - toolchain_group["components"] = sorted_groups - - outputs_group = { - "type": "application", - "bom-ref": outputs_ref, - "name": "RPM Contents", - "description": "RPM packages and their contained files produced by the build", - "scope": "required", - "properties": [{"name": "mock:type", "value": "grouping-node"}] - } - if all_built_components: - outputs_group["components"] = sorted(all_built_components, key=lambda x: x.get("name", "")) - - # Nest groups into the primary component - primary_comp = bom["metadata"]["component"] - primary_comp["components"] = [inputs_group, toolchain_group, outputs_group] - # Sort metadata components alphabetically - primary_comp["components"].sort(key=lambda x: x.get("name", "")) - - # 1. Primary component depends on the three groups - bom["dependencies"].append({ - "ref": primary_ref, - "dependsOn": sorted([inputs_ref, toolchain_ref, outputs_ref]) - }) - - # 2. Build Inputs Group -> Source components - input_deps = [] - for entry in source_component_entries: - if entry.get("bom-ref"): - input_deps.append(entry["bom-ref"]) - - if input_deps: - bom["dependencies"].append({ - "ref": inputs_ref, - "dependsOn": sorted(list(set(input_deps))) - }) - - # 3. 
Build Toolchain Group -> Signer Groups - signer_refs = [g["bom-ref"] for g in toolchain_group.get("components", [])] - if signer_refs: - bom["dependencies"].append({ - "ref": toolchain_ref, - "dependsOn": sorted(signer_refs) - }) - - # 3b. Signer Groups -> Individual packages - for group in toolchain_group["components"]: - pkg_refs = [c["bom-ref"] for c in group["components"]] - bom["dependencies"].append({ - "ref": group["bom-ref"], - "dependsOn": sorted(pkg_refs) - }) - - # 4. RPM Contents Group -> Built RPMs (Packages) - if built_package_bom_refs: - bom["dependencies"].append({ - "ref": outputs_ref, - "dependsOn": sorted(list(set(built_package_bom_refs))) - }) - - def _create_built_package_component( self, rpm_path, distro_obj, _source_components=None ): """Creates a CycloneDX component for a built RPM package.""" - package_data = self.get_rpm_metadata(rpm_path) + package_data = self.rpm_helper.get_rpm_metadata(rpm_path) if not package_data: return None @@ -986,7 +553,7 @@ def _create_built_package_component( full_version = f"{version}-{release}" if release else version # Generate PURL and bom-ref - purl = self._generate_purl(package_name, full_version, distro_obj, arch) + purl = self.rpm_helper.generate_purl(package_name, full_version, distro_obj, arch) bom_ref = purl # Determine component type (application vs library) @@ -1003,7 +570,7 @@ def _create_built_package_component( # Add external references (CPE) vendor = package_data.get("vendor") - cpe = self.generate_cpe(package_name, version, vendor=vendor) + cpe = self.rpm_helper.generate_cpe(package_name, version, vendor=vendor) if cpe: component["externalReferences"] = [ { @@ -1019,7 +586,7 @@ def _create_built_package_component( # or if needed for PURL integrity, but we'll prioritize the "only" constraint. 
# rpm_hash = package_data.get("sha256") # if not rpm_hash or rpm_hash == "(none)": - # rpm_hash = self.hash_file(rpm_path) + # rpm_hash = self.rpm_helper.hash_file(rpm_path) # if rpm_hash: # component["hashes"] = [ @@ -1074,7 +641,7 @@ def _create_built_package_component( "value": buildhost }) - buildtime_iso = self._format_epoch_timestamp(package_data.get("buildtime")) + buildtime_iso = self.cdx_gen.format_epoch_timestamp(package_data.get("buildtime")) if buildtime_iso: properties.append({ "name": "mock:rpm:buildtime", @@ -1115,10 +682,10 @@ def _create_built_package_component( component["description"] = summary # Add GPG signature information if available - signature = self.get_rpm_signature(rpm_path) + signature = self.rpm_helper.get_rpm_signature(rpm_path) if signature: # Parse signature info - sig_props = self._parse_signature_to_properties(signature) + sig_props = self.cdx_gen.parse_signature_to_properties(signature) properties.extend(sig_props) # Note: Source/patch file relationships are represented in component properties @@ -1140,664 +707,6 @@ def _create_built_package_component( return component - def _create_toolchain_component(self, toolchain_pkg, distro_obj): - """Creates a CycloneDX component for a build toolchain package.""" - package_name = toolchain_pkg.get("name") - version = toolchain_pkg.get("version") - - if not package_name or not version: - return None - - # Generate PURL and bom-ref - purl = self._generate_purl(package_name, version, distro_obj, arch=toolchain_pkg.get("arch")) - bom_ref = purl - - component = { - "type": "library", - "bom-ref": bom_ref, - "name": package_name, - "version": version, - "purl": purl - } - - # Add checksum - REMOVED per user request to only have hashes for files contained in RPM - # (This follows the rule that only the 'RPM Contents' section should have hashes) - # checksum = toolchain_pkg.get("checksum") - # if checksum and checksum != "error" and not checksum.startswith("error"): - # if len(checksum) == 64: - 
# alg = "SHA-256" - # elif len(checksum) == 40: - # alg = "SHA-1" - # else: - # alg = "SHA-256" - # component["hashes"] = [{"alg": alg, "content": checksum}] - - # Add CPE - cpe = toolchain_pkg.get("cpe") - if cpe: - component["externalReferences"] = [ - { - "type": "other", - "comment": "CPE 2.3", - "url": cpe - } - ] - - # Add license - license_str = toolchain_pkg.get("licenseDeclared") - if license_str and license_str != "(none)": - component["licenses"] = [ - { - "expression": license_str - } - ] - - # Add properties - properties = [] - - # Add build date if available - signature_info = toolchain_pkg.get("digital_signature", {}) - build_date = signature_info.get("build_date") - if build_date: - properties.append({ - "name": "mock:build:date", - "value": build_date - }) - - if properties: - component["properties"] = properties - - return component - - def _create_source_file_component(self, source_file): - """Creates a CycloneDX component for a source file.""" - filename = source_file.get("filename") - if not filename: - return None - - # Generate bom-ref from filename and hash - sha256 = source_file.get("sha256") - if sha256: - bom_ref = f"file:{filename}#{sha256[:16]}" - else: - bom_ref = f"file:{filename}" - - component = { - "type": "file", - "bom-ref": bom_ref, - "name": filename - } - - # Add hash - sha256 = source_file.get("sha256") - if sha256: - component["hashes"] = [ - { - "alg": "SHA-256", - "content": sha256 - } - ] - - # Add properties - properties = [] - - source_type = source_file.get("source_type") - if not source_type: - source_type = "patch" if self._is_patch_file(filename) else "source" - - properties.append({ - "name": "mock:source:type", - "value": source_type - }) - - # Add signature information if available - signature = source_file.get("digital_signature") - if signature: - if source_type == "source_rpm" and not signature.startswith("GPG signature file exists") and not signature.startswith("File is a signature file"): - sig_props = 
self._parse_signature_to_properties(signature) - properties.extend(sig_props) - else: - properties.append({ - "name": "mock:signature:info", - "value": signature - }) - - if properties: - component["properties"] = properties - - return component - - def _is_patch_file(self, filename): - """Returns True if the filename looks like a patch file.""" - if not filename: - return False - lower_name = filename.lower() - return ( - lower_name.startswith("patch") or - lower_name.endswith(".patch") or - lower_name.endswith(".diff") - ) - - def _format_epoch_timestamp(self, epoch_value): - """Convert epoch timestamp string to ISO8601 if possible.""" - if not epoch_value or epoch_value in ("(none)", "None"): - return None - try: - epoch_int = int(epoch_value) - if epoch_int <= 0: - return None - return datetime.fromtimestamp(epoch_int, tz=timezone.utc).isoformat() - except (ValueError, TypeError, OSError, OverflowError): - return epoch_value - - def _append_source_properties(self, properties, source_entries): - """Append source and patch references to component properties.""" - if not source_entries: - return - source_names = set() - patch_names = set() - source_refs = set() - patch_refs = set() - for entry in source_entries: - filename = entry.get("filename") - bom_ref = entry.get("bom_ref") - entry_type = entry.get("type", "source") - if entry_type == "patch": - if filename: - patch_names.add(filename) - if bom_ref: - patch_refs.add(bom_ref) - else: - if filename: - source_names.add(filename) - if bom_ref: - source_refs.add(bom_ref) - if source_names: - properties.append({ - "name": "mock:source:files", - "value": ",".join(sorted(source_names)) - }) - if source_refs: - properties.append({ - "name": "mock:source:refs", - "value": ",".join(sorted(source_refs)) - }) - if patch_names: - properties.append({ - "name": "mock:patch:files", - "value": ",".join(sorted(patch_names)) - }) - if patch_refs: - properties.append({ - "name": "mock:patch:refs", - "value": 
",".join(sorted(patch_refs)) - }) - - def _generate_file_bom_ref(self, package_name, package_version, file_path): - """Generates a bom-ref for a file component within a package. - - Format: file:package-name@version:/absolute/path/to/file - """ - # Normalize file path (ensure it starts with /) - if not file_path.startswith('/'): - file_path = '/' + file_path - - return f"file:{package_name}@{package_version}:{file_path}" - - def _should_include_file_dependency(self, file_path): - """Determine if a file should have a dependency entry.""" - if not self.include_file_dependencies: - return False - - # Filter out debug files if configured - if not self.include_debug_files: - if '/usr/lib/debug/' in file_path or file_path.endswith('.debug'): - return False - - # Filter out man pages if configured - if not self.include_man_pages: - if ( - '/usr/share/man/' in file_path or - (file_path.endswith('.gz') and '/man' in file_path) - ): - return False - - return True - - def _create_file_components(self, rpm_path, package_name, package_version, - rpm_cpe=None, rpm_gpg=None): - """Creates file components for all files in an RPM package.""" - if not self.include_file_components: - return [] - - file_info = self.get_rpm_file_info(rpm_path) - file_list = sorted(file_info.keys()) - - file_components = [] - for file_path in file_list: - if not file_path or not file_path.strip(): - continue - - # Filter files based on configuration - if not self.include_debug_files: - if '/usr/lib/debug/' in file_path or file_path.endswith('.debug'): - continue - - file_data = file_info.get(file_path, {}) - file_hash = file_data.get("hash") - algo_id = file_data.get("algo") - - bom_ref = self._generate_file_bom_ref(package_name, package_version, file_path) - component = { - "type": "file", - "bom-ref": bom_ref, - "name": file_path - } - - # Add hash if available with detected algorithm - if file_hash: - # Map RPM algo ID to CycloneDX algo name - # 8: SHA-256, 10: SHA-512, 1: MD5, 2: SHA-1 - algo_map = 
{ - 8: "SHA-256", - 10: "SHA-512", - 1: "MD5", - 2: "SHA-1", - 9: "SHA-384", - 11: "SHA-224" - } - alg_name = algo_map.get(algo_id, "SHA-256") - - component["hashes"] = [ - { - "alg": alg_name, - "content": file_hash - } - ] - - # Add properties for file metadata - properties = [] - if file_data.get("permissions"): - properties.append({ - "name": "mock:file:permissions", - "value": file_data["permissions"] - }) - if file_data.get("owner"): - properties.append({ - "name": "mock:file:owner", - "value": file_data["owner"] - }) - if file_data.get("group"): - properties.append({ - "name": "mock:file:group", - "value": file_data["group"] - }) - - if rpm_cpe: - properties.append({ - "name": "mock:package:cpe", - "value": rpm_cpe - }) - if rpm_gpg: - properties.append({ - "name": "mock:package:gpg:key", - "value": rpm_gpg - }) - - if properties: - component["properties"] = properties - - file_components.append(component) - - return file_components - - def _get_source_file_bom_refs(self, _package_name, source_files): - """Gets bom-refs for source files that were used to build a package. - - Returns list of bom-refs for source tar.gz and patch files. 
- """ - source_bom_refs = [] - - for source_file in source_files: - filename = source_file.get("filename", "") - if not filename: - continue - - # Include source tar.gz files - if ( - filename.endswith('.tar.gz') or - filename.endswith('.tar.bz2') or - filename.endswith('.tar.xz') - ): - sha256 = source_file.get("sha256") - if sha256: - bom_ref = f"file:{filename}#{sha256[:16]}" - else: - bom_ref = f"file:{filename}" - source_bom_refs.append(bom_ref) - - # Include patch files (matching pattern like 00xx*.patch) - elif filename.endswith('.patch') or '.patch' in filename.lower(): - sha256 = source_file.get("sha256") - if sha256: - bom_ref = f"file:{filename}#{sha256[:16]}" - else: - bom_ref = f"file:{filename}" - source_bom_refs.append(bom_ref) - - return source_bom_refs - - def _create_dependency(self, bom_ref, dependencies, component_map, distro_obj): - """Creates a CycloneDX dependency entry.""" - if not bom_ref: - return None - - # Convert dependency strings to bom-refs - depends_on = [] - for dep in dependencies: - # Parse RPM dependency format (e.g., "libc.so.6()(64bit)", "package >= 1.0") - dep_bom_ref = self._dependency_to_bom_ref(dep, component_map, distro_obj) - if dep_bom_ref: - depends_on.append(dep_bom_ref) - - # Deduplicate dependsOn array - depends_on = list(set(depends_on)) - - if not depends_on: - return None - - return { - "ref": bom_ref, - "dependsOn": depends_on - } - - def _dependency_to_bom_ref(self, dependency_string, component_map, _distro): - """Converts an RPM dependency string to a bom-ref (PURL).""" - if not dependency_string: - return None - - # RPM dependencies can be complex: "package >= version", "libc.so.6()(64bit)", etc. - # Try to extract package name - dep = dependency_string.split()[0] if dependency_string else "" - - # Remove version constraints (>=, <=, =, etc.) 
- dep = re.sub(r'\s*[><=!]+\s*.*$', '', dep) - - # Remove parentheses content (e.g., "libc.so.6()(64bit)" -> "libc.so.6") - dep = re.sub(r'\(.*?\)', '', dep) - dep = dep.strip() - - if not dep or dep.startswith('/'): - return None - - # Try to match against known components (case-insensitive) - dep_lower = dep.lower() - if dep_lower in component_map: - return component_map[dep_lower] - - # If no match found, try to find by package name pattern - # Some dependencies are library names, try to find matching package - for pkg_name, bom_ref in component_map.items(): - # Check if dependency might match this package - # (e.g., "libc.so.6" might come from "glibc" package) - if dep_lower in pkg_name or pkg_name in dep_lower: - return bom_ref - - # If still no match, return None (don't create invalid references) - return None - - def _parse_signature_to_properties(self, signature_string): - """Parses RPM signature string into CycloneDX properties.""" - properties = [] - if not signature_string or signature_string == "(none)": - return properties - - # Parse signature like: - # "RSA/SHA256, Fri 08 Nov 2024 03:56:24 AM EST, Key ID c8ac4916105ef944" - properties.append({ - "name": "mock:signature:type", - "value": "GPG" - }) - - if "RSA/SHA256" in signature_string: - properties.append({ - "name": "mock:signature:algorithm", - "value": "RSA/SHA256" - }) - elif "DSA/SHA1" in signature_string: - properties.append({ - "name": "mock:signature:algorithm", - "value": "DSA/SHA1" - }) - elif "ECDSA/SHA256" in signature_string: - properties.append({ - "name": "mock:signature:algorithm", - "value": "ECDSA/SHA256" - }) - elif "Ed25519/SHA256" in signature_string: - properties.append({ - "name": "mock:signature:algorithm", - "value": "Ed25519/SHA256" - }) - - # Extract key ID - key_id_match = re.search(r'Key ID ([0-9a-fA-F]+)', signature_string) - if key_id_match: - properties.append({ - "name": "mock:signature:key", - "value": key_id_match.group(1) - }) - - # Extract date - # Extract date 
- date_match = re.search( - r'([A-Za-z]{3} [A-Za-z]{3}\s+\d{1,2} \d{2}:\d{2}:\d{2} \d{4})', signature_string - ) - if date_match: - properties.append({ - "name": "mock:signature:date", - "value": date_match.group(1) - }) - - properties.append({ - "name": "mock:signature:raw", - "value": signature_string - }) - - return properties - - def _signature_info_to_properties(self, signature_info): - """Converts signature info dict to CycloneDX properties.""" - properties = [] - - sig_type = signature_info.get("signature_type", "unsigned") - properties.append({ - "name": "mock:signature:type", - "value": sig_type - }) - - if ( - sig_type not in ('unsigned', 'unknown') and - 'missing key' not in sig_type and - 'BAD' not in sig_type - ): - algorithm = signature_info.get("signature_algorithm") - if algorithm: - properties.append({ - "name": "mock:signature:algorithm", - "value": algorithm - }) - - key_id = signature_info.get("signature_key") - if key_id: - properties.append({ - "name": "mock:signature:key", - "value": key_id - }) - - sig_date = signature_info.get("signature_date") - if sig_date: - properties.append({ - "name": "mock:signature:date", - "value": sig_date - }) - - sig_valid = signature_info.get("signature_valid", False) - properties.append({ - "name": "mock:signature:valid", - "value": str(sig_valid).lower() - }) - - raw_data = signature_info.get("raw_signature_data") - if raw_data: - properties.append({ - "name": "mock:signature:raw", - "value": raw_data - }) - - return properties - - def parse_spec_file(self, spec_path): - """Parses a spec file to extract metadata and source/patch files using the specfile library.""" - self.buildroot.root_log.debug("Parsing spec file using specfile library") - - sources = [] - metadata = { - "name": "", - "version": "", - "release": "", - "license": "", - "build_requires": [], - "requires": [] - } - - if not os.path.isfile(spec_path): - self.buildroot.root_log.debug(f"Spec file not found: {spec_path}") - return metadata, 
sources - try: - chroot_spec_path = self.buildroot.from_chroot_path(spec_path) or spec_path - # Use rpmspec --parse inside the build chroot to ensure macro expansion - # matches the build environment exactly. - cmd = ["rpmspec", "--parse", chroot_spec_path] - result, _ = self.buildroot.doChroot( - cmd, shell=False, returnOutput=True, printOutput=False - ) - - if not result: - # If doChroot returned empty, try reading local spec as fallback - try: - with open(spec_path, 'r', encoding='utf-8') as f: - result = f.read() - except Exception: - return metadata, sources - - try: - from specfile import Specfile - # Use specfile to parse the expanded content - spec = Specfile(content=result, sourcedir=os.path.dirname(spec_path)) - - - # Extract canonical metadata - metadata.update({ - "name": spec.expanded_name, - "version": spec.expanded_version, - "release": spec.expanded_release, - "license": spec.expanded_license, - }) - - # Extract BuildRequires and Requires from headers - try: - br = spec.rpm_spec.sourceHeader[rpm.RPMTAG_REQUIRENAME] - metadata["build_requires"] = [ - r.decode('utf-8', 'replace') if isinstance(r, bytes) else str(r) - for r in br - ] if br else [] - except (AttributeError, KeyError): - metadata["build_requires"] = [] - - try: - r = spec.rpm_spec.packages[0].header[rpm.RPMTAG_REQUIRENAME] - metadata["requires"] = [ - req.decode('utf-8', 'replace') if isinstance(req, bytes) else str(req) - for req in r - ] if r else [] - except (AttributeError, KeyError, IndexError): - metadata["requires"] = [] - - # Extract both sources and patches from the spec object model - all_locs = [] - with spec.sources() as spec_sources: - all_locs.extend(s.location for s in spec_sources if s.location) - with spec.patches() as spec_patches: - all_locs.extend(p.location for p in spec_patches if p.location) - - for loc in all_locs: - filename, _, hash_value = loc.partition('#') - actual_filename = os.path.basename(filename) - build_dir = os.path.dirname(spec_path) - sources_dir = 
os.path.join(os.path.dirname(build_dir), "SOURCES") - file_path = os.path.join(sources_dir, actual_filename) - - actual_hash = None - if os.path.isfile(file_path): - actual_hash = self.hash_file(file_path) - elif hash_value: - actual_hash = hash_value - - signature = ( - self.get_file_signature(file_path) if os.path.isfile(file_path) else None - ) - - sources.append({ - "filename": actual_filename, - "sha256": actual_hash, - "digital_signature": signature - }) - self.buildroot.root_log.debug(f"Extracted metadata {metadata} and {len(sources)} source/patch files from spec") - - # Double check we actually got metadata - if not metadata.get("name"): - raise ValueError("Empty metadata from Specfile") - - except Exception as e: - self.buildroot.root_log.debug(f"Specfile parsing failed, falling back to regex: {e}") - - # Ensure result is a string for regex - content = str(result) if result else "" - - # Fallback to simple regex parsing of the expanded result - name_match = (re.search(r'^Name:\s+(.+)$', content, re.MULTILINE) or - re.search(r'^name\s*:\s*(.+)$', content, re.IGNORECASE | re.MULTILINE)) - version_match = (re.search(r'^Version:\s+(.+)$', content, re.MULTILINE) or - re.search(r'^version\s*:\s*(.+)$', content, re.IGNORECASE | re.MULTILINE)) - release_match = (re.search(r'^Release:\s+(.+)$', content, re.MULTILINE) or - re.search(r'^release\s*:\s*(.+)$', content, re.IGNORECASE | re.MULTILINE)) - license_match = (re.search(r'^License:\s+(.+)$', content, re.MULTILINE) or - re.search(r'^license\s*:\s*(.+)$', content, re.IGNORECASE | re.MULTILINE)) - - metadata["name"] = name_match.group(1).strip() if name_match else "" - metadata["version"] = version_match.group(1).strip() if version_match else "" - metadata["release"] = release_match.group(1).strip() if release_match else "" - metadata["license"] = license_match.group(1).strip() if license_match else "" - - # Simple source/patch extraction from expanded spec - source_matches = 
re.finditer(r'^(Source|Patch)\d*:\s+(.+)$', content, re.MULTILINE) - for sm in source_matches: - loc = sm.group(2).strip() - filename = os.path.basename(loc.partition('#')[0]) - # Avoid duplicates - if not any(s['filename'] == filename for s in sources): - sources.append({ - "filename": filename, - "sha256": None, - "digital_signature": None - }) - except Exception as e: - self.buildroot.root_log.debug(f"Failed to parse spec file {spec_path}: {e}") - self.buildroot.root_log.debug(traceback.format_exc()) - - return metadata, sources - def get_file_signature(self, file_path): """Attempts to detect if a file has a digital signature.""" try: @@ -1820,836 +729,4 @@ def get_file_signature(self, file_path): self.buildroot.root_log.debug(f"Failed to check signature for {file_path}: {e}") return None - def get_iso_timestamp(self): - """Returns the current time in ISO 8601 format.""" - return datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') - - def get_distribution(self): - """Returns the distribution name and version from /etc/os-release.""" - try: - distro_name = None - version = None - if os.path.exists("/etc/os-release"): - with open("/etc/os-release", encoding="utf-8") as f: - for line in f: - if line.startswith("NAME="): - distro_name = line.strip().split("=", 1)[1].strip('"') - elif line.startswith("VERSION_ID="): - version = line.strip().split("=", 1)[1].strip('"') - if distro_name and version: - return f"{distro_name} {version}" - return distro_name or "Unknown" - except OSError as e: - return f"Unknown ({e})" - - - def generate_cpe(self, package_name, package_version, vendor=None): - """Generates a CPE identifier for a package.""" - # CPE format: cpe:2.3:a:{vendor}:{product}:{version}:*:*:*:*:*:*:*:* - - # Default vendor if not provided - if not vendor or vendor == "(none)": - vendor = "fedora" - - # Clean up vendor name for CPE - vendor = re.sub(r'[^a-zA-Z0-9._-]', '_', vendor.lower()) - - # Clean up package name for CPE - product = 
re.sub(r'[^a-zA-Z0-9._-]', '_', package_name.lower()) - - # Clean up version for CPE (remove release part if present) - version = package_version - if '-' in version: - version = version.split('-')[0] # Remove release part - - # Generate CPE - cpe = f"cpe:2.3:a:{vendor}:{product}:{version}:*:*:*:*:*:*:*:*" - return cpe - - def detect_chroot_distribution(self): - """Detects the distribution name inside the chroot using python-distro.""" - try: - # Query the chroot filesystem directly. Attempting root_dir first. - try: - # pylint: disable=unexpected-keyword-arg - distro_id = distro.id(root_dir=self.buildroot.rootdir) - except TypeError: - # Fallback for older python-distro versions (<1.6.0) - # We could use os-release file directly - os_release = os.path.join(self.buildroot.rootdir, "etc/os-release") - distro_id = "unknown" - if os.path.isfile(os_release): - with open(os_release, 'r') as f: - for line in f: - if line.startswith("ID="): - distro_id = line.split("=")[1].strip().strip('"').strip("'") - break - - if distro_id: - return distro_id.lower() - return "unknown" - # pylint: disable=broad-exception-caught - except Exception as e: - self.buildroot.root_log.debug(f"Failed to detect chroot distribution: {e}") - return "unknown" - - def get_build_toolchain_packages(self): - """Returns the list of packages installed in the build toolchain - with detailed signature information collected in a single batch query.""" - try: - # Get detailed package info including signature data in one batch query - # Tags: Name, EVR, License, BuildTime, Signature data (RSA, DSA, GPG, PGP) - fields = [ - "%{NAME}", - "%{VERSION}-%{RELEASE}", - "%{ARCH}", - "%{LICENSE}", - "%{BUILDTIME}", - "%{RSAHEADER:pgpsig}", - "%{DSAHEADER:pgpsig}", - "%{SIGGPG:pgpsig}", - "%{SIGPGP:pgpsig}", - "%{SHA256HEADER}", - "%{SOURCERPM}" - ] - query = "|".join(fields) + "\n" - cmd = ["rpm", "-qa", "--qf", query] - output, _ = self.buildroot.doChroot( - cmd, shell=False, returnOutput=True, printOutput=False - 
) - - packages = [] - cpe_vendor_default = self.detect_chroot_distribution() or "unknown" - - for line in output.splitlines(): - parts = line.split("|") - if len(parts) < 6: - continue - - package_name = parts[0].strip() - package_version = parts[1].strip() - package_arch = parts[2].strip() - package_license = parts[3].strip() - build_time = parts[4].strip() - - # Signature data is in the middle parts (parts 5-8) - raw_sig = None - for sig_candidate in parts[5:9]: - sig_candidate = sig_candidate.strip() - if sig_candidate and sig_candidate != "(none)": - raw_sig = sig_candidate - break - - # Checksum is part 9, SOURCERPM is part 10 - package_checksum = parts[9].strip() if len(parts) > 9 else None - if package_checksum == "(none)": - package_checksum = None - - source_rpm = parts[10].strip() if len(parts) > 10 else None - if source_rpm == "(none)": - source_rpm = None - - # Skip GPG keys and other non-package entries - if ( - package_name.startswith('gpg-pubkey') or - package_name == '(none)' or - not package_name - ): - continue - - # Prepare signature info structure - digital_signature = { - "signature_type": "unsigned", - "signature_key": None, - "signature_date": None, - "signature_algorithm": None, - "signature_valid": False, - "raw_signature_data": raw_sig, - "build_date": None - } - - if raw_sig: - self._parse_signature_data(raw_sig, digital_signature) - - # Build date from metadata - if build_time and build_time.isdigit(): - try: - dt = datetime.fromtimestamp(int(build_time), tz=timezone.utc) - digital_signature["build_date"] = dt.isoformat() - except (ValueError, TypeError, OverflowError): - pass - - cpe = self.generate_cpe(package_name, package_version, vendor=cpe_vendor_default) - - packages.append({ - "name": package_name, - "version": package_version, - "arch": package_arch, - "licenseDeclared": package_license, - "digital_signature": digital_signature, - "sourcerpm": source_rpm, - "cpe": cpe, - "checksum": package_checksum - }) - - 
self.buildroot.root_log.debug(f"Found {len(packages)} build toolchain packages with integrated signature metadata") - return packages - # pylint: disable=broad-exception-caught - except Exception as e: - self.buildroot.root_log.debug(f"Failed to get build environment packages: {e}") - return [] - - def get_package_checksum_from_chroot(self, package_name): - """Gets the SHA-256 checksum of an installed package from inside the chroot.""" - try: - # Try different RPM header tags to get a checksum - # SHA256HEADER is the SHA256 checksum of the RPM header - cmd = ["rpm", "-q", package_name, "--qf", "%{SHA256HEADER}"] - output, _ = self.buildroot.doChroot( - cmd, shell=False, returnOutput=True, printOutput=False - ) - - if ( - output and output.strip() and - output.strip() != "(none)" and - not output.strip().startswith("error") - ): - return output.strip().lower() - - # Try SHA1HEADER as fallback (older RPMs) - cmd = ["rpm", "-q", package_name, "--qf", "%{SHA1HEADER}"] - output, _ = self.buildroot.doChroot( - cmd, shell=False, returnOutput=True, printOutput=False - ) - - if ( - output and output.strip() and - output.strip() != "(none)" and - not output.strip().startswith("error") - ): - # It's SHA-1, but it's better than nothing - self.buildroot.root_log.debug( - f"Warning: Using SHA-1 for {package_name}, SHA-256 not available" - ) - return output.strip().lower() - - # No header checksum available - self.buildroot.root_log.debug(f"Warning: No checksum available for {package_name}") - return None - - # pylint: disable=broad-exception-caught - except Exception as e: - self.buildroot.root_log.debug(f"Failed to get checksum for package {package_name}: {e}") - return None - - def _parse_signature_data(self, sig_data, signature_info): - """Parses the raw signature string and updates the signature_info dict.""" - if sig_data and sig_data != "(none)" and sig_data != "": - signature_info["signature_type"] = "GPG" - signature_info["signature_valid"] = True - - # Parse signature 
line like: - # "RSA/SHA256, Fri 08 Nov 2024 ... Key ID ..." - if "RSA/SHA256" in sig_data: - signature_info["signature_algorithm"] = "RSA/SHA256" - elif "DSA/SHA1" in sig_data: - signature_info["signature_algorithm"] = "DSA/SHA1" - elif "ECDSA/SHA256" in sig_data: - signature_info["signature_algorithm"] = "ECDSA/SHA256" - elif "Ed25519/SHA256" in sig_data: - signature_info["signature_algorithm"] = "Ed25519/SHA256" - - # Extract key ID - if "Key ID" in sig_data: - key_id_match = re.search(r'Key ID ([0-9a-fA-F]+)', sig_data) - if key_id_match: - signature_info["signature_key"] = key_id_match.group(1) - - # Extract date - handle various time formats including EST/EDT - date_match = re.search( - r'([A-Za-z]{3} [A-Za-z]{3}\s+\d{1,2} \d{2}:\d{2}:\d{2} \d{4})', - sig_data - ) - if date_match: - signature_info["signature_date"] = date_match.group(1) - else: - signature_info["signature_type"] = "unsigned" - signature_info["signature_valid"] = False - - def get_rpm_metadata(self, rpm_path): - """Extracts metadata from an RPM file. - Uses doChroot if the file is within the chroot to ensure compatibility.""" - if not os.path.isfile(rpm_path): - self.buildroot.root_log.debug(f"RPM file not found: {rpm_path}") - return {} - - # If the file is in the chroot, and NOT in the result directory, use doChroot - # Note: resultdir is on the host, doChroot can't see it easily. 
- chroot_path = self.buildroot.from_chroot_path(rpm_path) - if chroot_path and not rpm_path.startswith(self.buildroot.resultdir): - return self._get_rpm_metadata_chroot(chroot_path) - - # Fallback to host-native or host-subprocess for non-chroot files - return self._get_rpm_metadata_native(rpm_path) - - def _get_rpm_metadata_chroot(self, chroot_rpm_path): - """Extracts metadata using rpm -qp inside the chroot.""" - fields = { - "name": "%{NAME}", "version": "%{VERSION}", "release": "%{RELEASE}", - "arch": "%{ARCH}", "epoch": "%{EPOCH}", "summary": "%{SUMMARY}", - "license": "%{LICENSE}", "vendor": "%{VENDOR}", "url": "%{URL}", - "packager": "%{PACKAGER}", "buildtime": "%{BUILDTIME}", - "buildhost": "%{BUILDHOST}", "sourcerpm": "%{SOURCERPM}", - "group": "%{GROUP}", "distribution": "%{DISTRIBUTION}", - "sha256": "%{SHA256HEADER}" - } - - metadata = {} - try: - query = "|".join(fields.values()) - cmd = ["rpm", "-qp", "--queryformat", query, chroot_rpm_path] - output, _ = self.buildroot.doChroot( - cmd, shell=False, returnOutput=True, printOutput=False - ) - - if output: - parts = output.split("|") - for i, field_name in enumerate(fields.keys()): - if i < len(parts): - val = parts[i].strip() - if field_name == "epoch" and (not val or val == "(none)"): - val = "0" - metadata[field_name] = val - return metadata - except Exception as e: - self.buildroot.root_log.debug(f"Failed to extract metadata via doChroot for {chroot_rpm_path}: {e}") - return {} - - def _get_rpm_metadata_native(self, rpm_path): - """Extracts metadata using native host bindings (fallback).""" - # pylint: disable=no-member - try: - ts = rpm.TransactionSet() - with open(rpm_path, "rb") as f: - hdr = ts.hdrFromFdno(f.fileno()) - - tag_map = { - "name": rpm.RPMTAG_NAME, "version": rpm.RPMTAG_VERSION, - "release": rpm.RPMTAG_RELEASE, "arch": rpm.RPMTAG_ARCH, - "epoch": rpm.RPMTAG_EPOCH, "summary": rpm.RPMTAG_SUMMARY, - "license": rpm.RPMTAG_LICENSE, "vendor": rpm.RPMTAG_VENDOR, - "url": rpm.RPMTAG_URL, 
"packager": rpm.RPMTAG_PACKAGER, - "buildtime": rpm.RPMTAG_BUILDTIME, "buildhost": rpm.RPMTAG_BUILDHOST, - "sourcerpm": rpm.RPMTAG_SOURCERPM, "group": rpm.RPMTAG_GROUP, - "distribution": rpm.RPMTAG_DISTRIBUTION, "sha256": rpm.RPMTAG_SHA256HEADER - } - - metadata = {} - for field_name, tag in tag_map.items(): - value = hdr[tag] - if field_name == "epoch" and value is None: - value = "0" - elif value is None: - value = "" - elif isinstance(value, bytes): - value = value.decode('utf-8', errors='replace') - metadata[field_name] = str(value) - return metadata - except Exception: - return self._get_rpm_metadata_subprocess(rpm_path) - - def _get_rpm_metadata_subprocess(self, rpm_path): - """Extracts metadata using host subprocess (last resort fallback).""" - metadata = {} - fields = { - "name": "%{NAME}", "version": "%{VERSION}", "release": "%{RELEASE}", - "arch": "%{ARCH}", "epoch": "%{EPOCH}", "summary": "%{SUMMARY}", - "license": "%{LICENSE}", "vendor": "%{VENDOR}", "url": "%{URL}", - "packager": "%{PACKAGER}", "buildtime": "%{BUILDTIME}", - "buildhost": "%{BUILDHOST}", "sourcerpm": "%{SOURCERPM}", - "group": "%{GROUP}", "distribution": "%{DISTRIBUTION}" - } - try: - for field_name, field_format in fields.items(): - cmd = ["rpm", "-qp", rpm_path, "--queryformat", field_format] - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) - value = result.stdout.strip() - if field_name == "epoch" and not value: - value = "0" - metadata[field_name] = value - return metadata - except Exception as e: - self.buildroot.root_log.debug(f"Failed to extract RPM metadata via subprocess for {rpm_path}: {e}") - return {} - - - def get_rpm_file_info(self, rpm_path): - """Extracts file hashes, ownership, and permissions from an RPM file. 
- Uses doChroot for files within the chroot (and not in resultdir).""" - - chroot_path = self.buildroot.from_chroot_path(rpm_path) - if chroot_path and not rpm_path.startswith(self.buildroot.resultdir): - return self._get_rpm_file_info_chroot(chroot_path) - - return self._get_rpm_file_info_native(rpm_path) - - def _get_rpm_file_info_chroot(self, chroot_rpm_path): - """Extracts file info using rpm -qp inside the chroot.""" - file_info = {} - try: - # Query format for files: path|hash|mode|user|group - qf = "[%{FILENAMES}|%{FILEDIGESTS}|%{FILEMODES:octal}|%{FILEUSERNAME}|%{FILEGROUPNAME}\\n]" - cmd = ["rpm", "-qp", "--queryformat", qf, chroot_rpm_path] - output, _ = self.buildroot.doChroot( - cmd, shell=False, returnOutput=True, printOutput=False - ) - - # Detect digest algorithm from header - cmd_algo = ["rpm", "-qp", "--queryformat", "%{FILEDIGESTALGO}", chroot_rpm_path] - algo_out, _ = self.buildroot.doChroot( - cmd_algo, shell=False, returnOutput=True, printOutput=False - ) - try: - algo = int(algo_out.strip()) if algo_out and algo_out.strip() else 8 - except ValueError: - algo = 8 - - for line in output.splitlines(): - parts = line.split("|") - if len(parts) >= 5: - filename = parts[0] - file_info[filename] = { - "hash": parts[1] if parts[1] and parts[1] != "(none)" else None, - "algo": algo, - "permissions": parts[2], - "owner": parts[3], - "group": parts[4] - } - return file_info - except Exception as e: - self.buildroot.root_log.debug(f"Failed to get file info via doChroot for {chroot_rpm_path}: {e}") - return {} - - def _get_rpm_file_info_native(self, rpm_path): - """Extracts file information using native host bindings (fallback).""" - # pylint: disable=no-member - file_info = {} - try: - ts = rpm.TransactionSet() - # pylint: disable=protected-access - ts.setVSFlags(rpm._RPMVSF_NOSIGNATURES | rpm._RPMVSF_NODIGESTS) - with open(rpm_path, "rb") as f: - hdr = ts.hdrFromFdno(f.fileno()) - - basenames = hdr[rpm.RPMTAG_BASENAMES] - dirnames = 
hdr[rpm.RPMTAG_DIRNAMES] - dirindexes = hdr[rpm.RPMTAG_DIRINDEXES] - filedigests = hdr[rpm.RPMTAG_FILEDIGESTS] - filemodes = hdr[rpm.RPMTAG_FILEMODES] - fileusernames = hdr[rpm.RPMTAG_FILEUSERNAME] - filegroupnames = hdr[rpm.RPMTAG_FILEGROUPNAME] - - try: - algo = hdr[rpm.RPMTAG_FILEDIGESTALGO] - except (KeyError, IndexError): - algo = 8 - - file_info = {} - for i, basename in enumerate(basenames): - dirname = dirnames[dirindexes[i]] - if isinstance(dirname, bytes): - dirname = dirname.decode('utf-8', 'replace') - if isinstance(basename, bytes): - basename = basename.decode('utf-8', 'replace') - filename = os.path.join(dirname, basename) - - digest = filedigests[i] - if isinstance(digest, bytes): - digest = digest.decode('utf-8') - - file_info[filename] = { - "hash": digest if digest else None, - "algo": algo, - "permissions": f"0{filemodes[i]:o}", - "owner": fileusernames[i].decode('utf-8', 'replace') if isinstance(fileusernames[i], bytes) else fileusernames[i], - "group": filegroupnames[i].decode('utf-8', 'replace') if isinstance(filegroupnames[i], bytes) else filegroupnames[i] - } - return file_info - except Exception: - return {} - - def get_rpm_dependencies(self, rpm_path): - """Extracts the list of dependencies from an RPM file. 
- Uses doChroot for files within the chroot (and not in resultdir).""" - - chroot_path = self.buildroot.from_chroot_path(rpm_path) - if chroot_path and not rpm_path.startswith(self.buildroot.resultdir): - return self._get_rpm_dependencies_chroot(chroot_path) - - return self._get_rpm_dependencies_native(rpm_path) - - def _get_rpm_dependencies_chroot(self, chroot_rpm_path): - """Extracts dependencies using rpm -qpR inside the chroot.""" - try: - cmd = ["rpm", "-qpR", chroot_rpm_path] - output, _ = self.buildroot.doChroot( - cmd, shell=False, returnOutput=True, printOutput=False - ) - return output.splitlines() if output else [] - except Exception: - return [] - - def _get_rpm_dependencies_native(self, rpm_path): - """Extracts dependencies using native host bindings (fallback).""" - # pylint: disable=no-member - try: - ts = rpm.TransactionSet() - with open(rpm_path, "rb") as f: - hdr = ts.hdrFromFdno(f.fileno()) - - requirements = hdr[rpm.RPMTAG_REQUIRENAME] - if not requirements: - return [] - - return [r.decode('utf-8', 'replace') if isinstance(r, bytes) else str(r) for r in requirements] - except Exception: # pylint: disable=broad-exception-caught - try: - cmd = ["rpm", "-qpR", rpm_path] - result = subprocess.run( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True - ) - return result.stdout.splitlines() - except Exception: - return [] - - def get_rpm_signature(self, rpm_path): - """Extracts the GPG signature of an RPM file. 
- Uses doChroot for files within the chroot (and not in resultdir).""" - - chroot_path = self.buildroot.from_chroot_path(rpm_path) - if chroot_path and not rpm_path.startswith(self.buildroot.resultdir): - return self._get_rpm_signature_chroot(chroot_path) - - return self._get_rpm_signature_host(rpm_path) - - def _get_rpm_signature_chroot(self, chroot_rpm_path): - """Extracts signature using rpm inside the chroot.""" - try: - # Try to get it via queryformat first - cmd = ["rpm", "-qp", "--queryformat", "%{SIGPGP:pgpsig} %{SIGGPG:pgpsig}", chroot_rpm_path] - output, _ = self.buildroot.doChroot( - cmd, shell=False, returnOutput=True, printOutput=False - ) - sig = output.strip() if output else "" - if sig and sig != "(none) (none)" and sig != "(none)": - return sig.replace("(none)", "").strip() - - # Fallback to rpm -qip - cmd = ["rpm", "-qip", chroot_rpm_path] - output, _ = self.buildroot.doChroot( - cmd, shell=False, returnOutput=True, printOutput=False - ) - if output: - for line in output.splitlines(): - if "Signature" in line and ":" in line: - sig_val = line.split(":", 1)[1].strip() - if sig_val and sig_val != "(none)": - return sig_val - return None - except Exception: - return None - - def _get_rpm_signature_host(self, rpm_path): - """Extracts signature using host tools (fallback).""" - try: - cmd = ["rpm", "-qp", "--queryformat", "%{SIGPGP:pgpsig} %{SIGGPG:pgpsig}", rpm_path] - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) - sig = result.stdout.strip() - if sig and sig != "(none) (none)" and sig != "(none)": - return sig.replace("(none)", "").strip() - - cmd = ["rpm", "-qip", rpm_path] - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) - for line in result.stdout.splitlines(): - if "Signature" in line and ":" in line: - sig_val = line.split(":", 1)[1].strip() - if sig_val and sig_val != "(none)": - return sig_val - return None - except Exception: - return 
None - - def hash_file(self, file_path): - """Calculates the SHA256 hash of a file.""" - sha256 = hashlib.sha256() - try: - with open(file_path, "rb") as f: - for chunk in iter(lambda: f.read(4096), b""): - sha256.update(chunk) - return sha256.hexdigest() - # pylint: disable=broad-exception-caught - except Exception as e: - self.buildroot.root_log.debug(f"Failed to hash file {file_path}: {e}") - return None - - def extract_source_files_from_srpm(self, src_rpm_path): - """Extracts metadata for source files from a source RPM without full extraction.""" - # pylint: disable=no-member - self.buildroot.root_log.debug(f"Extracting source metadata from source RPM: {src_rpm_path}") - source_files = [] - if not os.path.isfile(src_rpm_path): - return source_files - try: - ts = rpm.TransactionSet() - with open(src_rpm_path, "rb") as f: - hdr = ts.hdrFromFdno(f.fileno()) - - basenames = hdr[rpm.RPMTAG_BASENAMES] - digests = hdr[rpm.RPMTAG_FILEDIGESTS] - - # Create a set for quick lookup of signature files - file_set = set(basenames) - - for filename, sha256 in zip(basenames, digests): - if filename.endswith(".spec"): - continue - - signature = None - if filename.endswith(".asc") or filename.endswith(".sig"): - signature = "File is a signature file" - else: - for ext in [".asc", ".sig"]: - if filename + ext in file_set: - signature = f"GPG signature file exists: {filename}{ext}" - break - - source_files.append({ - "filename": filename, - "sha256": sha256, - "digital_signature": signature - }) - except Exception as e: - self.buildroot.root_log.debug(f"Failed to extract source metadata from {src_rpm_path}: {e}") - - return source_files - - - def _generate_spdx_document(self, name, version, release, build_dir, rpm_files, - source_files, toolchain_components, distro_id, spec_metadata=None): - """Generates the full SPDX document.""" - doc_spdx_id = "SPDXRef-DOCUMENT" - creation_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - - # Basic Document Structure - document 
= { - "spdxVersion": "SPDX-2.3", - "dataLicense": "CC0-1.0", - "SPDXID": doc_spdx_id, - "name": f"SBOM for {name}-{version}-{release}", - "documentNamespace": f"http://spdx.org/spdxdocs/{name}-{version}-{release}-{uuid.uuid4()}", - "creationInfo": { - "creators": [ - "Tool: mock-sbom-generator-1.0", - "Organization: Atomicorp" - ], - "created": creation_time - }, - "packages": [], - "files": [], - "relationships": [] - } - - # Add Toolchain Packages - for tc in toolchain_components: - spdx_pkg = self._create_spdx_package_from_dict(tc) - if spdx_pkg: - document["packages"].append(spdx_pkg) - # Relationship: Document DESCRIBES toolchain (conceptually part of build environment) - # But strictly, Document DESCRIBES the output artifacts. - # We'll just list them. - - # Add Source Files - for src_file in source_files: - spdx_file = self._create_spdx_file(src_file) - if spdx_file: - document["files"].append(spdx_file) - # Relationship: Document CONTAINS file - document["relationships"].append({ - "spdxElementId": doc_spdx_id, - "relatedSpdxElement": spdx_file["SPDXID"], - "relationshipType": "CONTAINS" - }) - - # Prepare toolchain name to SPDXID map for relationships - tc_name_to_id = {} - if spec_metadata and toolchain_components: - for tc in toolchain_components: - pkg_name = tc.get("name") - pkg_version = tc.get("version") - if pkg_name and pkg_version: - safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', pkg_name) - safe_ver = re.sub(r'[^a-zA-Z0-9.-]', '-', pkg_version) - tc_name_to_id[pkg_name.lower()] = f"SPDXRef-Package-{safe_name}-{safe_ver}" - - # Add Build Artifacts (RPMs) - for rpm_file in rpm_files: - rpm_path = os.path.join(build_dir, rpm_file) - spdx_pkg = self._create_spdx_package_from_rpm(rpm_path, distro_id) - if spdx_pkg: - document["packages"].append(spdx_pkg) - # Relationship: Document DESCRIBES Package - document["relationships"].append({ - "spdxElementId": doc_spdx_id, - "relatedSpdxElement": spdx_pkg["SPDXID"], - "relationshipType": "DESCRIBES" - }) - - # 
Add BUILD_DEPENDENCY_OF relationships - if spec_metadata: - build_reqs = spec_metadata.get("build_requires", []) - for req in build_reqs: - req_name = req.split()[0].lower() - if req_name in tc_name_to_id: - document["relationships"].append({ - "spdxElementId": tc_name_to_id[req_name], - "relatedSpdxElement": spdx_pkg["SPDXID"], - "relationshipType": "BUILD_DEPENDENCY_OF" - }) - - return document - - def _create_spdx_package_from_rpm(self, rpm_path, distro_obj): - """Creates an SPDX Package from an RPM file.""" - pkg_data = self.get_rpm_metadata(rpm_path) - if not pkg_data: - return None - - name = pkg_data.get("name") - version = pkg_data.get("version") - release = pkg_data.get("release") - arch = pkg_data.get("arch") - full_version = f"{version}-{release}" if release else version - - safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', name) - safe_ver = re.sub(r'[^a-zA-Z0-9.-]', '-', full_version) - spdx_id = f"SPDXRef-Package-{safe_name}-{safe_ver}" - - package = { - "name": name, - "SPDXID": spdx_id, - "versionInfo": full_version, - "downloadLocation": "NOASSERTION", - "filesAnalyzed": False, - "supplier": "NOASSERTION" - } - - # License - lic = pkg_data.get("license") - if lic and lic != "(none)": - package["licenseDeclared"] = lic - else: - package["licenseDeclared"] = "NOASSERTION" - package["licenseConcluded"] = "NOASSERTION" - - # Supplier - packager = pkg_data.get("packager") - if packager and packager != "(none)": - package["supplier"] = f"Person: {packager}" - - # Checksums - rpm_hash = pkg_data.get("sha256") - if not rpm_hash or rpm_hash == "(none)": - rpm_hash = self.hash_file(rpm_path) - - if rpm_hash: - package["checksums"] = [{ - "algorithm": "SHA256", - "checksumValue": rpm_hash - }] - - # External Refs - external_refs = [] - purl = self._generate_purl(name, full_version, distro_obj, arch) - if purl: - external_refs.append({ - "referenceCategory": "PACKAGE-MANAGER", - "referenceType": "purl", - "referenceLocator": purl - }) - - vendor = 
pkg_data.get("vendor") - cpe = self.generate_cpe(name, version, vendor=vendor) - if cpe: - external_refs.append({ - "referenceCategory": "SECURITY", - "referenceType": "cpe23Type", - "referenceLocator": cpe - }) - - if external_refs: - package["externalRefs"] = external_refs - - return package - - def _create_spdx_package_from_dict(self, pkg_data): - """Creates an SPDX Package from a dictionary (e.g. toolchain).""" - name = pkg_data.get("name") - version = pkg_data.get("version") - if not name or not version: - return None - - safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', name) - safe_ver = re.sub(r'[^a-zA-Z0-9.-]', '-', version) - spdx_id = f"SPDXRef-Package-{safe_name}-{safe_ver}" - - package = { - "name": name, - "SPDXID": spdx_id, - "versionInfo": version, - "downloadLocation": "NOASSERTION", - "filesAnalyzed": False, - "supplier": "NOASSERTION" - } - - lic = pkg_data.get("licenseDeclared") - if lic and lic != "(none)": - package["licenseDeclared"] = lic - else: - package["licenseDeclared"] = "NOASSERTION" - package["licenseConcluded"] = "NOASSERTION" - - checksum = pkg_data.get("checksum") - if checksum and not checksum.startswith("error"): - # Assume SHA256 if len 64 else SHA1 - alg = "SHA256" if len(checksum) == 64 else "SHA1" - package["checksums"] = [{ - "algorithm": alg, - "checksumValue": checksum - }] - - return package - - def _create_spdx_file(self, file_data): - """Creates an SPDX File from file metadata.""" - filename = file_data.get("filename") - if not filename: - return None - - safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', filename) - spdx_id = f"SPDXRef-File-{safe_name}" - - file_obj = { - "fileName": f"./{filename}", - "SPDXID": spdx_id, - "licenseConcluded": "NOASSERTION", - "copyrightText": "NOASSERTION" - } - - sha256 = file_data.get("sha256") - if sha256: - file_obj["checksums"] = [{ - "algorithm": "SHA256", - "checksumValue": sha256 - }] - return file_obj diff --git a/mock/py/mockbuild/plugins/sbom_spdx.py 
b/mock/py/mockbuild/plugins/sbom_spdx.py new file mode 100644 index 000000000..b507930ad --- /dev/null +++ b/mock/py/mockbuild/plugins/sbom_spdx.py @@ -0,0 +1,409 @@ +# -*- coding: utf-8 -*- +# vim:expandtab:autoindent:tabstop=4:shiftwidth=4:filetype=python:textwidth=0: +# License: GPL2 or later see COPYING +# Written by Scott R. Shinn +# Copyright (C) 2026, Atomicorp, Inc. +""" +SPDX generation functions for the SBOM generator plugin. +""" + +import os +import re +import uuid +from datetime import datetime, timezone + + + +# pylint: disable=too-many-instance-attributes +class SpdxGenerator: + """Helper class for generating SPDX documents.""" + + def __init__(self, rpm_helper, buildroot, conf=None): + self.rpm_helper = rpm_helper + self.buildroot = buildroot + self.conf = conf or {} + + # Configuration options for file-level dependencies and filtering + self.include_file_dependencies = self.conf.get("include_file_dependencies", False) + self.include_file_components = self.conf.get("include_file_components", True) + self.include_debug_files = self.conf.get("include_debug_files", False) + self.include_man_pages = self.conf.get("include_man_pages", True) + self.include_toolchain_dependencies = self.conf.get( + "include_toolchain_dependencies", False + ) + + # pylint: disable=too-many-locals,too-many-branches,too-many-statements,too-many-arguments,too-many-positional-arguments + def generate_spdx_document(self, name, version, release, build_dir, rpm_files, + source_files, build_toolchain_packages, distro_id, + spec_metadata=None, hardening_props=None): + """Generates the full SPDX document using hierarchical grouping and enhanced metadata.""" + doc_spdx_id = "SPDXRef-DOCUMENT" + creation_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + # 1. 
Initialize Document + document = { + "spdxVersion": "SPDX-2.3", + "dataLicense": "CC0-1.0", + "SPDXID": doc_spdx_id, + "name": f"SBOM for {name}-{version}-{release}", + "documentNamespace": f"http://spdx.org/spdxdocs/{name}-{version}-{release}-{uuid.uuid4()}", + "creationInfo": { + "creators": [ + "Tool: mock-sbom-generator-1.0", + "Organization: Atomicorp" + ], + "created": creation_time + }, + "packages": [], + "files": [], + "relationships": [] + } + + # 1.5 Add Spec Metadata and Hardening Props to Document Comment + doc_metadata = [] + if spec_metadata: + build_reqs = spec_metadata.get("build_requires", []) + if build_reqs: + doc_metadata.append(f"Build-Requires: {', '.join(build_reqs)}") + reqs = spec_metadata.get("requires", []) + if reqs: + doc_metadata.append(f"Requires: {', '.join(reqs)}") + + # Hardening flags + if hardening_props: + for prop in hardening_props: + doc_metadata.append(f"{prop['name']}: {prop['value']}") + + if doc_metadata: + document["comment"] = " | ".join(doc_metadata) + + # Virtual Grouping Refs + inputs_ref = "SPDXRef-Build-Inputs" + toolchain_ref = "SPDXRef-Build-Toolchain" + outputs_ref = "SPDXRef-RPM-Contents" + + # 2. Add Grouping Packages (Represented as virtual packages) + document["packages"].extend([ + { + "name": "Build Inputs", + "SPDXID": inputs_ref, + "downloadLocation": "NOASSERTION", + "filesAnalyzed": False, + "comment": "Grouping node for source files and patches used in the build." + }, + { + "name": "Build Toolchain", + "SPDXID": toolchain_ref, + "downloadLocation": "NOASSERTION", + "filesAnalyzed": False, + "comment": "Grouping node for packages and tools used to perform the build." + }, + { + "name": "RPM Contents", + "SPDXID": outputs_ref, + "downloadLocation": "NOASSERTION", + "filesAnalyzed": False, + "comment": "Grouping node for RPM packages and their contained files produced by the build." 
+ } + ]) + + # Core relationships for the grouped architecture + document["relationships"].extend([ + {"spdxElementId": doc_spdx_id, "relatedSpdxElement": inputs_ref, "relationshipType": "CONTAINS"}, + {"spdxElementId": doc_spdx_id, "relatedSpdxElement": toolchain_ref, "relationshipType": "CONTAINS"}, + {"spdxElementId": doc_spdx_id, "relatedSpdxElement": outputs_ref, "relationshipType": "CONTAINS"} + ]) + + # 3. Process Source Files (Inputs) + for src_file in source_files: + spdx_file = self.create_spdx_file(src_file) + if spdx_file: + document["files"].append(spdx_file) + document["relationships"].append({ + "spdxElementId": inputs_ref, + "relatedSpdxElement": spdx_file["SPDXID"], + "relationshipType": "CONTAINS" + }) + + # 4. Process Build Toolchain (Grouped by Signer) + signer_groups = {} + for tc_pkg in build_toolchain_packages: + sig_info = tc_pkg.get("digital_signature", {}) + key_id = sig_info.get("signature_key", "unsigned") + + if key_id not in signer_groups: + safe_key = re.sub(r'[^a-zA-Z0-9.-]', '-', key_id) + signer_ref = f"SPDXRef-Signer-{safe_key}" + signer_pkg = { + "name": f"Packages signed by {key_id}" if key_id != "unsigned" else "Unsigned Packages", + "SPDXID": signer_ref, + "downloadLocation": "NOASSERTION", + "filesAnalyzed": False, + "comment": f"Grouping for build toolchain packages signed with GPG key {key_id}." + } + document["packages"].append(signer_pkg) + document["relationships"].append({ + "spdxElementId": toolchain_ref, + "relatedSpdxElement": signer_ref, + "relationshipType": "DEPENDS_ON" + }) + signer_groups[key_id] = signer_ref + + spdx_pkg = self.create_spdx_package_from_dict(tc_pkg) + if spdx_pkg: + document["packages"].append(spdx_pkg) + document["relationships"].append({ + "spdxElementId": signer_groups[key_id], + "relatedSpdxElement": spdx_pkg["SPDXID"], + "relationshipType": "DEPENDS_ON" + }) + + # 5. 
Process Build Artifacts (Outputs) + all_built_packages = [] + + for rpm_file in rpm_files: + rpm_path = os.path.join(build_dir, rpm_file) + spdx_pkg = self.create_spdx_package_from_rpm(rpm_path, distro_id) + if spdx_pkg: + all_built_packages.append((spdx_pkg, rpm_path)) + document["packages"].append(spdx_pkg) + document["relationships"].append({ + "spdxElementId": outputs_ref, + "relatedSpdxElement": spdx_pkg["SPDXID"], + "relationshipType": "DEPENDS_ON" + }) + + # Add file components if enabled + if self.include_file_components: + file_spdx_objs = self.create_file_components(rpm_path, spdx_pkg["SPDXID"]) + for file_obj in file_spdx_objs: + document["files"].append(file_obj) + document["relationships"].append({ + "spdxElementId": spdx_pkg["SPDXID"], + "relatedSpdxElement": file_obj["SPDXID"], + "relationshipType": "CONTAINS" + }) + + # 6. Select Primary Package for DESCRIBES relationship + if all_built_packages: + # Logic: Avoid debuginfo, prefer exact name match + primary_pkg_ref = self._select_primary_package(all_built_packages, name) + document["relationships"].append({ + "spdxElementId": doc_spdx_id, + "relatedSpdxElement": primary_pkg_ref, + "relationshipType": "DESCRIBES" + }) + + return document + + def _select_primary_package(self, pkg_tuples, subject_name): + """Selects the most suitable primary package from the list of built RPMs.""" + # tuples are (spdx_pkg, rpm_path) + candidates = [t for t in pkg_tuples if "debuginfo" not in t[0]["name"].lower()] + if not candidates: + candidates = pkg_tuples + + # Prefer exact name match + for pkg, _ in candidates: + if pkg["name"].lower() == subject_name.lower(): + return pkg["SPDXID"] + + # Fallback to the first non-debuginfo candidate + return candidates[0][0]["SPDXID"] + + # pylint: disable=too-many-locals,too-many-branches,too-many-statements + def create_spdx_package_from_rpm(self, rpm_path, distro_obj): + """Creates an SPDX Package from an RPM file, including all header metadata.""" + pkg_data = 
self.rpm_helper.get_rpm_metadata(rpm_path) + if not pkg_data: + self.buildroot.root_log.debug(f"[SBOM] FAILED to get metadata for {rpm_path}, skipping SPDX package") + return None + + name = pkg_data.get("name") + version = pkg_data.get("version") + release = pkg_data.get("release") + full_version = f"{version}-{release}" if release else version + + safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', name) + safe_ver = re.sub(r'[^a-zA-Z0-9.-]', '-', full_version) + spdx_id = f"SPDXRef-Package-{safe_name}-{safe_ver}" + + # SPDX Package Structure + package = { + "name": name, + "SPDXID": spdx_id, + "versionInfo": full_version, + "downloadLocation": "NOASSERTION", + "filesAnalyzed": self.include_file_components, + "supplier": "NOASSERTION", + "homepage": "NOASSERTION" + } + + # Map RPM Header Fields to SPDX pkg fields or comments + lic = pkg_data.get("license") + if lic and lic != "(none)": + package["licenseDeclared"] = lic + else: + package["licenseDeclared"] = "NOASSERTION" + package["licenseConcluded"] = "NOASSERTION" + package["copyrightText"] = "NOASSERTION" + + url = pkg_data.get("url") + if url and url != "(none)": + package["homepage"] = url + + packager = pkg_data.get("packager") + if packager and packager != "(none)": + package["supplier"] = f"Person: {packager}" + + # Store additional RPM metadata in a comment block + metadata_fields = [] + for key, label in [("vendor", "Vendor"), ("buildhost", "Build Host"), + ("group", "Group"), ("epoch", "Epoch"), + ("distribution", "Distribution"), ("arch", "Architecture")]: + val = pkg_data.get(key) + if val and val != "(none)": + metadata_fields.append(f"{label}: {val}") + + buildtime = pkg_data.get("buildtime") + if buildtime: + try: + dt = datetime.fromtimestamp(int(buildtime), timezone.utc) + metadata_fields.append(f"Build Time: {dt.isoformat()}") + except (ValueError, TypeError): + pass + + # GPG Signature Information + signature = self.rpm_helper.get_rpm_signature(rpm_path) + if signature: + metadata_fields.append(f"GPG 
Signature: {signature}") + + if metadata_fields: + package["comment"] = " | ".join(metadata_fields) + + # Checksums + rpm_hash = pkg_data.get("sha256") + if not rpm_hash or rpm_hash == "(none)": + rpm_hash = self.rpm_helper.hash_file(rpm_path) + + if rpm_hash: + package["checksums"] = [{"algorithm": "SHA256", "checksumValue": rpm_hash}] + + # External References (CPE and PURL) + external_refs = [] + vendor = pkg_data.get("vendor") + cpe = self.rpm_helper.generate_cpe(name, version, vendor=vendor) + if cpe: + external_refs.append({ + "referenceCategory": "SECURITY", + "referenceType": "cpe23Type", + "referenceLocator": cpe + }) + + purl = self.rpm_helper.generate_purl(name, full_version, distro_obj, pkg_data.get("arch")) + if purl: + external_refs.append({ + "referenceCategory": "PACKAGE-MANAGER", + "referenceType": "purl", + "referenceLocator": purl + }) + + if external_refs: + package["externalRefs"] = external_refs + + return package + + def create_spdx_package_from_dict(self, pkg_data): + """Creates an SPDX Package from a dictionary (e.g. 
toolchain).""" + name = pkg_data.get("name") + version = pkg_data.get("version") + if not name or not version: + self.buildroot.root_log.debug( + "[SBOM] Skipping toolchain package due to missing name/version" + ) + return None + + safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', name) + safe_ver = re.sub(r'[^a-zA-Z0-9.-]', '-', version) + spdx_id = f"SPDXRef-Package-{safe_name}-{safe_ver}" + + package = { + "name": name, + "SPDXID": spdx_id, + "versionInfo": version, + "downloadLocation": "NOASSERTION", + "filesAnalyzed": False, + "supplier": "NOASSERTION" + } + + lic = pkg_data.get("licenseDeclared") + if lic and lic != "(none)": + package["licenseDeclared"] = lic + else: + package["licenseDeclared"] = "NOASSERTION" + package["licenseConcluded"] = "NOASSERTION" + + # Checksums - REMOVED per user request to only have hashes for files contained in RPM + # (Follows CycloneDX parity where external toolchain hashes are omitted) + # checksum = pkg_data.get("checksum") + # if checksum and not checksum.startswith("error"): + # alg = "SHA256" if len(checksum) == 64 else "SHA1" + # package["checksums"] = [{ + # "algorithm": alg, + # "checksumValue": checksum + # }] + + return package + + def create_spdx_file(self, file_data, parent_pkg_id=None): + """Creates an SPDX File from file metadata.""" + filename = file_data.get("filename") + if not filename: + return None + + safe_name = re.sub(r'[^a-zA-Z0-9.-]', '-', filename) + # Use a more unique ID if parent is provided + if parent_pkg_id: + parent_suffix = parent_pkg_id.split("-")[-1] + spdx_id = f"SPDXRef-File-{safe_name}-{parent_suffix}" + else: + spdx_id = f"SPDXRef-File-{safe_name}" + + file_obj = { + "fileName": f"./{filename}", + "SPDXID": spdx_id, + "licenseConcluded": "NOASSERTION", + "copyrightText": "NOASSERTION" + } + + sha256 = file_data.get("sha256") + if sha256: + file_obj["checksums"] = [{"algorithm": "SHA256", "checksumValue": sha256}] + + # Store GPG flag as a comment if present + if 
file_data.get("digital_signature"): + file_obj["comment"] = f"Signature Status: {file_data['digital_signature']}" + + return file_obj + + def create_file_components(self, rpm_path, parent_spdx_id): + """Extracts file list from an RPM and creates SPDX File objects.""" + file_info = self.rpm_helper.get_rpm_file_info(rpm_path) or {} + spdx_files = [] + + for filename in sorted(file_info.keys()): + f_data = file_info[filename] + # Ensure filename is in the data dict for create_spdx_file + f_data["filename"] = filename + + # Filtering logic (man pages, debug files) + if not self.include_debug_files and (".build-id" in filename or ".debug" in filename): + continue + if not self.include_man_pages and ("/usr/share/man" in filename or "/usr/share/info" in filename): + continue + + f_obj = self.create_spdx_file(f_data, parent_pkg_id=parent_spdx_id) + if f_obj: + spdx_files.append(f_obj) + + return spdx_files diff --git a/mock/py/mockbuild/plugins/sbom_utils.py b/mock/py/mockbuild/plugins/sbom_utils.py new file mode 100644 index 000000000..b1642d720 --- /dev/null +++ b/mock/py/mockbuild/plugins/sbom_utils.py @@ -0,0 +1,756 @@ +# -*- coding: utf-8 -*- +# vim:expandtab:autoindent:tabstop=4:shiftwidth=4:filetype=python:textwidth=0: +# License: GPL2 or later see COPYING +# Written by Scott R. Shinn +# Copyright (C) 2026, Atomicorp, Inc. + +import os +import re +import subprocess +import hashlib +import traceback +import rpm +from datetime import datetime, timezone + +""" +Utility functions for the SBOM generator plugin. 
+""" + + +class RpmQueryHelper: + # pylint: disable=broad-exception-caught + """Helper class for querying RPM metadata.""" + + def __init__(self, buildroot): + """Initializes the helper with a buildroot for doChroot access.""" + self.buildroot = buildroot + + def _from_chroot_path(self, path): + """Standardizes from_chroot_path as a fallback for older mock versions.""" + if hasattr(self.buildroot, 'from_chroot_path'): + return self.buildroot.from_chroot_path(path) + + # Fallback implementation + rootdir = getattr(self.buildroot, 'rootdir', None) + if not rootdir: + return path + if path.startswith(rootdir): + rel_path = path[len(rootdir):] + if not rel_path.startswith("/"): + rel_path = "/" + rel_path + return rel_path + return path + + def _resolve_chroot_path(self, rpm_path): + """Resolves a host RPM path to its equivalent path inside the chroot if possible.""" + # Check if it's already a chroot path (from_chroot_path returns a path for any file in rootdir) + chroot_path = self._from_chroot_path(rpm_path) + if not chroot_path: + return None + + # Check if it's in the resultdir. 
If so, it should be in /builddir/build/RPMS + if rpm_path.startswith(self.buildroot.resultdir): + filename = os.path.basename(rpm_path) + # Search in common build directory structures inside the chroot + search_paths = [ + "/builddir/build/RPMS", + "/builddir/build/RPMS/x86_64", + "/builddir/build/RPMS/noarch", + "/builddir/build/SRPMS", + "/builddir/build/SOURCES" + ] + for search_path in search_paths: + candidate = os.path.join(search_path, filename) + # Verify existence via doChroot + cmd = ["ls", candidate] + try: + res, _ = self.buildroot.doChroot(cmd, shell=False, returnOutput=True, printOutput=False) + if res and candidate in res: + return candidate + except Exception: + pass + return None + + return chroot_path + + def generate_purl(self, package_name, version, distro_obj=None, arch=None): + """Generates a Package URL (PURL) for an RPM package.""" + # pkg:rpm/fedora/curl@7.50.3-1.fc25?arch=i386&distro=fedora-25 + # We simplify to pkg:rpm/distro/name@version?arch=arch + clean_name = re.sub(r'[^a-zA-Z0-9.-]', '-', package_name) + purl = f"pkg:rpm/{distro_obj}/{clean_name}@{version}" + if arch: + purl += f"?arch={arch}" + return purl + + def generate_cpe(self, package_name, package_version, vendor=None): + """Generates a CPE identifier for a package.""" + # CPE format: cpe:2.3:a:{vendor}:{product}:{version}:*:*:*:*:*:*:*:* + + # Default vendor if not provided + if not vendor or vendor == "(none)": + vendor = "unknown" + + # Clean up vendor name for CPE + vendor = re.sub(r'[^a-zA-Z0-9._-]', '_', vendor.lower()) + + # Clean up package name for CPE + product = re.sub(r'[^a-zA-Z0-9._-]', '_', package_name.lower()) + + # Clean up version for CPE (remove release part if present) + version = package_version + if '-' in version: + version = version.split('-')[0] # Remove release part + + # Generate CPE + cpe = f"cpe:2.3:a:{vendor}:{product}:{version}:*:*:*:*:*:*:*:*" + return cpe + + + def _parse_signature_data(self, sig_data, signature_info): + """Parses the raw 
signature string and updates the signature_info dict.""" + if sig_data and sig_data != "(none)" and sig_data != "": + signature_info["signature_type"] = "GPG" + signature_info["signature_valid"] = True + + # Parse signature line like: + # "RSA/SHA256, Fri 08 Nov 2024 ... Key ID ..." + if "RSA/SHA256" in sig_data: + signature_info["signature_algorithm"] = "RSA/SHA256" + elif "DSA/SHA1" in sig_data: + signature_info["signature_algorithm"] = "DSA/SHA1" + elif "ECDSA/SHA256" in sig_data: + signature_info["signature_algorithm"] = "ECDSA/SHA256" + elif "Ed25519/SHA256" in sig_data: + signature_info["signature_algorithm"] = "Ed25519/SHA256" + + # Extract key ID + if "Key ID" in sig_data: + key_id_match = re.search(r'Key ID ([0-9a-fA-F]+)', sig_data) + if key_id_match: + signature_info["signature_key"] = key_id_match.group(1) + + # Extract date - handle various time formats including EST/EDT + date_match = re.search( + r'([A-Za-z]{3} [A-Za-z]{3}\s+\d{1,2} \d{2}:\d{2}:\d{2} \d{4})', + sig_data + ) + if date_match: + signature_info["signature_date"] = date_match.group(1) + else: + signature_info["signature_type"] = "unsigned" + signature_info["signature_valid"] = False + + def get_rpm_metadata(self, rpm_path): + """Extracts metadata from an RPM file. 
+ Uses doChroot if the file is within the chroot to ensure compatibility.""" + if not os.path.isfile(rpm_path): + self.buildroot.root_log.debug(f"RPM file not found: {rpm_path}") + return {} + + # Try to resolve to a chroot path to prioritize chroot-native analysis + chroot_path = self._resolve_chroot_path(rpm_path) + if chroot_path: + self.buildroot.root_log.debug(f"[SBOM] Using chroot-native rpm for: {chroot_path}") + return self._get_rpm_metadata_chroot(chroot_path) + + # Fallback to host-native bindings + self.buildroot.root_log.debug(f"[SBOM] Using host-native analysis for: {rpm_path}") + return self._get_rpm_metadata_native(rpm_path) + + def _get_rpm_metadata_chroot(self, chroot_rpm_path): + """Extracts metadata using rpm -qp inside the chroot.""" + fields = { + "name": "%{NAME}", "version": "%{VERSION}", "release": "%{RELEASE}", + "arch": "%{ARCH}", "epoch": "%{EPOCH}", "summary": "%{SUMMARY}", + "license": "%{LICENSE}", "vendor": "%{VENDOR}", "url": "%{URL}", + "packager": "%{PACKAGER}", "buildtime": "%{BUILDTIME}", + "buildhost": "%{BUILDHOST}", "sourcerpm": "%{SOURCERPM}", + "group": "%{GROUP}", "distribution": "%{DISTRIBUTION}", + "sha256": "%{SHA256HEADER}" + } + + metadata = {} + try: + query = "|".join(fields.values()) + cmd = ["rpm", "-qp", "--queryformat", query, chroot_rpm_path] + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + + if output: + parts = output.split("|") + for i, field_name in enumerate(fields.keys()): + if i < len(parts): + val = parts[i].strip() + if field_name == "epoch" and (not val or val == "(none)"): + val = "0" + metadata[field_name] = val + return metadata + except Exception as e: + self.buildroot.root_log.debug(f"Failed to extract metadata via doChroot for {chroot_rpm_path}: {e}") + return {} + + def _get_rpm_metadata_native(self, rpm_path): + """Extracts metadata using native host bindings (fallback).""" + # pylint: disable=no-member + try: + ts = rpm.TransactionSet() + 
with open(rpm_path, "rb") as f: + hdr = ts.hdrFromFdno(f.fileno()) + + tag_map = { + "name": rpm.RPMTAG_NAME, "version": rpm.RPMTAG_VERSION, + "release": rpm.RPMTAG_RELEASE, "arch": rpm.RPMTAG_ARCH, + "epoch": rpm.RPMTAG_EPOCH, "summary": rpm.RPMTAG_SUMMARY, + "license": rpm.RPMTAG_LICENSE, "vendor": rpm.RPMTAG_VENDOR, + "url": rpm.RPMTAG_URL, "packager": rpm.RPMTAG_PACKAGER, + "buildtime": rpm.RPMTAG_BUILDTIME, "buildhost": rpm.RPMTAG_BUILDHOST, + "sourcerpm": rpm.RPMTAG_SOURCERPM, "group": rpm.RPMTAG_GROUP, + "distribution": rpm.RPMTAG_DISTRIBUTION, "sha256": rpm.RPMTAG_SHA256HEADER + } + + metadata = {} + for field_name, tag in tag_map.items(): + value = hdr[tag] + if field_name == "epoch" and value is None: + value = "0" + elif value is None: + value = "" + elif isinstance(value, bytes): + value = value.decode('utf-8', errors='replace') + metadata[field_name] = str(value) + return metadata + except Exception: + self.buildroot.root_log.debug(f"Failed to extract metadata via native bindings for {rpm_path}") + return {} + + + + def get_rpm_file_info(self, rpm_path): + """Extracts file hashes, ownership, and permissions from an RPM file.""" + chroot_path = self._resolve_chroot_path(rpm_path) + if chroot_path: + self.buildroot.root_log.debug(f"[SBOM] Using chroot-native file info for: {chroot_path}") + return self._get_rpm_file_info_chroot(chroot_path) + + self.buildroot.root_log.debug(f"[SBOM] Using host-native file info for: {rpm_path}") + return self._get_rpm_file_info_native(rpm_path) + + def _get_rpm_file_info_chroot(self, chroot_rpm_path): + """Extracts file info using rpm -qp inside the chroot.""" + file_info = {} + try: + # Query format for files: path|hash|mode|user|group + qf = "[%{FILENAMES}|%{FILEDIGESTS}|%{FILEMODES:octal}|%{FILEUSERNAME}|%{FILEGROUPNAME}\\n]" + cmd = ["rpm", "-qp", "--queryformat", qf, chroot_rpm_path] + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + + # Detect digest algorithm from 
header + cmd_algo = ["rpm", "-qp", "--queryformat", "%{FILEDIGESTALGO}", chroot_rpm_path] + algo_out, _ = self.buildroot.doChroot( + cmd_algo, shell=False, returnOutput=True, printOutput=False + ) + try: + algo = int(algo_out.strip()) if algo_out and algo_out.strip() else 8 + except ValueError: + algo = 8 + + for line in output.splitlines(): + parts = line.split("|") + if len(parts) >= 5: + filename = parts[0] + file_info[filename] = { + "hash": parts[1] if parts[1] and parts[1] != "(none)" else None, + "algo": algo, + "permissions": parts[2], + "owner": parts[3], + "group": parts[4] + } + return file_info + except Exception as e: + self.buildroot.root_log.debug(f"Failed to get file info via doChroot for {chroot_rpm_path}: {e}") + return {} + + def _get_rpm_file_info_native(self, rpm_path): + """Extracts file information using native host bindings (fallback).""" + # pylint: disable=no-member + file_info = {} + try: + ts = rpm.TransactionSet() + # pylint: disable=protected-access + ts.setVSFlags(rpm._RPMVSF_NOSIGNATURES | rpm._RPMVSF_NODIGESTS) + with open(rpm_path, "rb") as f: + hdr = ts.hdrFromFdno(f.fileno()) + + basenames = hdr[rpm.RPMTAG_BASENAMES] + dirnames = hdr[rpm.RPMTAG_DIRNAMES] + dirindexes = hdr[rpm.RPMTAG_DIRINDEXES] + filedigests = hdr[rpm.RPMTAG_FILEDIGESTS] + filemodes = hdr[rpm.RPMTAG_FILEMODES] + fileusernames = hdr[rpm.RPMTAG_FILEUSERNAME] + filegroupnames = hdr[rpm.RPMTAG_FILEGROUPNAME] + + try: + algo = hdr[rpm.RPMTAG_FILEDIGESTALGO] + except (KeyError, IndexError): + algo = 8 + + file_info = {} + for i, basename in enumerate(basenames): + dirname = dirnames[dirindexes[i]] + if isinstance(dirname, bytes): + dirname = dirname.decode('utf-8', 'replace') + if isinstance(basename, bytes): + basename = basename.decode('utf-8', 'replace') + filename = os.path.join(dirname, basename) + + digest = filedigests[i] + if isinstance(digest, bytes): + digest = digest.decode('utf-8') + + file_info[filename] = { + "hash": digest if digest else None, + "algo": 
algo, + "permissions": f"0{filemodes[i]:o}", + "owner": fileusernames[i].decode('utf-8', 'replace') if isinstance(fileusernames[i], bytes) else fileusernames[i], + "group": filegroupnames[i].decode('utf-8', 'replace') if isinstance(filegroupnames[i], bytes) else filegroupnames[i] + } + return file_info + except Exception: + return {} + + def get_rpm_dependencies(self, rpm_path): + """Extracts the list of dependencies from an RPM file.""" + chroot_path = self._resolve_chroot_path(rpm_path) + if chroot_path: + self.buildroot.root_log.debug(f"[SBOM] Using chroot-native dependencies for: {chroot_path}") + return self._get_rpm_dependencies_chroot(chroot_path) + + self.buildroot.root_log.debug(f"[SBOM] Using host-native dependencies for: {rpm_path}") + return self._get_rpm_dependencies_native(rpm_path) + + def _get_rpm_dependencies_chroot(self, chroot_rpm_path): + """Extracts dependencies using rpm -qpR inside the chroot.""" + try: + cmd = ["rpm", "-qpR", chroot_rpm_path] + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + return output.splitlines() if output else [] + except Exception: + return [] + + def _get_rpm_dependencies_native(self, rpm_path): + """Extracts dependencies using native host bindings (fallback).""" + # pylint: disable=no-member + try: + ts = rpm.TransactionSet() + with open(rpm_path, "rb") as f: + hdr = ts.hdrFromFdno(f.fileno()) + + requirements = hdr[rpm.RPMTAG_REQUIRENAME] + if not requirements: + return [] + + return [r.decode('utf-8', 'replace') if isinstance(r, bytes) else str(r) for r in requirements] + except Exception: # pylint: disable=broad-exception-caught + self.buildroot.root_log.debug(f"Failed to extract dependencies via native bindings for {rpm_path}") + return [] + + def get_rpm_signature(self, rpm_path): + """Extracts the GPG signature of an RPM file.""" + chroot_path = self._resolve_chroot_path(rpm_path) + if chroot_path: + self.buildroot.root_log.debug(f"[SBOM] Using chroot-native 
signature query for: {chroot_path}") + return self._get_rpm_signature_chroot(chroot_path) + + self.buildroot.root_log.debug(f"[SBOM] Using host-native signature query for: {rpm_path}") + return self._get_rpm_signature_host(rpm_path) + + def _get_rpm_signature_chroot(self, chroot_rpm_path): + """Extracts signature using rpm inside the chroot.""" + try: + # Try to get it via queryformat first + cmd = ["rpm", "-qp", "--queryformat", "%{SIGPGP:pgpsig} %{SIGGPG:pgpsig}", chroot_rpm_path] + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + sig = output.strip() if output else "" + if sig and sig != "(none) (none)" and sig != "(none)": + return sig.replace("(none)", "").strip() + + # Fallback to rpm -qip + cmd = ["rpm", "-qip", chroot_rpm_path] + output, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + if output: + for line in output.splitlines(): + if "Signature" in line and ":" in line: + sig_val = line.split(":", 1)[1].strip() + if sig_val and sig_val != "(none)": + return sig_val + return None + except Exception: + return None + + def _get_rpm_signature_host(self, rpm_path): + """Extracts signature using host tools (fallback).""" + try: + # Query format for signatures + cmd = ["rpm", "-qp", "--queryformat", "%{SIGPGP:pgpsig} %{SIGGPG:pgpsig}", rpm_path] + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + sig = result.stdout.strip() + if sig and sig != "(none) (none)" and sig != "(none)": + return sig.replace("(none)", "").strip() + + # Second try via -qip + cmd = ["rpm", "-qip", rpm_path] + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + for line in result.stdout.splitlines(): + if "Signature" in line and ":" in line: + sig_val = line.split(":", 1)[1].strip() + if sig_val and sig_val != "(none)": + return sig_val + return None + except Exception: + return None + + + def 
hash_file(self, file_path): + """Calculates the SHA256 hash of a file.""" + sha256 = hashlib.sha256() + try: + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + sha256.update(chunk) + return sha256.hexdigest() + # pylint: disable=broad-exception-caught + except Exception as e: + self.buildroot.root_log.debug(f"Failed to hash file {file_path}: {e}") + return None + + def extract_source_files_from_srpm(self, src_rpm_path): + """Extracts metadata for source files from a source RPM without full extraction.""" + # pylint: disable=no-member + self.buildroot.root_log.debug(f"Extracting source metadata from source RPM: {src_rpm_path}") + source_files = [] + if not os.path.isfile(src_rpm_path): + return source_files + try: + ts = rpm.TransactionSet() + with open(src_rpm_path, "rb") as f: + hdr = ts.hdrFromFdno(f.fileno()) + + basenames = hdr[rpm.RPMTAG_BASENAMES] + digests = hdr[rpm.RPMTAG_FILEDIGESTS] + + # Create a set for quick lookup of signature files + file_set = set(basenames) + + for filename, sha256 in zip(basenames, digests): + if filename.endswith(".spec"): + continue + + signature = None + if filename.endswith(".asc") or filename.endswith(".sig"): + signature = "File is a signature file" + else: + for ext in [".asc", ".sig"]: + if filename + ext in file_set: + signature = f"GPG signature file exists: {filename}{ext}" + break + + source_files.append({ + "filename": filename, + "sha256": sha256, + "digital_signature": signature + }) + except Exception as e: + self.buildroot.root_log.debug(f"Failed to extract source metadata from {src_rpm_path}: {e}") + + return source_files + + + + def parse_spec_file(self, spec_path): + """Parses a spec file to extract metadata and source/patch files using the specfile library.""" + self.buildroot.root_log.debug(f"[SBOM] Parsing spec file: {spec_path}") + + sources = [] + metadata = { + "name": "", + "version": "", + "release": "", + "license": "", + "build_requires": [], + "requires": [] + } + + 
if not os.path.isfile(spec_path): + self.buildroot.root_log.debug(f"Spec file not found: {spec_path}") + return metadata, sources + try: + chroot_spec_path = self._from_chroot_path(spec_path) or spec_path + # Use rpmspec --parse inside the build chroot to ensure macro expansion + # matches the build environment exactly. + cmd = ["rpmspec", "--parse", chroot_spec_path] + result, _ = self.buildroot.doChroot( + cmd, shell=False, returnOutput=True, printOutput=False + ) + + if not result: + # If doChroot returned empty, try reading local spec as fallback + try: + with open(spec_path, 'r', encoding='utf-8') as f: + result = f.read() + except Exception: + return metadata, sources + + try: + from specfile import Specfile + # Use specfile to parse the expanded content + spec = Specfile(content=result, sourcedir=os.path.dirname(spec_path)) + + + # Extract canonical metadata + metadata.update({ + "name": spec.expanded_name, + "version": spec.expanded_version, + "release": spec.expanded_release, + "license": spec.expanded_license, + }) + + # Extract BuildRequires and Requires from headers + try: + br = spec.rpm_spec.sourceHeader[rpm.RPMTAG_REQUIRENAME] + metadata["build_requires"] = [ + r.decode('utf-8', 'replace') if isinstance(r, bytes) else str(r) + for r in br + ] if br else [] + except (AttributeError, KeyError): + metadata["build_requires"] = [] + + try: + r = spec.rpm_spec.packages[0].header[rpm.RPMTAG_REQUIRENAME] + metadata["requires"] = [ + req.decode('utf-8', 'replace') if isinstance(req, bytes) else str(req) + for req in r + ] if r else [] + except (AttributeError, KeyError, IndexError): + metadata["requires"] = [] + + # Extract both sources and patches from the spec object model + all_locs = [] + with spec.sources() as spec_sources: + all_locs.extend(s.location for s in spec_sources if s.location) + with spec.patches() as spec_patches: + all_locs.extend(p.location for p in spec_patches if p.location) + + for loc in all_locs: + filename, _, hash_value = 
loc.partition('#') + actual_filename = os.path.basename(filename) + build_dir = os.path.dirname(spec_path) + sources_dir = os.path.join(os.path.dirname(build_dir), "SOURCES") + file_path = os.path.join(sources_dir, actual_filename) + + actual_hash = None + if os.path.isfile(file_path): + actual_hash = self.rpm_helper.hash_file(file_path) + elif hash_value: + actual_hash = hash_value + + signature = ( + self.get_file_signature(file_path) if os.path.isfile(file_path) else None + ) + + sources.append({ + "filename": actual_filename, + "sha256": actual_hash, + "digital_signature": signature + }) + self.buildroot.root_log.debug(f"Extracted metadata {metadata} and {len(sources)} source/patch files from spec") + + # Double check we actually got metadata + if not metadata.get("name"): + raise ValueError("Empty metadata from Specfile") + + except Exception as e: + self.buildroot.root_log.debug(f"[SBOM] FALLBACK: Specfile library failed for {spec_path}, trying regex: {e}") + + # Ensure result is a string for regex + content = str(result) if result else "" + + # Fallback to simple regex parsing of the expanded result + name_match = (re.search(r'^Name:\s+(.+)$', content, re.MULTILINE) or + re.search(r'^name\s*:\s*(.+)$', content, re.IGNORECASE | re.MULTILINE)) + version_match = (re.search(r'^Version:\s+(.+)$', content, re.MULTILINE) or + re.search(r'^version\s*:\s*(.+)$', content, re.IGNORECASE | re.MULTILINE)) + release_match = (re.search(r'^Release:\s+(.+)$', content, re.MULTILINE) or + re.search(r'^release\s*:\s*(.+)$', content, re.IGNORECASE | re.MULTILINE)) + license_match = (re.search(r'^License:\s+(.+)$', content, re.MULTILINE) or + re.search(r'^license\s*:\s*(.+)$', content, re.IGNORECASE | re.MULTILINE)) + + metadata["name"] = name_match.group(1).strip() if name_match else "" + metadata["version"] = version_match.group(1).strip() if version_match else "" + metadata["release"] = release_match.group(1).strip() if release_match else "" + metadata["license"] = 
license_match.group(1).strip() if license_match else "" + + # Simple source/patch extraction from expanded spec + source_matches = re.finditer(r'^(Source|Patch)\d*:\s+(.+)$', content, re.MULTILINE) + for sm in source_matches: + loc = sm.group(2).strip() + filename = os.path.basename(loc.partition('#')[0]) + # Avoid duplicates + if not any(s['filename'] == filename for s in sources): + sources.append({ + "filename": filename, + "sha256": None, + "digital_signature": None + }) + except Exception as e: + self.buildroot.root_log.debug(f"Failed to parse spec file {spec_path}: {e}") + self.buildroot.root_log.debug(traceback.format_exc()) + + return metadata, sources + + def detect_chroot_distribution(self): + """Detects the distribution ID (e.g., 'fedora', 'centos', 'rhel') from inside the chroot.""" + try: + import distro + try: + distro_id = distro.id(root_dir=self.buildroot.rootdir) + except (TypeError, AttributeError): + # Fallback for older python-distro versions (<1.6.0) + os_release = os.path.join(self.buildroot.rootdir, "etc/os-release") + distro_id = "unknown" + if os.path.isfile(os_release): + with open(os_release, 'r') as f: + for line in f: + if line.startswith("ID="): + distro_id = line.split("=")[1].strip().strip('"').strip("'") + break + + if distro_id: + return distro_id.lower() + return "unknown" + except Exception as e: + self.buildroot.root_log.debug(f"Failed to detect chroot distribution: {e}") + return "unknown" + + def get_build_toolchain_packages(self): + """Returns the list of packages installed in the build toolchain + with detailed signature information collected in a single batch query.""" + try: + fields = [ + "%{NAME}", "%{VERSION}-%{RELEASE}", "%{ARCH}", "%{LICENSE}", + "%{BUILDTIME}", "%{RSAHEADER:pgpsig}", "%{DSAHEADER:pgpsig}", + "%{SIGGPG:pgpsig}", "%{SIGPGP:pgpsig}", "%{SHA256HEADER}", + "%{SOURCERPM}" + ] + query = "|".join(fields) + "\n" + cmd = ["rpm", "-qa", "--qf", query] + output, _ = self.buildroot.doChroot( + cmd, shell=False, 
returnOutput=True, printOutput=False + ) + + packages = [] + cpe_vendor_default = self.detect_chroot_distribution() or "unknown" + + for line in output.splitlines(): + parts = line.split("|") + if len(parts) < 6: + continue + + package_name = parts[0].strip() + package_version = parts[1].strip() + package_arch = parts[2].strip() + package_license = parts[3].strip() + build_time = parts[4].strip() + + raw_sig = None + for sig_candidate in parts[5:9]: + sig_candidate = sig_candidate.strip() + if sig_candidate and sig_candidate != "(none)": + raw_sig = sig_candidate + break + + package_checksum = parts[9].strip() if len(parts) > 9 else None + if package_checksum == "(none)": + package_checksum = None + + source_rpm = parts[10].strip() if len(parts) > 10 else None + if source_rpm == "(none)": + source_rpm = None + + if ( + package_name.startswith('gpg-pubkey') or + package_name == '(none)' or + not package_name + ): + continue + + digital_signature = { + "signature_type": "unsigned", + "signature_key": None, + "signature_date": None, + "signature_algorithm": None, + "signature_valid": False, + "raw_signature_data": raw_sig, + "build_date": None + } + + if raw_sig: + self._parse_signature_data(raw_sig, digital_signature) + + if build_time and build_time.isdigit(): + try: + dt = datetime.fromtimestamp(int(build_time), tz=timezone.utc) + digital_signature["build_date"] = dt.isoformat() + except (ValueError, TypeError, OverflowError): + pass + + cpe = self.generate_cpe(package_name, package_version, vendor=cpe_vendor_default) + + packages.append({ + "name": package_name, + "version": package_version, + "arch": package_arch, + "licenseDeclared": package_license, + "digital_signature": digital_signature, + "sourcerpm": source_rpm, + "cpe": cpe, + "checksum": package_checksum + }) + + self.buildroot.root_log.debug(f"Found {len(packages)} build toolchain packages") + return packages + except Exception as e: + self.buildroot.root_log.debug(f"Failed to get build environment 
packages: {e}") + return [] + + def get_distribution(self): + """Detects the distribution from the chroot environment (human readable).""" + try: + os_release = os.path.join(self.buildroot.rootdir, "etc/os-release") + distro_name = "Unknown" + version = "" + if os.path.isfile(os_release): + with open(os_release, 'r') as f: + for line in f: + if line.startswith("NAME="): + distro_name = line.strip().split("=", 1)[1].strip('"') + elif line.startswith("VERSION_ID="): + version = line.strip().split("=", 1)[1].strip('"') + if distro_name and version: + return f"{distro_name} {version}" + return distro_name or "Unknown" + except OSError as e: + return f"Unknown ({e})" + + + + diff --git a/releng/release-notes-next/sbom-generator.feature.md b/releng/release-notes-next/sbom-generator.feature similarity index 84% rename from releng/release-notes-next/sbom-generator.feature.md rename to releng/release-notes-next/sbom-generator.feature index 7c2206f78..1a38a5cc6 100644 --- a/releng/release-notes-next/sbom-generator.feature.md +++ b/releng/release-notes-next/sbom-generator.feature @@ -5,4 +5,5 @@ full audit traceability by linking built RPMs with their original source tarballs and patches, including SHA-256 hashes. Supporting both CycloneDX 1.5 and SPDX 2.3 formats, the plugin leverages a chroot-native analysis model to ensure high metadata accuracy for cross-distribution builds and compatibility -with modern security scanners. +with modern security scanners, File Integrity Monitoring (FIM), and Supply Chain +forensic analysis. From 01b4db0e4bd6e19876eee90688eca23cadffb733 Mon Sep 17 00:00:00 2001 From: "Scott R. Shinn" Date: Sat, 4 Apr 2026 19:14:28 -0400 Subject: [PATCH 25/25] Bump for build test Signed-off-by: Scott R. 
Shinn --- releng/release-notes-next/sbom-generator.feature | 1 + 1 file changed, 1 insertion(+) diff --git a/releng/release-notes-next/sbom-generator.feature b/releng/release-notes-next/sbom-generator.feature index 1a38a5cc6..a1b6f04a2 100644 --- a/releng/release-notes-next/sbom-generator.feature +++ b/releng/release-notes-next/sbom-generator.feature @@ -7,3 +7,4 @@ and SPDX 2.3 formats, the plugin leverages a chroot-native analysis model to ensure high metadata accuracy for cross-distribution builds and compatibility with modern security scanners, File Integrity Monitoring (FIM), and Supply Chain forensic analysis. +