|
| 1 | +# |
| 2 | +# Copyright (c) nexB Inc. and others. All rights reserved. |
| 3 | +# VulnerableCode is a trademark of nexB Inc. |
| 4 | +# SPDX-License-Identifier: Apache-2.0 |
| 5 | +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. |
| 6 | +# See https://github.com/aboutcode-org/vulnerablecode for support or download. |
| 7 | +# See https://aboutcode.org for more information about nexB OSS projects. |
| 8 | +# |
| 9 | +from collections import defaultdict |
| 10 | +from pathlib import Path |
| 11 | +from typing import Iterable |
| 12 | + |
| 13 | +import saneyaml |
| 14 | +from fetchcode.vcs import fetch_via_vcs |
| 15 | +from packageurl import PackageURL |
| 16 | +from univers.version_range import RANGE_CLASS_BY_SCHEMES |
| 17 | +from univers.versions import InvalidVersion |
| 18 | + |
| 19 | +from vulnerabilities.importer import AdvisoryData |
| 20 | +from vulnerabilities.importer import AffectedPackageV2 |
| 21 | +from vulnerabilities.importer import ReferenceV2 |
| 22 | +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 |
| 23 | +from vulnerabilities.pipes.advisory import append_patch_classifications |
| 24 | +from vulnerabilities.utils import get_advisory_url |
| 25 | +from vulnerabilities.utils import is_commit |
| 26 | + |
| 27 | + |
class ProjectKBStatementsPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    ProjectKB Importer Pipeline

    Collect advisories from ProjectKB data:
    - YAML statements: https://github.com/SAP/project-kb/blob/vulnerability-data/statements/*/*.yaml

    Each ``statement.yaml`` file yields at most one ``AdvisoryData`` built from
    its ``vulnerability_id``, ``notes``, ``fixes`` and ``artifacts`` entries.
    """

    pipeline_id = "project-kb-statements_v2"
    spdx_license_expression = "Apache-2.0"
    license_url = "https://github.com/SAP/project-kb/blob/main/LICENSE.txt"
    repo_url = "git+https://github.com/SAP/project-kb@vulnerability-data"

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps: clone, collect/store, then clean up."""
        return (
            cls.clone,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    def clone(self):
        """Fetch the ProjectKB repository and keep the response on ``self.vcs_response``."""
        self.log("Cloning ProjectKB Statements advisory data...")
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def advisories_count(self):
        """Return the number of ``statement.yaml`` files found in the cloned repository."""
        # Count exactly the files that collect_advisories() iterates over so the
        # estimate and the actual work agree.
        count = sum(1 for _ in self._statement_files(self._statements_dir()))
        self.log(f"Estimated advisories to process: {count}")
        return count

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """
        Yield one AdvisoryData per ``statement.yaml`` file that has a ``vulnerability_id``.

        Files that do not parse to a YAML mapping, or that lack a
        ``vulnerability_id``, are skipped.
        """
        self.log("Collecting fix commits from YAML statements under /statements....")
        base_path = self._statements_dir()

        for yaml_file in self._statement_files(base_path):
            with open(yaml_file, encoding="utf-8") as f:
                yaml_data = saneyaml.load(f)

            # An empty file loads as None and a scalar/list document is not a
            # statement: skip it rather than fail the whole run on ``.get``.
            if not isinstance(yaml_data, dict):
                self.log(f"Skipping {yaml_file}: content is not a YAML mapping")
                continue

            vulnerability_id = yaml_data.get("vulnerability_id")
            if not vulnerability_id:
                continue

            description, references = self._collect_notes(yaml_data)

            affected_packages = []
            patches = []
            # Patch classification may append to any of the three lists.
            self._collect_patches(
                yaml_data=yaml_data,
                affected_packages=affected_packages,
                references=references,
                patches=patches,
            )
            affected_packages.extend(self._collect_affected_packages(yaml_data))

            advisory_url = get_advisory_url(
                file=yaml_file,
                base_path=base_path,
                url="https://github.com/SAP/project-kb/blob/vulnerability-data/statements/",
            )

            yield AdvisoryData(
                advisory_id=vulnerability_id,
                summary=description,
                affected_packages=affected_packages,
                references_v2=references,
                patches=patches,
                url=advisory_url,
            )

    def clean_downloads(self):
        """Remove the cloned repository from disk."""
        self.log("Removing cloned repository...")

        # ``vcs_response`` is only set by clone(); when clone() itself failed,
        # on_failure() still calls this method, so the attribute may be absent.
        vcs_response = getattr(self, "vcs_response", None)
        if vcs_response:
            vcs_response.delete()

    def on_failure(self):
        """Ensure cleanup happens on pipeline failure."""
        self.clean_downloads()

    def _statements_dir(self):
        """Return the path of the ``statements`` directory inside the cloned repository."""
        return Path(self.vcs_response.dest_dir) / "statements"

    def _statement_files(self, base_path):
        """Yield every file named exactly ``statement.yaml`` found under ``base_path``."""
        for yaml_file in base_path.rglob("*.yaml"):
            if yaml_file.name == "statement.yaml":
                yield yaml_file

    def _collect_notes(self, yaml_data):
        """
        Return a ``(description, references)`` tuple built from the ``notes`` entries.

        The description is the note texts joined by newlines; references is a
        list of ReferenceV2, one per non-empty note link.
        """
        note_texts = []
        references = []
        # ``or []`` also covers a key that is present with a null value.
        for note_entry in yaml_data.get("notes") or []:
            text_content = note_entry.get("text")
            if text_content:
                note_texts.append(text_content)

            # Links are kept even when the note has no text, so that the
            # references of a text-less note are not lost.
            for link in note_entry.get("links") or []:
                if link:
                    references.append(ReferenceV2(url=link))

        return "\n".join(note_texts), references

    def _collect_patches(self, yaml_data, affected_packages, references, patches):
        """
        Classify each fix commit of ``yaml_data`` through
        ``append_patch_classifications``, which appends to the provided
        ``affected_packages``, ``references`` and ``patches`` lists.
        """
        for fix in yaml_data.get("fixes") or []:
            for commit in fix.get("commits") or []:
                commit_hash = commit.get("id")
                # An id that is not a commit hash is passed on as None.
                if not is_commit(commit_hash):
                    commit_hash = None

                append_patch_classifications(
                    url=commit.get("repository"),
                    commit_hash=commit_hash,
                    patch_text=None,
                    affected_packages=affected_packages,
                    references=references,
                    patches=patches,
                )

    def _collect_affected_packages(self, yaml_data):
        """
        Return a list of AffectedPackageV2 built from the ``artifacts`` entries.

        Artifacts are grouped by versionless purl (type, namespace, name); the
        versions of artifacts flagged ``affected`` form the affected range and
        the versions of all other artifacts form the fixed range.
        """
        # base purl -> (affected versions, fixed versions)
        versions_by_purl = defaultdict(lambda: ([], []))

        for artifact in yaml_data.get("artifacts") or []:
            purl_str = artifact.get("id")
            try:
                purl = PackageURL.from_string(purl_str)
            except ValueError:
                self.log(f"Invalid PackageURL: {purl_str!r}")
                continue

            # Purl types without a known version range class cannot be handled.
            if not RANGE_CLASS_BY_SCHEMES.get(purl.type):
                continue

            # A purl without a version carries no affected/fixed information
            # and must not reach ``from_versions`` as None.
            if not purl.version:
                self.log(f"Skipping artifact without a version: {purl_str!r}")
                continue

            base_purl = PackageURL(
                type=purl.type,
                namespace=purl.namespace,
                name=purl.name,
            )

            affected_versions, fixed_versions = versions_by_purl[base_purl]
            # NOTE(review): an artifact without an "affected" key is counted as
            # fixed, as before; confirm this matches the statement schema.
            if artifact.get("affected"):
                affected_versions.append(purl.version)
            else:
                fixed_versions.append(purl.version)

        affected_packages = []
        for base_purl, (affected_versions, fixed_versions) in versions_by_purl.items():
            affected_range = self._build_version_range(
                base_purl=base_purl,
                versions=affected_versions,
                label="affected",
            )
            fixed_range = self._build_version_range(
                base_purl=base_purl,
                versions=fixed_versions,
                label="fixed",
            )

            if affected_range or fixed_range:
                affected_packages.append(
                    AffectedPackageV2(
                        package=base_purl,
                        affected_version_range=affected_range,
                        fixed_version_range=fixed_range,
                    )
                )

        return affected_packages

    def _build_version_range(self, base_purl, versions, label):
        """
        Return a version range for ``versions`` using the range class of
        ``base_purl.type``, or None when ``versions`` is empty or invalid.

        ``label`` ("affected" or "fixed") is only used in the log message.
        """
        if not versions:
            return None

        version_range_class = RANGE_CLASS_BY_SCHEMES.get(base_purl.type)
        try:
            return version_range_class.from_versions(versions)
        except InvalidVersion as e:
            self.log(f"Invalid {label} versions for {base_purl}: {e}")
            return None
0 commit comments