Skip to content

Commit 219903d

Browse files
authored
feat: add GitHub attestation discovery (#1020)
This PR allows Macaron to discover GitHub attestations. To retrieve these attestations, the SHA256 hash of the related artefact is required. Hashes are computed from local artefact files if available, or from downloaded ones otherwise. Signed-off-by: Ben Selwyn-Smith <[email protected]>
1 parent 612e27e commit 219903d

File tree

24 files changed

+742
-119
lines changed

24 files changed

+742
-119
lines changed

src/macaron/artifact/local_artifact.py

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
1-
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module declares types and utilities for handling local artifacts."""
55

66
import fnmatch
77
import glob
8+
import hashlib
9+
import logging
810
import os
911

1012
from packageurl import PackageURL
1113

12-
from macaron.artifact.maven import construct_maven_repository_path
14+
from macaron.artifact.maven import construct_maven_repository_path, construct_primary_jar_file_name
1315
from macaron.errors import LocalArtifactFinderError
1416

17+
logger: logging.Logger = logging.getLogger(__name__)
18+
1519

1620
def construct_local_artifact_dirs_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None:
1721
"""Return a list of glob pattern(s) representing the directory that contains the local maven artifacts for ``maven_purl``.
@@ -247,3 +251,55 @@ def get_local_artifact_dirs(
247251
)
248252

249253
raise LocalArtifactFinderError(f"Unsupported PURL type {purl_type}")
254+
255+
256+
def get_local_artifact_hash(purl: PackageURL, artifact_dirs: list[str]) -> str | None:
    """Compute the SHA256 hash of the local artifact.

    Only Maven PURLs are currently handled; the file sought is the one named by
    ``construct_primary_jar_file_name``. The directories are tried in order and the hash of the
    first file that can be read is returned. Files that cannot be opened or read are logged at
    debug level and skipped, so the search continues with the next directory.

    Parameters
    ----------
    purl: PackageURL
        The PURL of the artifact being sought.
    artifact_dirs: list[str]
        The list of directories that may contain the artifact file.

    Returns
    -------
    str | None
        The hexadecimal SHA256 digest, or None if not found.
    """
    if not artifact_dirs:
        logger.debug("No artifact directories provided.")
        return None

    if not purl.version:
        logger.debug("PURL is missing version.")
        return None

    artifact_target = None
    if purl.type == "maven":
        artifact_target = construct_primary_jar_file_name(purl)

    # TODO add support for other PURL types here.
    # Other purl types can be easily supported if user provided artifacts are accepted from the command line.
    # See https://github.com/oracle/macaron/issues/498.

    if not artifact_target:
        logger.debug("PURL type not supported: %s", purl.type)
        return None

    for artifact_dir in artifact_dirs:
        full_path = os.path.join(artifact_dir, artifact_target)
        # Only regular files are candidates; a directory sharing the artifact's name cannot be hashed.
        if not os.path.isfile(full_path):
            continue

        # Opening and reading are guarded together: the file may vanish or be unreadable between
        # the check above and the open, and that must not abort the search of the remaining dirs.
        try:
            with open(full_path, "rb") as file:
                hash_result = hashlib.file_digest(file, "sha256")
        except (OSError, ValueError) as error:
            logger.debug("Error while hashing file: %s", error)
            continue

        return hash_result.hexdigest()

    return None

src/macaron/artifact/maven.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module declares types and utilities for Maven artifacts."""
@@ -196,3 +196,22 @@ def construct_maven_repository_path(
196196
if asset_name:
197197
path = "/".join([path, asset_name])
198198
return path
199+
200+
201+
def construct_primary_jar_file_name(purl: PackageURL) -> str | None:
    """Build the file name of the primary JAR for a PURL, following the Maven registry standard.

    Parameters
    ----------
    purl: PackageURL
        The PURL describing the artifact.

    Returns
    -------
    str | None
        The file name in the form ``<name>-<version>.jar``, or None when the PURL has no version.
    """
    version = purl.version
    if version:
        # The name and version are separated by a single hyphen, followed by the .jar extension.
        return "".join((purl.name, "-", version, ".jar"))

    return None

src/macaron/malware_analyzer/pypi_heuristics/sourcecode/suspicious_setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def _get_setup_source_code(self, pypi_package_json: PyPIPackageJsonAsset) -> str
5959
response = requests.get(sourcecode_url, stream=True, timeout=40)
6060
response.raise_for_status()
6161
except requests.exceptions.HTTPError as http_err:
62-
logger.debug("HTTP error occurred: %s", http_err)
62+
logger.debug("HTTP error occurred when trying to download source: %s", http_err)
6363
return None
6464

6565
if response.status_code != 200:

src/macaron/repo_finder/repo_finder.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,12 @@ def find_repo_alternative(
166166
found_repo, outcome = repo_finder_pypi.find_repo(purl, package_registries_info)
167167

168168
if not found_repo:
169-
logger.debug("Could not find repository using type specific (%s) methods for PURL: %s", purl.type, purl)
169+
logger.debug(
170+
"Could not find repository using type specific (%s) methods for PURL %s. Outcome: %s",
171+
purl.type,
172+
purl,
173+
outcome,
174+
)
170175

171176
return found_repo, outcome
172177

src/macaron/repo_finder/repo_finder_pypi.py

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from macaron.repo_finder.repo_finder_enums import RepoFinderInfo
1010
from macaron.repo_finder.repo_validator import find_valid_repository_url
1111
from macaron.slsa_analyzer.package_registry import PACKAGE_REGISTRIES, PyPIRegistry
12-
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset
12+
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, find_or_create_pypi_asset
1313
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
1414

1515
logger: logging.Logger = logging.getLogger(__name__)
@@ -44,38 +44,29 @@ def find_repo(
4444
),
4545
None,
4646
)
47+
if not pypi_info:
48+
return "", RepoFinderInfo.PYPI_NO_REGISTRY
4749

48-
if not pypi_info or not isinstance(pypi_info.package_registry, PyPIRegistry):
49-
pypi_registry = next((registry for registry in PACKAGE_REGISTRIES if isinstance(registry, PyPIRegistry)), None)
50-
else:
51-
pypi_registry = pypi_info.package_registry
52-
53-
if not pypi_registry:
54-
logger.debug("PyPI package registry not available.")
55-
return "", RepoFinderInfo.PYPI_NO_REGISTRY
50+
if not purl.version:
51+
return "", RepoFinderInfo.NO_VERSION_PROVIDED
5652

57-
pypi_asset = None
58-
from_metadata = False
53+
# Create the asset.
5954
if pypi_info:
60-
for existing_asset in pypi_info.metadata:
61-
if not isinstance(existing_asset, PyPIPackageJsonAsset):
62-
continue
63-
64-
if existing_asset.component_name == purl.name:
65-
pypi_asset = existing_asset
66-
from_metadata = True
67-
break
55+
pypi_asset = find_or_create_pypi_asset(purl.name, purl.version, pypi_info)
56+
else:
57+
# If this function has been reached via find-source, we do not store the asset.
58+
pypi_registry = next((registry for registry in PACKAGE_REGISTRIES if isinstance(registry, PyPIRegistry)), None)
59+
if not pypi_registry:
60+
return "", RepoFinderInfo.PYPI_NO_REGISTRY
61+
pypi_asset = PyPIPackageJsonAsset(purl.name, purl.version, False, pypi_registry, {}, "")
6862

6963
if not pypi_asset:
70-
pypi_asset = PyPIPackageJsonAsset(purl.name, purl.version, False, pypi_registry, {}, "")
64+
# This should be unreachable, as the pypi_registry has already been confirmed to be of type PyPIRegistry.
65+
return "", RepoFinderInfo.PYPI_NO_REGISTRY
7166

7267
if not pypi_asset.package_json and not pypi_asset.download(dest=""):
7368
return "", RepoFinderInfo.PYPI_HTTP_ERROR
7469

75-
if not from_metadata and pypi_info:
76-
# Save the asset for later use.
77-
pypi_info.metadata.append(pypi_asset)
78-
7970
url_dict = pypi_asset.get_project_links()
8071
if not url_dict:
8172
return "", RepoFinderInfo.PYPI_JSON_ERROR

0 commit comments

Comments
 (0)