Skip to content

Commit 51acca1

Browse files
authored
Mirror attestations during BCR post-submit. (#2185)
Progress towards #2177
1 parent b7b9fa9 commit 51acca1

File tree

1 file changed

+132
-1
lines changed

1 file changed

+132
-1
lines changed

buildkite/bazel-central-registry/bcr_postsubmit.py

Lines changed: 132 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,156 @@
2323
- Sync the bazel_registry.json and modules/ directory in the main branch of the BCR to https://bcr.bazel.build
2424
"""
2525

26+
import base64
27+
import hashlib
28+
import json
29+
import os
30+
import requests
2631
import subprocess
2732
import sys
33+
import tempfile
2834

2935
# Bucket that the registry content is synced to.
BCR_BUCKET = "gs://bcr.bazel.build/"

# Basename of the metadata file that describes a module's attestations.
ATTESTATION_METADATA_FILE = "attestations.json"

# Keys looked up under "attestations" in the metadata file.
FILES_WITH_ATTESTATIONS = ("source.json", "MODULE.bazel")

# Basename of the file that contains the most recent commit
# that passed through the post-submit pipeline successfully.
LAST_GREEN_FILE = "last_green.txt"
42+
43+
44+
class AttestationError(Exception):
    """Raised when there is a problem with attestations."""
3046

3147
def print_expanded_group(name):
    """Print a ``+++ <name>`` header line surrounded by blank lines.

    Args:
        name: Text placed after the ``+++`` marker.
    """
    print(f"\n\n+++ {name}\n\n")
3349

50+
def get_output(command):
    """Run *command* and return what it wrote to stdout.

    Args:
        command: Argument list passed to ``subprocess.run``.

    Returns:
        The command's stdout, decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    completed = subprocess.run(
        command,
        encoding="utf-8",
        stdout=subprocess.PIPE,
        # Without this a failing command is indistinguishable from one that
        # legitimately produced no output.
        check=True,
    )
    return completed.stdout
56+
57+
def check_and_write_new_attestations():
    """Check and write the attestations of every changed metadata file.

    Prints a notice and returns early when no metadata file was changed.

    Raises:
        AttestationError: Propagated from check_and_write_module_attestations.
    """
    print_expanded_group(":cop::copybara: Check & write attestations")
    paths = get_new_attestations_json_paths()
    if not paths:
        # TODO: turn this into an error
        print(f"No {ATTESTATION_METADATA_FILE} files were changed.")
        return

    for p in paths:
        check_and_write_module_attestations(p)
67+
68+
69+
def get_new_attestations_json_paths():
    """Return absolute paths of the attestation metadata files that changed.

    The changed files are taken from ``git diff-tree`` between the last green
    commit (when one is known) and the current commit.

    Returns:
        A list of paths, rooted at the current working directory, whose
        basename is ATTESTATION_METADATA_FILE.
    """
    # --diff-filter=d leaves out deleted paths (which also covers the old side
    # of a rename); they no longer exist on disk and could not be opened.
    cmd = ["git", "diff-tree", "--no-commit-id", "--name-only", "-r", "--diff-filter=d"]

    # last_green should be the parent commit. However, sometimes the
    # pipeline can fail due to infra issues. In this case we need
    # to mirror attestations in the commits of the failing runs, too.
    # Stripped so that stray whitespace never ends up in the git argument.
    last_green = get_last_green().strip()
    if last_green:
        cmd.append(last_green)

    changed = get_output(cmd + [get_commit()])
    suffix = f"/{ATTESTATION_METADATA_FILE}"
    cwd = os.getcwd()
    return [os.path.join(cwd, p) for p in changed.splitlines() if p.endswith(suffix)]
82+
83+
84+
def get_last_green():
    """Fetch the last green commit recorded in the bucket.

    Returns:
        The content of LAST_GREEN_FILE with surrounding whitespace removed,
        or an empty string when the request does not return HTTP 200.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the request times out.
    """
    # This is a URL, not a filesystem path, so join with "/" explicitly.
    base = BCR_BUCKET.replace("gs://", "https://storage.googleapis.com/")
    url = base.rstrip("/") + "/" + LAST_GREEN_FILE

    # requests waits indefinitely unless a timeout is given.
    with requests.get(url, timeout=30) as response:
        if response.status_code != 200:
            return ""

        return response.content.decode("utf-8").strip()
93+
94+
95+
def get_commit():
    """Return the value of the BUILDKITE_COMMIT environment variable.

    Returns:
        The variable's value, or None when it is not set.
    """
    return os.getenv("BUILDKITE_COMMIT")
97+
98+
99+
def check_and_write_module_attestations(attestations_json_path):
    """Check and write every attestation listed in one metadata file.

    Attestations are written next to the metadata file.

    Args:
        attestations_json_path: Path of the metadata file to process.

    Raises:
        AttestationError: If an entry is missing or an attestation cannot be
            downloaded, verified or written. The original exception is
            chained as the cause.
    """
    print(f"Checking {attestations_json_path}...")
    dest_dir = os.path.dirname(attestations_json_path)
    with open(attestations_json_path, "rb") as af:
        metadata = json.load(af)

    for f in FILES_WITH_ATTESTATIONS:
        try:
            entry = metadata["attestations"][f]
            check_and_write_single_attestation(entry["url"], entry["integrity"], dest_dir)
        except Exception as ex:
            # Re-raise with the metadata path and file name for context.
            raise AttestationError(f"{attestations_json_path} - {f}: {ex}") from ex

    print("Done!")
113+
114+
def check_and_write_single_attestation(url, integrity, dest_dir):
    """Download one attestation, verify its checksum and write it to disk.

    Args:
        url: Location of the attestation file.
        integrity: Expected checksum in ``<algorithm>-<base64 digest>`` form.
        dest_dir: Directory the attestation is written to, under its
            canonical basename.

    Raises:
        AttestationError: If the download does not return HTTP 200, the
            checksum does not match, or the URL's basename is not recognised.
        requests.exceptions.RequestException: On connection problems or when
            the request times out.
    """
    print(f"\tFound attestation @ {url}")
    # requests waits indefinitely unless a timeout is given.
    with requests.get(url, timeout=30) as response:
        if response.status_code != 200:
            raise AttestationError(f"{url}: HTTP {response.status_code}")

        raw_content = response.content

    check_integrity(raw_content, integrity)
    print("\t\tIntegrity: OK")

    dest = os.path.join(dest_dir, get_canonical_basename(url))
    print(f"\t\tWriting attestation to {dest}...")
    with open(dest, "wb") as f:
        f.write(raw_content)
129+
130+
def check_integrity(data, expected):
    """Verify that *data* matches the checksum *expected*.

    Args:
        data: Raw bytes to hash.
        expected: Checksum in ``<algorithm>-<base64 digest>`` form, where the
            algorithm is one of sha224, sha256, sha384 or sha512.

    Raises:
        AttestationError: If the algorithm is not supported or the computed
            checksum differs from *expected*.
    """
    algorithm, _, _ = expected.partition("-")
    # Raised explicitly rather than asserted: assert statements are removed
    # when Python runs with -O, which would skip this validation.
    if algorithm not in {"sha224", "sha256", "sha384", "sha512"}:
        raise AttestationError(f"Unsupported SRI algorithm: {algorithm!r}")

    digest = hashlib.new(algorithm, data).digest()
    encoded = base64.b64encode(digest).decode()
    actual = f"{algorithm}-{encoded}"
    if actual != expected:
        raise AttestationError(f"Expected checksum {expected}, got {actual}.")
139+
140+
def get_canonical_basename(url):
    """Return the canonical attestation basename for *url*.

    Attestation files in GitHub releases may have prefixes in their basename
    to avoid conflicts when multiple modules are released together
    (e.g. rules_python and rules_python_gazelle_plugin). In that case the
    prefix is dropped and the canonical basename is returned.

    Args:
        url: Location of the attestation file.

    Returns:
        ``<file>.intoto.jsonl`` for the first entry of FILES_WITH_ATTESTATIONS
        that occurs in the URL's basename.

    Raises:
        AttestationError: If no entry of FILES_WITH_ATTESTATIONS occurs in the
            URL's basename.
    """
    actual_basename = os.path.basename(url)
    for f in FILES_WITH_ATTESTATIONS:
        if f in actual_basename:
            return f"{f}.intoto.jsonl"

    raise AttestationError(f"Invalid basename of {url}.")
151+
152+
34153
def sync_bcr_content():
    """Copy bazel_registry.json and sync ./modules to the bucket.

    Both uploads set ``Cache-Control:no-cache``.

    Raises:
        subprocess.CalledProcessError: If a gsutil invocation fails.
    """
    print_expanded_group(":gcloud: Sync BCR content")
    subprocess.check_output(
        ["gsutil", "-h", "Cache-Control:no-cache", "cp", "./bazel_registry.json", BCR_BUCKET]
    )
    # NOTE(review): rsync runs without -d, so remote files that are absent
    # locally are kept. Presumably this protects previously mirrored
    # attestation files from being deleted - confirm.
    subprocess.check_output(
        ["gsutil", "-h", "Cache-Control:no-cache", "-m", "rsync", "-r", "./modules", BCR_BUCKET + "modules"]
    )
42161

162+
163+
def update_last_green():
    """Upload the current commit to the bucket as LAST_GREEN_FILE.

    Raises:
        subprocess.CalledProcessError: If the gsutil upload fails.
    """
    # The context manager removes the scratch directory afterwards;
    # tempfile.mkdtemp() would leave it behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, LAST_GREEN_FILE)
        with open(path, "wt") as f:
            f.write(get_commit())

        # The destination is a gs:// URL, not a filesystem path, so join
        # with "/" explicitly.
        dest = BCR_BUCKET.rstrip("/") + "/" + LAST_GREEN_FILE
        subprocess.check_output(["gsutil", "cp", path, dest])
170+
171+
43172
def main():
    """Run the post-submit steps in order and return the exit code.

    Attestations are checked and written first so they are part of the
    synced content; last green is only updated after the sync succeeded.

    Returns:
        0 when every step completed.
    """
    check_and_write_new_attestations()
    sync_bcr_content()
    update_last_green()
    return 0
46177

47178
if __name__ == "__main__":

0 commit comments

Comments
 (0)