|
15 | 15 | a downstream verifier weeks later. |
16 | 16 |
|
17 | 17 | (C) Artefact correspondence -- the gitoid recorded against the |
18 | | - wolfSSL package equals the git-blob hash of the actual library |
19 | | - artefact that `make bomsh` traced (read from the |
20 | | - `_bomsh.artefact` manifest written by the bomsh: Makefile |
21 | | - target). This is what makes the SBOM a true attestation of |
22 | | - the binary that would ship. |
| 18 | + wolfSSL package equals the gitoid bomsh itself recorded for the |
| 19 | + library it traced (read from the `_bomsh.artefact` manifest the |
| 20 | + bomsh: Makefile target writes as '<path>\\t<gitoid>' BEFORE |
| 21 | + `make sbom` runs). This is the strongest claim the bomsh |
| 22 | + pipeline alone can make: the SPDX agrees with what bomsh saw. |
| 23 | +
|
| 24 | + Comparing against bomsh's own recorded gitoid (rather than |
| 25 | + against the on-disk file's *current* bytes) is deliberate. |
| 26 | + `make sbom`'s subsequent `make install` step relinks |
| 27 | + src/.libs/lib*.so* in place via libtool to fix RPATH, mutating |
| 28 | + the bytes after bomsh has already gitoid-ed them. The verifier |
| 29 | + still hashes the on-disk file (when it exists) and emits a NOTE if it has |
| 30 | + diverged, so the install-time relink remains visible without |
| 31 | + causing a false negative on the bomsh<->SPDX agreement. |
23 | 32 |
|
24 | 33 | Without this, a future `bomsh_sbom.py` change that emits a |
25 | 34 | plausibly-shaped but fictional gitoid (one that does not resolve in |
26 | | -the ADG, or resolves but to the wrong artefact) would pass the |
27 | | -existing PERSISTENT-ID assertion and ship a provenance bundle whose |
28 | | -externalRef is a lie. |
| 35 | +the ADG, or resolves but to a different artefact than bomsh recorded) |
| 36 | +would pass the existing PERSISTENT-ID assertion and ship a provenance |
| 37 | +bundle whose externalRef is a lie. |
29 | 38 |
|
30 | 39 | CLI form (used by `.github/workflows/sbom.yml`): |
31 | 40 |
|
@@ -150,27 +159,51 @@ def check_object_store_integrity(omnibor_objects_dir): |
150 | 159 | return obj_count, bad |
151 | 160 |
|
152 | 161 |
|
153 | | -def check_artefact_correspondence(spdx_gitoids, artefact_path, |
154 | | - package_name_substr='wolfssl'): |
155 | | - """(C) The gitoid recorded against the wolfSSL package equals the |
156 | | - git-blob hash of the library artefact at <artefact_path>. |
157 | | -
|
158 | | - Returns (artefact_gid, wolfssl_gids). Caller checks |
159 | | - `artefact_gid in wolfssl_gids`. Raises FileNotFoundError if the |
160 | | - artefact does not exist; raises ValueError if no SPDX gitoid is |
161 | | - associated with a wolfSSL package.""" |
162 | | - if not os.path.isfile(artefact_path): |
| 162 | +def parse_artefact_manifest(manifest_path): |
| 163 | + """Parse the `_bomsh.artefact` manifest written by the bomsh: |
| 164 | + recipe. Format: a single line, `<absolute-path>\\t<gitoid-hex>` |
| 165 | + -- both fields captured by the recipe AFTER bomtrace3 finishes |
| 166 | + but BEFORE `make sbom` relinks the library. |
| 167 | +
|
| 168 | + Returns (path, recorded_gid). Raises FileNotFoundError if the |
| 169 | + manifest does not exist (bomsh: skipped artefact discovery, e.g. |
| 170 | + no built library); raises ValueError if the line is malformed.""" |
| 171 | + if not os.path.isfile(manifest_path): |
163 | 172 | raise FileNotFoundError( |
164 | | - f'artefact {artefact_path!r} does not exist') |
165 | | - artefact_gid = gitoid_sha1(artefact_path) |
| 173 | + f'{manifest_path} not produced by `make bomsh`; cannot ' |
| 174 | + f'verify gitoid <-> artefact correspondence. This usually ' |
| 175 | + f'means the bomsh enrichment step skipped the artefact-' |
| 176 | + f'discovery loop (no built library).') |
| 177 | + with open(manifest_path) as f: |
| 178 | + line = f.readline().rstrip('\n') |
| 179 | + if not line: |
| 180 | + raise ValueError( |
| 181 | + f'{manifest_path} is empty; bomsh: recipe wrote nothing') |
| 182 | + parts = line.split('\t') |
| 183 | + if len(parts) != 2 or not all(parts): |
| 184 | + raise ValueError( |
| 185 | + f'{manifest_path}: expected "<path>\\t<gitoid>", got {line!r}. ' |
| 186 | + f'Re-run `make bomsh` against an up-to-date Makefile.am.') |
| 187 | + return parts[0], parts[1] |
| 188 | + |
| 189 | + |
| 190 | +def check_artefact_correspondence(spdx_gitoids, recorded_gid, |
| 191 | + package_name_substr='wolfssl'): |
| 192 | + """(C) The gitoid bomsh recorded for the traced library matches a |
| 193 | + gitoid externalRef on the wolfSSL SPDX package. This is the |
| 194 | + bomsh<->SPDX agreement check; it does NOT compare against the |
| 195 | + on-disk file's current bytes (see module docstring). |
| 196 | +
|
| 197 | + Returns (matched, wolfssl_gids), where matched is True when recorded_gid is one of wolfssl_gids. Raises ValueError if no SPDX |
| 198 | + gitoid is associated with a wolfSSL-named package.""" |
166 | 199 | wolfssl_gids = [gid for name, gid in spdx_gitoids |
167 | 200 | if package_name_substr in name.lower()] |
168 | 201 | if not wolfssl_gids: |
169 | 202 | raise ValueError( |
170 | 203 | f'no SPDX gitoid externalRef on a package whose name ' |
171 | 204 | f'contains {package_name_substr!r}; cannot verify ' |
172 | 205 | f'artefact correspondence') |
173 | | - return artefact_gid, wolfssl_gids |
| 206 | + return recorded_gid in wolfssl_gids, wolfssl_gids |
174 | 207 |
|
175 | 208 |
|
176 | 209 | def verify(spdx_glob, omnibor_dir, artefact_manifest, |
@@ -214,39 +247,50 @@ def verify(spdx_glob, omnibor_dir, artefact_manifest, |
214 | 247 | f'round-trip (object store is corrupt)') |
215 | 248 | return False, messages |
216 | 249 |
|
217 | | - if not os.path.isfile(artefact_manifest): |
218 | | - messages.append( |
219 | | - f'{artefact_manifest} not produced by `make bomsh`; ' |
220 | | - f'cannot verify gitoid <-> artefact correspondence. ' |
221 | | - f'This usually means the bomsh enrichment step skipped ' |
222 | | - f'the artefact-discovery loop (no built library).') |
223 | | - return False, messages |
224 | | - with open(artefact_manifest) as f: |
225 | | - artefact = f.read().strip() |
226 | | - if not artefact: |
227 | | - messages.append( |
228 | | - f'{artefact_manifest} is empty; bomsh: recipe wrote a ' |
229 | | - f'blank path') |
| 250 | + try: |
| 251 | + artefact, recorded_gid = parse_artefact_manifest(artefact_manifest) |
| 252 | + except (FileNotFoundError, ValueError) as e: |
| 253 | + messages.append(str(e)) |
230 | 254 | return False, messages |
231 | 255 |
|
232 | 256 | try: |
233 | | - artefact_gid, wolfssl_gids = check_artefact_correspondence( |
234 | | - spdx_gitoids, artefact, package_name_substr) |
235 | | - except (FileNotFoundError, ValueError) as e: |
| 257 | + matched, wolfssl_gids = check_artefact_correspondence( |
| 258 | + spdx_gitoids, recorded_gid, package_name_substr) |
| 259 | + except ValueError as e: |
236 | 260 | messages.append(str(e)) |
237 | 261 | return False, messages |
238 | 262 |
|
239 | | - if artefact_gid not in wolfssl_gids: |
| 263 | + if not matched: |
240 | 264 | messages.append( |
241 | 265 | f'wolfSSL package SPDX gitoids {wolfssl_gids} do not ' |
242 | | - f'include the gitoid of the actual built artefact ' |
243 | | - f'{artefact} ({artefact_gid}); the SBOM does not ' |
244 | | - f'attest to the binary that would ship') |
| 266 | + f'include the gitoid bomsh recorded for the traced ' |
| 267 | + f'artefact {artefact} ({recorded_gid}); the SBOM is ' |
| 268 | + f'inconsistent with what bomsh actually saw') |
245 | 269 | return False, messages |
246 | 270 |
|
247 | 271 | messages.append(f'OK: {len(spdx_gitoids)} gitoid(s) verified') |
248 | 272 | messages.append(f' objects round-trip: {obj_count} blobs') |
249 | | - messages.append(f' artefact match: {artefact} -> {artefact_gid}') |
| 273 | + messages.append( |
| 274 | + f' artefact match: {artefact} -> {recorded_gid} (bomsh-traced)') |
| 275 | + |
| 276 | + # Diagnostic-only: the on-disk file may have been rewritten since |
| 277 | + # bomsh saw it (the canonical case is `make sbom`'s `make install` |
| 278 | + # step relinking via libtool to fix RPATH). We do NOT fail on |
| 279 | + # this -- the SBOM<->bomsh agreement above is what matters for |
| 280 | + # the provenance proof -- but surfacing it as a NOTE keeps the |
| 281 | + # divergence visible so it does not silently grow into a |
| 282 | + # bigger gap (e.g. someone adds a strip step that goes unflagged). |
| 283 | + if os.path.isfile(artefact): |
| 284 | + on_disk = gitoid_sha1(artefact) |
| 285 | + if on_disk != recorded_gid: |
| 286 | + messages.append( |
| 287 | + f'NOTE: on-disk {artefact} now has gitoid {on_disk}, ' |
| 288 | + f'but bomsh recorded {recorded_gid}. This is expected ' |
| 289 | + f'when `make sbom` runs `make install` (libtool relinks ' |
| 290 | + f'src/.libs/lib*.so* in place to fix RPATH). The SBOM ' |
| 291 | + f'attests to the bomsh-traced bytes; if you need it to ' |
| 292 | + f'attest to the *installed* bytes, the bomsh: recipe ' |
| 293 | + f'must trace `make install` too.') |
250 | 294 | return True, messages |
251 | 295 |
|
252 | 296 |
|
|
0 commit comments