Skip to content

Commit 0921145

Browse files
committed
Refactor manifest extraction to use verification path
Replace Manifest.from_signature() with signing.manifest_from_signature() that verifies signatures before extracting manifests. This addresses reviewer feedback to reuse existing verification logic and adds security to incremental signing by ensuring old signatures are verified before their hashes are reused. Changes: - Add manifest_from_signature() to signing.py that calls Verifier.verify() - Update sign_incremental() to require identity/oidc_issuer parameters for verification of old signatures - Remove Manifest.from_signature() from manifest.py (eliminated code duplication) - Update documentation examples in hashing.py - Remove redundant tests (DSSE parsing already tested in signing_test.py) This is a breaking change for incremental signing API, but improves security by preventing tampering of old signatures. Signed-off-by: Emrick Donadei <[email protected]>
1 parent 3fc63b0 commit 0921145

File tree

5 files changed

+94
-249
lines changed

5 files changed

+94
-249
lines changed

src/model_signing/_signing/signing.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,45 @@ def dsse_payload_to_manifest_compat(
166166
return manifest.Manifest(model_name, items, serialization)
167167

168168

169+
def manifest_from_signature(
170+
signature_path: pathlib.Path,
171+
*,
172+
identity: str,
173+
oidc_issuer: str,
174+
use_staging: bool = False,
175+
) -> manifest.Manifest:
176+
"""Extracts and verifies a manifest from an existing signature file.
177+
178+
This function reads a signature file (Sigstore bundle), verifies the
179+
cryptographic signature, and returns the manifest. This is essential when
180+
reusing hashes from an old signature (e.g., in incremental signing) to
181+
ensure the old signature hasn't been tampered with.
182+
183+
Args:
184+
signature_path: Path to the signature file to read and verify.
185+
identity: The expected identity that signed the model (e.g., email).
186+
oidc_issuer: The expected OpenID Connect issuer that provided the
187+
certificate used for the signature.
188+
use_staging: Use staging configurations instead of production. This
189+
should only be set to True when testing. Default is False.
190+
191+
Returns:
192+
A Manifest object representing the signed model.
193+
194+
Raises:
195+
ValueError: If signature verification fails or the signature file
196+
cannot be parsed.
197+
FileNotFoundError: If the signature file doesn't exist.
198+
"""
199+
from model_signing._signing import sign_sigstore # noqa: PLC0415
200+
201+
signature = sign_sigstore.Signature.read(signature_path)
202+
verifier = sign_sigstore.Verifier(
203+
identity=identity, oidc_issuer=oidc_issuer, use_staging=use_staging
204+
)
205+
return verifier.verify(signature)
206+
207+
169208
class Payload:
170209
"""In-toto payload used to represent a model for signing.
171210

src/model_signing/hashing.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -406,9 +406,13 @@ def use_incremental_serialization(
406406
You can override any of these by passing explicit values.
407407
408408
Usage example:
409-
# Extract manifest from previous signature
410-
old_manifest = manifest.Manifest.from_signature(
411-
pathlib.Path("model.sig.old")
409+
from model_signing._signing import signing
410+
411+
# Extract and verify manifest from previous signature
412+
old_manifest = signing.manifest_from_signature(
413+
pathlib.Path("model.sig.old"),
414+
identity="[email protected]",
415+
oidc_issuer="https://github.com/login/oauth",
412416
)
413417
414418
# Configure incremental hashing (auto-detects parameters)

src/model_signing/manifest.py

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,8 @@
3939
"""
4040

4141
import abc
42-
import base64
4342
from collections.abc import Iterable, Iterator
4443
import dataclasses
45-
import json
4644
import pathlib
4745
import sys
4846
from typing import Any, Final
@@ -468,51 +466,3 @@ def serialization_type(self) -> dict[str, Any]:
468466
manifest so that signature verification can use the same method.
469467
"""
470468
return self._serialization_type.serialization_parameters
471-
472-
@classmethod
473-
def from_signature(cls, signature_path: pathlib.Path) -> Self:
474-
"""Extracts a manifest from an existing signature file.
475-
476-
This method reads a signature file (Sigstore bundle) and extracts the
477-
manifest without performing cryptographic verification. This is useful
478-
for incremental re-hashing where you need to know what files were
479-
previously signed without verifying the signature.
480-
481-
Args:
482-
signature_path: Path to the signature file to read.
483-
484-
Returns:
485-
A Manifest object representing the signed model.
486-
487-
Raises:
488-
ValueError: If the signature file cannot be parsed or doesn't
489-
contain a valid manifest.
490-
FileNotFoundError: If the signature file doesn't exist.
491-
"""
492-
# Avoid circular import by importing here
493-
from model_signing._signing import signing
494-
495-
# Read the signature file
496-
content = signature_path.read_text(encoding="utf-8")
497-
bundle_dict = json.loads(content)
498-
499-
# Extract the DSSE envelope payload
500-
if "dsseEnvelope" in bundle_dict:
501-
# This is a protobuf-based bundle
502-
envelope = bundle_dict["dsseEnvelope"]
503-
elif "dsse_envelope" in bundle_dict:
504-
# Alternative snake_case naming
505-
envelope = bundle_dict["dsse_envelope"]
506-
else:
507-
raise ValueError("Signature file does not contain a DSSE envelope")
508-
509-
# Decode the payload (it's base64 encoded)
510-
payload_b64 = envelope.get("payload")
511-
if not payload_b64:
512-
raise ValueError("DSSE envelope does not contain a payload")
513-
514-
payload_bytes = base64.b64decode(payload_b64)
515-
payload_dict = json.loads(payload_bytes)
516-
517-
# Use the existing function to convert DSSE payload to manifest
518-
return signing.dsse_payload_to_manifest(payload_dict)

src/model_signing/signing.py

Lines changed: 48 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
from typing import Optional
4949

5050
from model_signing import hashing
51-
from model_signing import manifest
5251
from model_signing._signing import sign_certificate as certificate
5352
from model_signing._signing import sign_ec_key as ec_key
5453
from model_signing._signing import sign_sigstore as sigstore
@@ -81,6 +80,9 @@ def sign_incremental(
8180
old_signature_path: hashing.PathLike,
8281
new_signature_path: hashing.PathLike,
8382
*,
83+
identity: str,
84+
oidc_issuer: str,
85+
use_staging: bool = False,
8486
files_to_hash: Optional[Iterable[hashing.PathLike]] = None,
8587
):
8688
"""Signs a model incrementally, only re-hashing changed files.
@@ -91,6 +93,9 @@ def sign_incremental(
9193
digests from the previous signature for unchanged files and only hashes
9294
new or modified files.
9395
96+
The old signature is cryptographically verified before its hashes are
97+
reused, ensuring the integrity of the incremental signing process.
98+
9499
In this default configuration we sign using Sigstore.
95100
96101
Usage example:
@@ -99,28 +104,40 @@ def sign_incremental(
99104
model_path="huge-model/",
100105
old_signature_path="model.sig.old",
101106
new_signature_path="model.sig.new",
107+
identity="[email protected]",
108+
oidc_issuer="https://github.com/login/oauth",
102109
files_to_hash=["huge-model/README.md"]
103110
)
104111
105112
Args:
106113
model_path: The path to the model to sign.
107114
old_signature_path: The path to the previous signature. The manifest
108-
from this signature will be extracted and used for incremental
115+
from this signature will be verified and extracted for incremental
109116
hashing.
110117
new_signature_path: The path where the new signature will be written.
118+
identity: The expected identity that signed the old signature
119+
(e.g., email address).
120+
oidc_issuer: The expected OpenID Connect issuer that provided the
121+
certificate for the old signature.
122+
use_staging: Use staging configurations for verification instead of
123+
production. Should only be True when testing. Default is False.
111124
files_to_hash: Optional list of files that changed and need to be
112125
re-hashed. If None, only new files (not in old signature) will
113126
be hashed. Existing files will have their digests reused.
114127
To detect changed files, use git diff or similar tools.
115128
116129
Raises:
117130
FileNotFoundError: If old_signature_path doesn't exist.
118-
ValueError: If old_signature_path cannot be parsed.
131+
ValueError: If old_signature_path cannot be parsed or verification
132+
fails.
119133
"""
120134
Config().sign_incremental(
121135
model_path,
122136
old_signature_path,
123137
new_signature_path,
138+
identity=identity,
139+
oidc_issuer=oidc_issuer,
140+
use_staging=use_staging,
124141
files_to_hash=files_to_hash,
125142
)
126143

@@ -165,30 +182,44 @@ def sign_incremental(
165182
old_signature_path: hashing.PathLike,
166183
new_signature_path: hashing.PathLike,
167184
*,
185+
identity: str,
186+
oidc_issuer: str,
187+
use_staging: bool = False,
168188
files_to_hash: Optional[Iterable[hashing.PathLike]] = None,
169189
):
170190
"""Signs a model incrementally using the current configuration.
171191
172-
This method extracts the manifest from an existing signature and
173-
configures incremental hashing to reuse digests for unchanged files.
174-
Only new or modified files are re-hashed, providing significant
175-
performance improvements for large models.
192+
This method extracts and verifies the manifest from an existing
193+
signature, then configures incremental hashing to reuse digests for
194+
unchanged files. Only new or modified files are re-hashed, providing
195+
significant performance improvements for large models.
176196
177197
Args:
178198
model_path: The path to the model to sign.
179199
old_signature_path: The path to the previous signature.
180200
new_signature_path: The path where the new signature will be
181201
written.
202+
identity: The expected identity that signed the old signature
203+
(e.g., email address).
204+
oidc_issuer: The expected OpenID Connect issuer that provided
205+
the certificate for the old signature.
206+
use_staging: Use staging configurations for verification instead
207+
of production. Should only be True when testing. Default is
208+
False.
182209
files_to_hash: Optional list of files that changed and need to
183210
be re-hashed. If None, only new files will be hashed.
184211
185212
Raises:
186213
FileNotFoundError: If old_signature_path doesn't exist.
187-
ValueError: If old_signature_path cannot be parsed.
214+
ValueError: If old_signature_path cannot be parsed or verification
215+
fails.
188216
"""
189-
# Extract manifest from old signature
190-
old_manifest = manifest.Manifest.from_signature(
191-
pathlib.Path(old_signature_path)
217+
# Extract and verify manifest from old signature
218+
old_manifest = signing.manifest_from_signature(
219+
pathlib.Path(old_signature_path),
220+
identity=identity,
221+
oidc_issuer=oidc_issuer,
222+
use_staging=use_staging,
192223
)
193224

194225
# Configure incremental hashing
@@ -351,7 +382,9 @@ def use_pkcs11_signer(
351382
The new signing configuration.
352383
"""
353384
try:
354-
from model_signing._signing import sign_pkcs11 as pkcs11
385+
from model_signing._signing import ( # noqa: PLC0415
386+
sign_pkcs11 as pkcs11,
387+
)
355388
except ImportError as e:
356389
raise RuntimeError(
357390
"PKCS #11 functionality requires the 'pkcs11' extra. "
@@ -384,7 +417,9 @@ def use_pkcs11_certificate_signer(
384417
The new signing configuration.
385418
"""
386419
try:
387-
from model_signing._signing import sign_pkcs11 as pkcs11
420+
from model_signing._signing import ( # noqa: PLC0415
421+
sign_pkcs11 as pkcs11,
422+
)
388423
except ImportError as e:
389424
raise RuntimeError(
390425
"PKCS #11 functionality requires the 'pkcs11' extra. "

0 commit comments

Comments
 (0)