Skip to content

Commit bee4fe3

Browse files
committed
feat: narrow sbom cache key to extension list only
SBOM content depends only on the extension list, Talos version, and architecture. Previously the cache key included the full schematicID, causing cache misses when non-SBOM fields changed (kernel args, config, meta, secureboot, etc.).

Now hash only the relevant inputs so schematics with identical extension lists share cached bundles and scan results. Singleflight uses the same hash for both SPDX build and vulnerability scan.

Signed-off-by: Mateusz Urbanek <mateusz.urbanek@siderolabs.com>
1 parent e0e4a44 commit bee4fe3

8 files changed

Lines changed: 148 additions & 71 deletions

File tree

enterprise/scanner/builder/builder.go

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ type VEXSource interface {
5555
// It must enforce ownership before returning bytes.
5656
type SPDXSource interface {
5757
Build(ctx context.Context, schematicID, versionTag string, arch artifacts.Arch) (io.ReadCloser, error)
58+
PayloadHash(ctx context.Context, schematicID, versionTag string, arch artifacts.Arch) (string, error)
5859
}
5960

6061
// Options configures the Builder.
@@ -199,9 +200,15 @@ func (b *Builder) Build(ctx context.Context, schematicID, versionTag, arch strin
199200
}
200201

201202
func (b *Builder) scan(ctx context.Context, schematicID, versionTag, arch string) (*models.Document, *sbom.SBOM, error) {
202-
key := cacheKey(schematicID, versionTag, arch)
203+
// Derive the cache key from only the inputs that affect the SBOM content,
204+
// so schematics that share the same extension list, version and arch
205+
// reuse scan results.
206+
sbomHash, err := b.spdxSource.PayloadHash(ctx, schematicID, versionTag, artifacts.Arch(arch))
207+
if err != nil {
208+
return nil, nil, err
209+
}
203210

204-
if item := b.c.TTL.Get(key); item != nil && !item.IsExpired() {
211+
if item := b.c.TTL.Get(sbomHash); item != nil && !item.IsExpired() {
205212
entry := item.Value()
206213

207214
return entry.document, entry.sbom, nil
@@ -215,8 +222,8 @@ func (b *Builder) scan(ctx context.Context, schematicID, versionTag, arch string
215222
// carry the request ID into the detached scan so its logs keep the request_id.
216223
reqID := ctxlog.RequestID(ctx)
217224

218-
resultCh := b.c.SF.DoChan(key, func() (any, error) { //nolint:contextcheck
219-
return b.scanAndCache(reqID, username, schematicID, versionTag, arch, key)
225+
resultCh := b.c.SF.DoChan(sbomHash, func() (any, error) { //nolint:contextcheck
226+
return b.scanAndCache(reqID, username, schematicID, versionTag, arch, sbomHash)
220227
})
221228

222229
select {
@@ -236,10 +243,6 @@ func (b *Builder) scan(ctx context.Context, schematicID, versionTag, arch string
236243
}
237244
}
238245

239-
func cacheKey(schematicID, versionTag, arch string) string {
240-
return schematicID + "|" + versionTag + "|" + arch
241-
}
242-
243246
// scanAndCache runs under singleflight with a detached context.
244247
//
245248
// reqID is the request ID, carried into the detached context so the scan logs

enterprise/spdx/builder/builder.go

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,22 @@ func NewBuilder(
7979
}
8080
}
8181

82+
// PayloadHash returns the SBOM cache key for the given schematic/version/arch.
83+
//
84+
// It fetches the schematic to extract the extension list, then computes a
85+
// content hash that reflects only the inputs that affect the SPDX bundle
86+
// content. Callers should use this hash as a cache key so that schematics
87+
// differing only in non-SBOM fields (kernel args, config, etc.) share
88+
// cached bundles.
89+
func (b *Builder) PayloadHash(ctx context.Context, schematicID, versionTag string, arch artifacts.Arch) (string, error) {
90+
sc, err := b.schematicFactory.Get(ctx, schematicID, b.authProvider)
91+
if err != nil {
92+
return "", fmt.Errorf("failed to get schematic: %w", err)
93+
}
94+
95+
return Hash(sc.Customization.SystemExtensions.OfficialExtensions, versionTag, string(arch)), nil
96+
}
97+
8298
// Build returns an SPDX bundle, building and caching if necessary.
8399
func (b *Builder) Build(ctx context.Context, schematicID, versionTag string, arch artifacts.Arch) (storage.Bundle, error) {
84100
// Normalize version tag
@@ -91,29 +107,32 @@ func (b *Builder) Build(ctx context.Context, schematicID, versionTag string, arc
91107
return nil, fmt.Errorf("invalid version: %w", err)
92108
}
93109

94-
// Check cache first
95-
if err := b.storage.Head(ctx, schematicID, versionTag, string(arch)); err == nil {
96-
ctxlog.Logger(ctx, b.logger).Debug("SPDX bundle cache hit", zap.String("schematic", schematicID), zap.String("version", versionTag), zap.String("arch", string(arch)))
97-
98-
return b.storage.Get(ctx, schematicID, versionTag, string(arch))
99-
}
100-
101-
// Verify access and fetch schematic data before entering singleflight.
102-
// buildBundle runs with context.Background() (request context may be canceled),
103-
// so ownership enforcement must happen here with the live request context.
110+
// Fetch schematic first: we need the extension list to derive the cache key.
111+
// Ownership enforcement happens here with the live request context, before
112+
// entering singleflight which uses a detached context.
104113
sc, err := b.schematicFactory.Get(ctx, schematicID, b.authProvider)
105114
if err != nil {
106115
return nil, fmt.Errorf("failed to get schematic: %w", err)
107116
}
108117

109-
// Build the bundle using singleflight to prevent duplicate work
110-
cacheKey := CacheTag(schematicID, versionTag, string(arch))
118+
// Compute cache key from only the inputs that affect the SBOM content
119+
// (extensions list, version, architecture), so that schematics differing
120+
// in other fields share the same cached bundle.
121+
sbomHash := Hash(sc.Customization.SystemExtensions.OfficialExtensions, versionTag, string(arch))
111122

123+
// Check cache first
124+
if err := b.storage.Head(ctx, sbomHash); err == nil {
125+
ctxlog.Logger(ctx, b.logger).Debug("SPDX bundle cache hit", zap.String("schematic", schematicID), zap.String("version", versionTag), zap.String("arch", string(arch)))
126+
127+
return b.storage.Get(ctx, sbomHash)
128+
}
129+
130+
// Build the bundle using singleflight to prevent duplicate work
112131
// carry the request ID into the detached build so its logs keep the request_id.
113132
reqID := ctxlog.RequestID(ctx)
114133

115-
resultCh := b.sf.DoChan(cacheKey, func() (any, error) { //nolint:contextcheck
116-
return nil, b.buildBundle(reqID, sc, schematicID, versionTag, arch)
134+
resultCh := b.sf.DoChan(sbomHash, func() (any, error) { //nolint:contextcheck
135+
return nil, b.buildBundle(reqID, sc, schematicID, sbomHash, versionTag, arch)
117136
})
118137

119138
select {
@@ -125,14 +144,14 @@ func (b *Builder) Build(ctx context.Context, schematicID, versionTag string, arc
125144
}
126145

127146
// Retrieve from cache after building
128-
return b.storage.Get(ctx, schematicID, versionTag, string(arch))
147+
return b.storage.Get(ctx, sbomHash)
129148
}
130149
}
131150

132151
// buildBundle creates and stores an SPDX bundle for a single architecture.
133152
// sc must be pre-fetched by the caller (Build) using the live request context,
134153
// since this function runs inside singleflight with context.Background().
135-
func (b *Builder) buildBundle(reqID string, sc *schematicpkg.Schematic, schematicID, versionTag string, arch artifacts.Arch) error {
154+
func (b *Builder) buildBundle(reqID string, sc *schematicpkg.Schematic, schematicID, sbomHash, versionTag string, arch artifacts.Arch) error {
136155
// Use a fresh context since we're in singleflight, but carry the
137156
// request ID so build logs keep the request_id.
138157
ctx := ctxlog.WithRequestID(context.Background(), reqID)
@@ -173,8 +192,8 @@ func (b *Builder) buildBundle(reqID string, sc *schematicpkg.Schematic, schemati
173192
return fmt.Errorf("failed to create SPDX JSON document: %w", err)
174193
}
175194

176-
// Store the bundle
177-
if err := b.storage.Put(ctx, schematicID, versionTag, string(arch), jsonReader, size); err != nil {
195+
// Store the bundle keyed by the SBOM content hash
196+
if err := b.storage.Put(ctx, sbomHash, jsonReader, size); err != nil {
178197
return fmt.Errorf("failed to store SPDX bundle: %w", err)
179198
}
180199

enterprise/spdx/builder/hash.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// Copyright (c) 2026 Sidero Labs, Inc.
2+
//
3+
// Use of this software is governed by the Business Source License
4+
// included in the LICENSE file.
5+
6+
//go:build enterprise
7+
8+
package builder
9+
10+
import (
11+
"crypto/sha256"
12+
"encoding/hex"
13+
"slices"
14+
"sort"
15+
)
16+
17+
// Hash returns a content hash describing the inputs that determine the SPDX
18+
// bundle content: the extension list, Talos version, and architecture. It is
19+
// used as the OCI cache tag, so that:
20+
//
21+
// - Two schematics with the same extension list, version and architecture
22+
// share a single cached bundle even when other schematic fields differ.
23+
// - Fixes to the SPDX extraction/merge logic can invalidate previously
24+
// cached bundles via errata strings (see internal/profile.Hash).
25+
func Hash(extensions []string, version, arch string) string {
26+
hasher := sha256.New()
27+
28+
// Format version so the hash scheme can be evolved in the future.
29+
hasher.Write([]byte("sbom/v1"))
30+
hasher.Write([]byte{0})
31+
32+
// Sort extensions for deterministic hashing regardless of schematic order.
33+
sorted := slices.Clone(extensions)
34+
sort.Strings(sorted)
35+
36+
for _, ext := range sorted {
37+
hasher.Write([]byte(ext))
38+
hasher.Write([]byte{0})
39+
}
40+
41+
hasher.Write([]byte(version))
42+
hasher.Write([]byte{0})
43+
hasher.Write([]byte(arch))
44+
hasher.Write([]byte{0})
45+
46+
// Errata: append a marker string whenever the SPDX bundle content or
47+
// extraction logic changes in a way that must invalidate existing cached
48+
// bundles. Add new entries below; never remove or reorder existing ones.
49+
// Guard entries with conditions when the fix is scoped.
50+
51+
return hex.EncodeToString(hasher.Sum(nil))
52+
}

enterprise/spdx/builder/spdx.go

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -216,21 +216,6 @@ func prefixDocElementID(prefix string, id common.DocElementID) common.DocElement
216216
}
217217
}
218218

219-
// CacheTag returns the cache tag for an SPDX bundle.
220-
//
221-
// Format: spdx-<schematic_id>-<version>-<arch>
222-
//
223-
// Operators are expected to use distinct cache repositories for OSS vs
224-
// Enterprise deployments since the bundle content differs by build flavor.
225-
//
226-
// Version is sanitized to replace characters that are invalid in OCI tags.
227-
func CacheTag(schematicID, version, arch string) string {
228-
// OCI tags cannot contain '+', replace with '-'
229-
sanitizedVersion := strings.ReplaceAll(version, "+", "-")
230-
231-
return fmt.Sprintf("spdx-%s-%s-%s", schematicID, sanitizedVersion, arch)
232-
}
233-
234219
// buildDocumentNamespace assembles the SPDX DocumentNamespace from the
235220
// configured external URL plus the schematic / version / arch path. It uses
236221
// url.URL.JoinPath rather than string concatenation to avoid producing

enterprise/spdx/builder/spdx_test.go

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,32 @@ import (
2323
ifconstants "github.com/siderolabs/image-factory/pkg/constants"
2424
)
2525

26-
func TestCacheTag(t *testing.T) {
26+
func TestHash(t *testing.T) {
2727
t.Parallel()
2828

29-
tag := builder.CacheTag("schematic123", "v1.13.0", "amd64")
29+
base := builder.Hash([]string{"ext1", "ext2"}, "v1.13.0", "amd64")
3030

31-
assert.True(t, strings.HasPrefix(tag, "spdx-"), "got %q", tag)
32-
assert.Contains(t, tag, "schematic123")
33-
assert.Contains(t, tag, "v1.13.0")
34-
assert.Contains(t, tag, "amd64")
31+
// The hash is the OCI cache tag, so it must always be valid (hex, no '+').
32+
assert.NotContains(t, base, "+")
3533

36-
// `+` must be sanitized for OCI tag compatibility.
37-
tagWithPlus := builder.CacheTag("schematic", "v1.13.0+rc.0", "amd64")
38-
assert.NotContains(t, tagWithPlus, "+")
39-
assert.Contains(t, tagWithPlus, "v1.13.0-rc.0")
34+
// Deterministic for the same inputs.
35+
assert.Equal(t, base, builder.Hash([]string{"ext1", "ext2"}, "v1.13.0", "amd64"))
36+
37+
// Extension order does not matter (sorting is internal).
38+
assert.Equal(
39+
t,
40+
builder.Hash([]string{"ext2", "ext1"}, "v1.13.0", "amd64"),
41+
builder.Hash([]string{"ext1", "ext2"}, "v1.13.0", "amd64"),
42+
)
43+
44+
// Sensitive to distinct inputs so different bundles never collide.
45+
assert.NotEqual(t, base, builder.Hash([]string{"ext1", "ext3"}, "v1.13.0", "amd64"))
46+
assert.NotEqual(t, base, builder.Hash([]string{"ext1", "ext2"}, "v1.13.1", "amd64"))
47+
assert.NotEqual(t, base, builder.Hash([]string{"ext1", "ext2"}, "v1.13.0", "arm64"))
48+
49+
// Empty extension list is valid and produces a consistent hash.
50+
empty := builder.Hash([]string{}, "v1.13.0", "amd64")
51+
assert.Equal(t, empty, builder.Hash([]string{}, "v1.13.0", "amd64"))
4052
}
4153

4254
func TestBundleToJSON_DocumentNamespace(t *testing.T) {

enterprise/spdx/storage/registry/registry.go

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ import (
2626
"github.com/siderolabs/gen/xerrors"
2727
"go.uber.org/zap"
2828

29-
"github.com/siderolabs/image-factory/enterprise/spdx/builder"
3029
"github.com/siderolabs/image-factory/enterprise/spdx/storage"
3130
"github.com/siderolabs/image-factory/internal/ctxlog"
3231
"github.com/siderolabs/image-factory/internal/image/signer"
@@ -81,16 +80,15 @@ func NewStorage(logger *zap.Logger, options Options) (*Storage, error) {
8180
return s, nil
8281
}
8382

84-
// Head checks if an SPDX bundle exists for the given schematic, version and architecture.
85-
func (s *Storage) Head(ctx context.Context, schematicID, version, arch string) error {
86-
tag := builder.CacheTag(schematicID, version, arch)
87-
taggedRef := s.cacheRepository.Tag(tag)
83+
// Head checks if an SPDX bundle exists for the given cache tag.
84+
func (s *Storage) Head(ctx context.Context, cacheTag string) error {
85+
taggedRef := s.cacheRepository.Tag(cacheTag)
8886

8987
ctxlog.Logger(ctx, s.logger).Debug("heading SPDX bundle", zap.Stringer("ref", taggedRef))
9088

9189
_, err := s.puller.Head(ctx, taggedRef)
9290
if regtransport.IsStatusCodeError(err, http.StatusNotFound, http.StatusForbidden) {
93-
return xerrors.NewTaggedf[storage.ErrNotFoundTag]("SPDX bundle for schematic %q version %q arch %q not found", schematicID, version, arch)
91+
return xerrors.NewTaggedf[storage.ErrNotFoundTag]("SPDX bundle not found for tag %q", cacheTag)
9492
}
9593

9694
if err != nil {
@@ -100,16 +98,15 @@ func (s *Storage) Head(ctx context.Context, schematicID, version, arch string) e
10098
return nil
10199
}
102100

103-
// Get retrieves an SPDX bundle for the given schematic, version and architecture.
104-
func (s *Storage) Get(ctx context.Context, schematicID, version, arch string) (storage.Bundle, error) {
105-
tag := builder.CacheTag(schematicID, version, arch)
106-
taggedRef := s.cacheRepository.Tag(tag)
101+
// Get retrieves an SPDX bundle for the given cache tag.
102+
func (s *Storage) Get(ctx context.Context, cacheTag string) (storage.Bundle, error) {
103+
taggedRef := s.cacheRepository.Tag(cacheTag)
107104

108105
ctxlog.Logger(ctx, s.logger).Debug("getting SPDX bundle", zap.Stringer("ref", taggedRef))
109106

110107
desc, err := s.puller.Head(ctx, taggedRef)
111108
if regtransport.IsStatusCodeError(err, http.StatusNotFound, http.StatusForbidden) {
112-
return nil, xerrors.NewTaggedf[storage.ErrNotFoundTag]("SPDX bundle for schematic %q version %q arch %q not found", schematicID, version, arch)
109+
return nil, xerrors.NewTaggedf[storage.ErrNotFoundTag]("SPDX bundle not found for tag %q", cacheTag)
113110
}
114111

115112
if err != nil {
@@ -161,9 +158,8 @@ func (s *Storage) Get(ctx context.Context, schematicID, version, arch string) (s
161158
}
162159

163160
// Put stores an SPDX bundle.
164-
func (s *Storage) Put(ctx context.Context, schematicID, version, arch string, data io.Reader, size int64) error {
165-
tag := builder.CacheTag(schematicID, version, arch)
166-
taggedRef := s.cacheRepository.Tag(tag)
161+
func (s *Storage) Put(ctx context.Context, cacheTag string, data io.Reader, size int64) error {
162+
taggedRef := s.cacheRepository.Tag(cacheTag)
167163

168164
ctxlog.Logger(ctx, s.logger).Info("pushing SPDX bundle", zap.Stringer("ref", taggedRef))
169165

enterprise/spdx/storage/storage.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,19 @@ import (
1414
)
1515

1616
// Storage is the SPDX bundle storage interface.
17+
//
18+
// cacheTag is a content-hash derived from the inputs that determine the
19+
// SPDX bundle content (extension list, version, architecture). It is
20+
// computed by the caller using builder.Hash.
1721
type Storage interface {
18-
// Head checks if a bundle exists for the given schematic, version and architecture.
19-
Head(ctx context.Context, schematicID, version, arch string) error
22+
// Head checks if a bundle exists for the given cache tag.
23+
Head(ctx context.Context, cacheTag string) error
2024

21-
// Get retrieves a bundle for the given schematic, version and architecture.
22-
Get(ctx context.Context, schematicID, version, arch string) (Bundle, error)
25+
// Get retrieves a bundle for the given cache tag.
26+
Get(ctx context.Context, cacheTag string) (Bundle, error)
2327

24-
// Put stores a bundle for the given schematic, version and architecture.
25-
Put(ctx context.Context, schematicID, version, arch string, data io.Reader, size int64) error
28+
// Put stores a bundle for the given cache tag.
29+
Put(ctx context.Context, cacheTag string, data io.Reader, size int64) error
2630
}
2731

2832
// Bundle represents a stored SPDX bundle that can be read.

pkg/enterprise/enterprise.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,12 @@ type VEXSource interface {
7878
// so the SBOM extraction and access control live in one place.
7979
type SPDXSource interface {
8080
Build(ctx context.Context, schematicID, versionTag string, arch artifacts.Arch) (io.ReadCloser, error)
81+
82+
// PayloadHash returns a content-hash describing the inputs that determine
83+
// the SPDX bundle content (extension list, version, architecture). Schematics
84+
// with the same SBOM-relevant inputs share the same hash. Callers should use
85+
// this hash as a cache key.
86+
PayloadHash(ctx context.Context, schematicID, versionTag string, arch artifacts.Arch) (string, error)
8187
}
8288

8389
// ScannerOptions holds configuration options for the Scanner frontend.

0 commit comments

Comments
 (0)