Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion enterprise/spdx/builder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func (b *Builder) Build(ctx context.Context, schematicID, versionTag string, arc
}

// Build the bundle using singleflight to prevent duplicate work
cacheKey := CacheTag(schematicID, versionTag, string(arch))
cacheKey := Hash(schematicID, versionTag, string(arch))

resultCh := b.sf.DoChan(cacheKey, func() (any, error) { //nolint:contextcheck
return nil, b.buildBundle(sc, schematicID, versionTag, arch)
Expand Down
43 changes: 43 additions & 0 deletions enterprise/spdx/builder/hash.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright (c) 2026 Sidero Labs, Inc.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.

//go:build enterprise

package builder

import (
"crypto/sha256"
"encoding/hex"
)

// Hash returns a content hash describing the inputs and extraction logic that
// produce an SPDX bundle. It is used directly as the OCI cache tag, so that
// fixes to the SPDX extraction/merge logic can invalidate previously cached
// bundles even though the schematic, version and architecture are unchanged.
//
// This mirrors internal/profile.Hash (whose output is likewise used as the
// asset cache tag): the inputs are checksummed and then errata strings are
// mixed in, bumping the hash (and therefore the cache key) whenever a bug in a
// previously cached SPDX bundle needs to be invalidated.
//
// Operators are expected to use distinct cache repositories for OSS vs
// Enterprise deployments since the bundle content differs by build flavor.
func Hash(schematicID, version, arch string) string {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would like us to go much further on this one and do proper caching for SBOMs.

If we depend only on schematic IDs, two schematics that differ only in, e.g., kernel args will not produce the same SBOM ID.

Let's reconsider this, probably @shanduur can help here.

It feels like we only need the list of extensions from the schematic (plus the overlay?), but let's do proper, tested hashing here to avoid recomputing the SPDX.

The same applies to scan results: if a scan is requested and the SPDX hash is the same, don't re-run the scan unless the vulndb/VEX data has changed.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we depend only on schematic IDs, two schematics that differ only in, e.g., kernel args will not produce the same SBOM ID.

Ahh, that's already bad. We should rely on extensions only; overlays don't ship SBOMs, unless we want them to.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, schematicID is too fragile; we should cache on Version+Arch+Extensions.

hasher := sha256.New()

// NUL-separate inputs so distinct fields can't collide via concatenation.
hasher.Write([]byte(schematicID))
hasher.Write([]byte{0})
hasher.Write([]byte(version))
hasher.Write([]byte{0})
hasher.Write([]byte(arch))

// Errata: append a marker string whenever the SPDX bundle content or
// extraction logic changes in a way that must invalidate existing cached
// bundles. Add new entries below; never remove or reorder existing ones.
// Guard entries with conditions (version/arch) when the fix is scoped.

return hex.EncodeToString(hasher.Sum(nil))
}
15 changes: 0 additions & 15 deletions enterprise/spdx/builder/spdx.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,21 +216,6 @@ func prefixDocElementID(prefix string, id common.DocElementID) common.DocElement
}
}

// CacheTag builds the registry tag under which an SPDX bundle is cached.
//
// The tag has the shape "spdx-<schematic_id>-<version>-<arch>", where every
// '+' in version is rewritten to '-' because '+' is not a legal character in
// an OCI tag. schematicID and arch are used verbatim.
//
// The tag does not encode the build flavor: bundle content differs between
// OSS and Enterprise builds, so operators are expected to point each
// deployment at its own cache repository.
func CacheTag(schematicID, version, arch string) string {
	// Splitting on '+' and re-joining with '-' swaps every occurrence,
	// leaving all other bytes of the version untouched.
	ociSafeVersion := strings.Join(strings.Split(version, "+"), "-")

	fields := []string{schematicID, ociSafeVersion, arch}

	return fmt.Sprintf("spdx-%s", strings.Join(fields, "-"))
}

// buildDocumentNamespace assembles the SPDX DocumentNamespace from the
// configured external URL plus the schematic / version / arch path. It uses
// url.URL.JoinPath rather than string concatenation to avoid producing
Expand Down
21 changes: 11 additions & 10 deletions enterprise/spdx/builder/spdx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,21 @@ import (
ifconstants "github.com/siderolabs/image-factory/pkg/constants"
)

func TestCacheTag(t *testing.T) {
func TestHash(t *testing.T) {
t.Parallel()

tag := builder.CacheTag("schematic123", "v1.13.0", "amd64")
base := builder.Hash("schematic123", "v1.13.0", "amd64")

assert.True(t, strings.HasPrefix(tag, "spdx-"), "got %q", tag)
assert.Contains(t, tag, "schematic123")
assert.Contains(t, tag, "v1.13.0")
assert.Contains(t, tag, "amd64")
// The hash is the OCI cache tag, so it must always be a valid tag (hex, no '+').
assert.NotContains(t, base, "+")

// `+` must be sanitized for OCI tag compatibility.
tagWithPlus := builder.CacheTag("schematic", "v1.13.0+rc.0", "amd64")
assert.NotContains(t, tagWithPlus, "+")
assert.Contains(t, tagWithPlus, "v1.13.0-rc.0")
// Deterministic for the same inputs.
assert.Equal(t, base, builder.Hash("schematic123", "v1.13.0", "amd64"))

// Sensitive to each input so distinct bundles never collide.
assert.NotEqual(t, base, builder.Hash("schematic456", "v1.13.0", "amd64"))
assert.NotEqual(t, base, builder.Hash("schematic123", "v1.13.1", "amd64"))
assert.NotEqual(t, base, builder.Hash("schematic123", "v1.13.0", "arm64"))
}

func TestBundleToJSON_DocumentNamespace(t *testing.T) {
Expand Down
6 changes: 3 additions & 3 deletions enterprise/spdx/storage/registry/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func NewStorage(logger *zap.Logger, options Options) (*Storage, error) {

// Head checks if an SPDX bundle exists for the given schematic, version and architecture.
func (s *Storage) Head(ctx context.Context, schematicID, version, arch string) error {
tag := builder.CacheTag(schematicID, version, arch)
tag := builder.Hash(schematicID, version, arch)
taggedRef := s.cacheRepository.Tag(tag)

s.logger.Debug("heading SPDX bundle", zap.Stringer("ref", taggedRef))
Expand All @@ -101,7 +101,7 @@ func (s *Storage) Head(ctx context.Context, schematicID, version, arch string) e

// Get retrieves an SPDX bundle for the given schematic, version and architecture.
func (s *Storage) Get(ctx context.Context, schematicID, version, arch string) (storage.Bundle, error) {
tag := builder.CacheTag(schematicID, version, arch)
tag := builder.Hash(schematicID, version, arch)
taggedRef := s.cacheRepository.Tag(tag)

s.logger.Debug("getting SPDX bundle", zap.Stringer("ref", taggedRef))
Expand Down Expand Up @@ -161,7 +161,7 @@ func (s *Storage) Get(ctx context.Context, schematicID, version, arch string) (s

// Put stores an SPDX bundle.
func (s *Storage) Put(ctx context.Context, schematicID, version, arch string, data io.Reader, size int64) error {
tag := builder.CacheTag(schematicID, version, arch)
tag := builder.Hash(schematicID, version, arch)
taggedRef := s.cacheRepository.Tag(tag)

s.logger.Info("pushing SPDX bundle", zap.Stringer("ref", taggedRef))
Expand Down