diff --git a/go.mod b/go.mod index ffdb25bdf..d457b6b2d 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,7 @@ require ( github.com/in-toto/attestation v1.1.2 github.com/invopop/jsonschema v0.13.0 github.com/joho/godotenv v1.5.1 + github.com/jonjohnsonjr/targz v0.0.0-20250908171716-7f45c9361279 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 github.com/klauspost/compress v1.18.0 github.com/klauspost/pgzip v1.2.6 diff --git a/go.sum b/go.sum index 2e124df8e..e88bc7386 100644 --- a/go.sum +++ b/go.sum @@ -216,6 +216,10 @@ github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOl github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/jonjohnsonjr/targz v0.0.0-20241113200849-4986e08f3fb4 h1:yzUKZR6eq4hfKkNLe2KfxOBiVHyjXny7g4bEDuiYCtY= +github.com/jonjohnsonjr/targz v0.0.0-20241113200849-4986e08f3fb4/go.mod h1:vFsMbFCBsTclpEtIkbCOBAJj1mBsqoMtm22ibo1cG2o= +github.com/jonjohnsonjr/targz v0.0.0-20250908171716-7f45c9361279 h1:K+c7xw1y3Hf7KawbI0Nhh9jl7me43uey6HFBMjIk8uU= +github.com/jonjohnsonjr/targz v0.0.0-20250908171716-7f45c9361279/go.mod h1:vFsMbFCBsTclpEtIkbCOBAJj1mBsqoMtm22ibo1cG2o= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= github.com/kelseyhightower/envconfig v1.4.0 h1:Im6hONhd3pLkfDFsbRgu68RDNkGF1r3dvMUtDTo2cv8= diff --git a/pkg/cli/commands.go b/pkg/cli/commands.go index ed50807a4..0d90cdfd8 100644 --- a/pkg/cli/commands.go +++ b/pkg/cli/commands.go @@ -62,6 +62,7 @@ func New() *cobra.Command { cmd.AddCommand(packageVersion()) cmd.AddCommand(query()) cmd.AddCommand(scan()) + 
cmd.AddCommand(scanTar())
 	cmd.AddCommand(signCmd())
 	cmd.AddCommand(signIndex())
 	cmd.AddCommand(test())
diff --git a/pkg/cli/scan.go b/pkg/cli/scan.go
index ca1a51085..1189a8ba9 100644
--- a/pkg/cli/scan.go
+++ b/pkg/cli/scan.go
@@ -17,6 +17,7 @@ package cli
 import (
 	"bufio"
 	"bytes"
+	"compress/gzip"
 	"context"
 	"fmt"
 	"io"
@@ -34,6 +35,7 @@ import (
 	"chainguard.dev/melange/pkg/config"
 	"chainguard.dev/melange/pkg/sca"
 	"github.com/chainguard-dev/clog"
+	"github.com/jonjohnsonjr/targz/tarfs"
 	"github.com/spf13/cobra"
 	"go.opentelemetry.io/otel"
 )
@@ -76,6 +78,21 @@ func scan() *cobra.Command {
 	return cmd
 }
 
+// scanTar returns the "scan-tar" subcommand. It takes no arguments and defines
+// no flags of its own; the tar stream to analyze is read from stdin by
+// scanTarCmd.
+func scanTar() *cobra.Command {
+	return &cobra.Command{
+		Use:     "scan-tar",
+		Short:   "Scan a tar stream from stdin and analyze dependencies",
+		Example: `docker export container_id | melange scan-tar`,
+		Args:    cobra.NoArgs,
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return scanTarCmd(cmd.Context())
+		},
+	}
+}
+
 // TODO: It would be cool if there was a way this could take just a directory.
 func scanCmd(ctx context.Context, file string, sc *scanConfig) error {
 	ctx, span := otel.Tracer("melange").Start(ctx, "scan")
@@ -339,6 +356,161 @@ func scanCmd(ctx context.Context, file string, sc *scanConfig) error {
 	return nil
 }
 
+// scanTarCmd reads a tar stream (optionally gzip-compressed) from stdin, runs
+// SCA over its contents, and writes the generated control data to stdout.
+func scanTarCmd(ctx context.Context) error {
+	ctx, span := otel.Tracer("melange").Start(ctx, "scan-tar")
+	defer span.End()
+
+	log := clog.FromContext(ctx)
+
+	// Spool the stream to a temporary file. This is necessary because tarfs
+	// requires an io.ReaderAt (random access), which stdin does not provide.
+	tmpFile, err := os.CreateTemp("", "melange-scan-*.tar")
+	if err != nil {
+		return fmt.Errorf("create temp file: %w", err)
+	}
+	// Deferred calls run last-in-first-out: the file is closed, then removed.
+	defer os.Remove(tmpFile.Name())
+	defer tmpFile.Close()
+
+	log.Infof("Reading tar stream from stdin...")
+
+	// Peek at the first two bytes to detect gzip without consuming them. io.EOF
+	// only means fewer than two bytes were available; the empty-input check
+	// below reports the case where there were none.
+	var reader io.Reader
+	peekReader := bufio.NewReader(os.Stdin)
+	peek, err := peekReader.Peek(2)
+	if err != nil && err != io.EOF {
+		return fmt.Errorf("peek stdin: %w", err)
+	}
+
+	// Check for gzip magic bytes (1f 8b)
+	if len(peek) >= 2 && peek[0] == 0x1f && peek[1] == 0x8b {
+		log.Infof("Detected gzip-compressed tar stream")
+		gzReader, err := gzip.NewReader(peekReader)
+		if err != nil {
+			return fmt.Errorf("create gzip reader: %w", err)
+		}
+		defer gzReader.Close()
+		reader = gzReader
+	} else {
+		reader = peekReader
+	}
+
+	// Copy the (decompressed) tar stream to the temporary file
+	written, err := io.Copy(tmpFile, reader)
+	if err != nil {
+		return fmt.Errorf("copy tar stream: %w", err)
+	}
+
+	log.Infof("Wrote %d bytes to temporary file", written)
+
+	if written == 0 {
+		return fmt.Errorf("no data received from stdin")
+	}
+
+	// Seek back to beginning for reading
+	if _, err := tmpFile.Seek(0, io.SeekStart); err != nil {
+		return fmt.Errorf("seek temp file: %w", err)
+	}
+
+	tarHandle, err := newTarSCAHandle(tmpFile)
+	if err != nil {
+		return fmt.Errorf("create tar SCA handle: %w", err)
+	}
+
+	// Run SCA analysis
+	generated := &config.Dependencies{}
+	if err := sca.Analyze(ctx, tarHandle, generated); err != nil {
+		return fmt.Errorf("SCA analysis: %w", err)
+	}
+
+	// For tar scanning, remove versions from command provides since commands
+	// don't have meaningful separate versions from the container
+	for i, provide := range generated.Provides {
+		if !strings.HasPrefix(provide, "cmd:") {
+			continue
+		}
+		if name, _, found := strings.Cut(provide, "="); found {
+			generated.Provides[i] = name
+		}
+	}
+
+	// Output results in the same format as regular scan
+	log.Infof("Analysis complete. Found %d runtime deps, %d provides, %d vendored",
+		len(generated.Runtime), len(generated.Provides), len(generated.Vendored))
+
+	// Create a minimal PackageBuild for output formatting
+	pkg := &config.Package{}
+
+	bb := &build.Build{
+		Configuration: &config.Configuration{
+			Package: *pkg,
+		},
+	}
+
+	pb := &build.PackageBuild{
+		Build:        bb,
+		Origin:       pkg,
+		Dependencies: *generated,
+	}
+
+	var buf bytes.Buffer
+	if err := pb.GenerateControlData(&buf); err != nil {
+		return fmt.Errorf("generate control data: %w", err)
+	}
+
+	if _, err := os.Stdout.Write(buf.Bytes()); err != nil {
+		return fmt.Errorf("write control data: %w", err)
+	}
+	return nil
+}
+
+// TarSCAHandle implements sca.SCAHandle for tar files
+type TarSCAHandle struct {
+	tarFile *os.File
+	tarFS   *tarfs.FS
+}
+
+// Compile-time check that TarSCAHandle satisfies sca.SCAHandle.
+var _ sca.SCAHandle = (*TarSCAHandle)(nil)
+
+// newTarSCAHandle creates a new TarSCAHandle from a tar file
+func newTarSCAHandle(tarFile *os.File) (*TarSCAHandle, error) {
+	// Get file info to determine size
+	stat, err := tarFile.Stat()
+	if err != nil {
+		return nil, fmt.Errorf("stat tar file: %w", err)
+	}
+
+	// Create tarfs filesystem
+	fs, err := tarfs.New(tarFile, stat.Size())
+	if err != nil {
+		return nil, fmt.Errorf("create tar filesystem: %w", err)
+	}
+
+	return &TarSCAHandle{
+		tarFile: tarFile,
+		tarFS:   fs,
+	}, nil
+}
+
+// The methods below satisfy sca.SCAHandle. scanTarCmd supplies nothing but the
+// tar file, so every accessor other than the filesystem ones returns an empty
+// value.
+
+func (t *TarSCAHandle) PackageName() string { return "" }
+func (t *TarSCAHandle) RelativeNames() []string { return []string{} }
+func (t *TarSCAHandle) Version() string { return "" }
+func (t *TarSCAHandle) FilesystemForRelative(pkgName string) (sca.SCAFS, error) { return t.tarFS, nil }
+func (t *TarSCAHandle) Filesystem() (sca.SCAFS, error) { return t.tarFS, nil }
+func (t *TarSCAHandle) Options() config.PackageOption { return config.PackageOption{} }
+func (t *TarSCAHandle) BaseDependencies() config.Dependencies { return config.Dependencies{} }
+func (t *TarSCAHandle) InstalledPackages() map[string]string { return map[string]string{} }
+func (t *TarSCAHandle) PkgResolver() *apk.PkgResolver { return nil }
+
 type pkginfo struct {
 	pkgname string
 	pkgver string
diff --git a/pkg/sca/sca.go b/pkg/sca/sca.go
index a45ba6362..c5032cbea 100644
--- a/pkg/sca/sca.go
+++ b/pkg/sca/sca.go
@@ -38,7 +38,7 @@ import (
 	"chainguard.dev/melange/pkg/config"
 )
 
-var libDirs = []string{"lib/", "usr/lib/", "lib64/", "usr/lib64/"}
+var libDirs = []string{"lib/", "usr/lib/", "lib64/", "usr/lib64/", "usr/lib/x86_64-linux-gnu/"}
 
 // SCAFS represents the minimum required filesystem accessors which are needed by
 // the SCA engine.