From 51096ca510c322250060d1a06a67c3d24a4e10bf Mon Sep 17 00:00:00 2001
From: justinsb
Date: Tue, 28 Jan 2025 09:10:57 -0500
Subject: [PATCH] tools: introduce simple hget download tool

---
 tools/hget/README.md         |  40 ++++++
 tools/hget/go.mod            |   9 ++
 tools/hget/go.sum            |   4 +
 tools/hget/main.go           | 237 +++++++++++++++++++++++++++++++++++
 tools/hget/pkg/hget/hash.go  |  27 ++++
 tools/hget/pkg/hget/http.go  | 162 ++++++++++++++++++++++++
 tools/hget/pkg/hget/index.go |  78 ++++++++++++
 7 files changed, 557 insertions(+)
 create mode 100644 tools/hget/README.md
 create mode 100644 tools/hget/go.mod
 create mode 100644 tools/hget/go.sum
 create mode 100644 tools/hget/main.go
 create mode 100644 tools/hget/pkg/hget/hash.go
 create mode 100644 tools/hget/pkg/hget/http.go
 create mode 100644 tools/hget/pkg/hget/index.go

diff --git a/tools/hget/README.md b/tools/hget/README.md
new file mode 100644
index 0000000000000..6fadbb488d6f2
--- /dev/null
+++ b/tools/hget/README.md
@@ -0,0 +1,40 @@
+# hget
+
+hget is a tool for downloading files, verifying their integrity using a hash like sha256.
+
+It has a few main goals:
+
+* Scriptable: Make it easy to download files from a script, without worrying about whether curl or wget is available.
+* Verifiable: Ensure that the file is downloaded correctly, and that the hash matches the expected value.
+* Flexible: Abstracts away the source of the file, so that you can easily use mirrors and (in future) things like local caches.
+
+# Usage
+
+## Direct download
+
+```bash
+# Download kOps for linux/amd64 from github
+hget --sha256=9253d15938376236d6578384e3d5ee0b973bdaf3303fb5fd6fbb3c59aedb9d8d --output=./kops --url=https://github.com/kubernetes/kops/releases/download/v1.31.0/kops-linux-amd64 --chmod=0755
+```
+
+## Use of index files (e.g. SHA256SUMS)
+
+hget can use a SHA256SUMS file to find the file to download.
+
+```bash
+# Download kubectl for linux/amd64 from kubernetes v1.32.0
+# 646d58f6d98ee670a71d9cdffbf6625aeea2849d567f214bc43a35f8ccb7bf70 bin/linux/amd64/kubectl
+hget --sha256=646d58f6d98ee670a71d9cdffbf6625aeea2849d567f214bc43a35f8ccb7bf70 --chmod=0755 --output=./kubectl --index=https://dl.k8s.io/v1.32.0/SHA256SUMS
+```
+
+This will download the sha256sum file, locate the matching file and download it, verify the sha256 hash, and then set the permissions to 0755.
+
+The sha256sum index file is not verified, but the file itself is verified.
+
+# Installation
+
+From source:
+
+```bash
+go install k8s.io/kops/tools/hget@latest
+```
diff --git a/tools/hget/go.mod b/tools/hget/go.mod
new file mode 100644
index 0000000000000..a63dc74acd6a0
--- /dev/null
+++ b/tools/hget/go.mod
@@ -0,0 +1,9 @@
+module k8s.io/kops/tools/hget
+
+go 1.23
+
+toolchain go1.23.4
+
+require k8s.io/klog/v2 v2.130.1
+
+require github.com/go-logr/logr v1.4.1 // indirect
diff --git a/tools/hget/go.sum b/tools/hget/go.sum
new file mode 100644
index 0000000000000..d28ed98b112e8
--- /dev/null
+++ b/tools/hget/go.sum
@@ -0,0 +1,4 @@
+github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ=
+github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
+k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
diff --git a/tools/hget/main.go b/tools/hget/main.go
new file mode 100644
index 0000000000000..44e4b0e870748
--- /dev/null
+++ b/tools/hget/main.go
@@ -0,0 +1,237 @@
+package main
+
+import (
+	"context"
+	"errors"
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"time"
+
+	"k8s.io/klog/v2"
+	"k8s.io/kops/tools/hget/pkg/hget"
+)
+
+// options holds the settings for a single hget invocation.
+type options struct {
+	// Sha256 is the expected hex-encoded SHA256 hash of the file.
+	Sha256 string
+	// Chmod is the mode to set on the output file; zero leaves it unchanged.
+	Chmod os.FileMode
+	// OutputPath is the path the verified file is written to.
+	OutputPath string
+
+	// Indexes are URLs of SHA256SUMS-style files used to locate the download.
+	Indexes []string
+}
+
+func main() {
+	ctx := context.Background()
+	if err := run(ctx); err != nil {
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		os.Exit(1)
+	}
+}
+
+// stringSliceValue is a flag.Value that collects every occurrence of a
+// repeatable flag, including empty strings.
+type stringSliceValue []string
+
+// String implements flag.Value.
+func (s *stringSliceValue) String() string {
+	return strings.Join(*s, ",")
+}
+
+// Set implements flag.Value by appending value to the slice.
+func (s *stringSliceValue) Set(value string) error {
+	*s = append(*s, value)
+	return nil
+}
+
+// run parses the command-line flags and fetches the requested file. It tries
+// each --url in order, then any index entry whose hash matches --sha256, and
+// returns nil as soon as one download verifies. Nothing is downloaded when
+// the output file already exists with the expected hash.
+func run(ctx context.Context) error {
+	log := klog.FromContext(ctx)
+
+	opts := &options{}
+
+	fileMode := ""
+	flag.StringVar(&fileMode, "chmod", fileMode, "Permissions to set on the output file (octal)")
+	flag.StringVar(&opts.Sha256, "sha256", "", "SHA256 hash to verify against")
+	flag.StringVar(&opts.OutputPath, "output", "", "Path to write the downloaded file")
+
+	// --index is the name used in the README; --sha256sums is kept as an alias.
+	var indexes stringSliceValue
+	flag.Var(&indexes, "index", "URL to SHA256SUMS file to find the file to download")
+	flag.Var(&indexes, "sha256sums", "Alias for --index")
+
+	var urls stringSliceValue
+	flag.Var(&urls, "url", "URL to download the file from")
+
+	flag.Parse()
+
+	opts.Indexes = indexes
+	if fileMode != "" {
+		parsed, err := strconv.ParseUint(fileMode, 8, 32)
+		if err != nil {
+			return fmt.Errorf("parsing chmod %q: %w", fileMode, err)
+		}
+		opts.Chmod = os.FileMode(parsed)
+	}
+
+	if opts.Sha256 == "" {
+		fmt.Fprintln(os.Stderr, "error: --sha256 is required")
+		flag.Usage()
+		os.Exit(1)
+	}
+	// Computed digests are lowercase hex, so normalize the expected value.
+	opts.Sha256 = strings.ToLower(opts.Sha256)
+
+	if opts.OutputPath == "" {
+		fmt.Fprintln(os.Stderr, "error: --output is required")
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	alreadyExists, err := checkExistingFile(ctx, opts.OutputPath, opts)
+	if err != nil {
+		log.Error(err, "failed to check existing file", "path", opts.OutputPath)
+	}
+	if alreadyExists {
+		log.Info("file already exists", "path", opts.OutputPath)
+		return nil
+	}
+
+	var errs []error
+	for _, url := range urls {
+		if err := downloadToFile(ctx, opts, url); err != nil {
+			errs = append(errs, err)
+		} else {
+			return nil
+		}
+	}
+
+	// Try assets from the index
+	{
+		assets := hget.NewIndex()
+
+		for _, index := range opts.Indexes {
+			if err := assets.AddToIndex(ctx, index); err != nil {
+				errs = append(errs, fmt.Errorf("adding %q to asset index: %w", index, err))
+			}
+		}
+
+		asset, err := assets.Lookup(ctx, opts.Sha256)
+		if err != nil {
+			errs = append(errs, fmt.Errorf("looking up asset: %w", err))
+		}
+		if asset != nil {
+			if err := downloadToFile(ctx, opts, asset.URL); err != nil {
+				errs = append(errs, fmt.Errorf("downloading asset: %w", err))
+			} else {
+				return nil
+			}
+		}
+	}
+
+	if len(errs) > 0 {
+		return fmt.Errorf("failed to download file: %w", errors.Join(errs...))
+	}
+
+	return nil
+}
+
+// downloadToFile downloads url into a temporary file next to opts.OutputPath,
+// verifies its SHA256 hash, applies opts.Chmod if set, and renames it into
+// place. The temporary file is removed if any step fails.
+func downloadToFile(ctx context.Context, opts *options, url string) error {
+	log := klog.FromContext(ctx)
+
+	startTime := time.Now()
+
+	dir := filepath.Dir(opts.OutputPath)
+	// Create a temporary file to download to
+	tmpFile, err := os.CreateTemp(dir, "hget-*")
+	if err != nil {
+		return fmt.Errorf("creating temp file: %w", err)
+	}
+	removeTempFile := true
+	defer func() {
+		if removeTempFile {
+			if err := os.Remove(tmpFile.Name()); err != nil {
+				log.Error(err, "failed to remove temp file", "path", tmpFile.Name())
+			}
+		}
+	}()
+
+	downloadResults, err := hget.DownloadURL(ctx, url, tmpFile)
+	// Always close the file, even on failure, so the descriptor is not leaked
+	// and write errors surfaced by Close are not lost.
+	closeErr := tmpFile.Close()
+	if err != nil {
+		return fmt.Errorf("downloading file: %w", err)
+	}
+	if closeErr != nil {
+		return fmt.Errorf("closing temp file: %w", closeErr)
+	}
+
+	elapsed := time.Since(startTime)
+	log.Info("downloaded file", "url", url, "results", downloadResults, "elapsed", elapsed)
+
+	// Verify hash
+	if downloadResults.Hash != opts.Sha256 {
+		return fmt.Errorf("hash mismatch: got %s, want %s", downloadResults.Hash, opts.Sha256)
+	}
+
+	// Set permissions if specified
+	if opts.Chmod != 0 {
+		if err := os.Chmod(tmpFile.Name(), opts.Chmod); err != nil {
+			return fmt.Errorf("setting permissions: %w", err)
+		}
+	}
+
+	// Move to final destination
+	if err := os.Rename(tmpFile.Name(), opts.OutputPath); err != nil {
+		return fmt.Errorf("moving temp file to destination: %w", err)
+	}
+	removeTempFile = false
+	return nil
+}
+
+// checkExistingFile reports whether the file at p already exists with the
+// hash in opts.Sha256. When the hash matches but the mode differs from a
+// non-zero opts.Chmod, the mode is corrected in place.
+func checkExistingFile(ctx context.Context, p string, opts *options) (bool, error) {
+	log := klog.FromContext(ctx)
+
+	stat, err := os.Stat(p)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return false, nil
+		}
+		return false, fmt.Errorf("failed to stat file %q: %w", p, err)
+	}
+
+	hash, err := hget.GetHashForFile(p)
+	if err != nil {
+		return false, fmt.Errorf("failed to get hash of %q: %w", p, err)
+	}
+	if hash != opts.Sha256 {
+		log.Info("file already exists but hash is not correct", "path", p, "got", hash, "want", opts.Sha256)
+		return false, nil
+	}
+
+	if opts.Chmod != 0 && stat.Mode() != opts.Chmod {
+		log.Info("file already exists but permissions are not correct", "path", p, "got", stat.Mode(), "want", opts.Chmod)
+		if err := os.Chmod(p, opts.Chmod); err != nil {
+			return false, fmt.Errorf("setting permissions on %q: %w", p, err)
+		}
+	}
+
+	return true, nil
+}
diff --git a/tools/hget/pkg/hget/hash.go b/tools/hget/pkg/hget/hash.go
new file mode 100644
index 0000000000000..bdc6c6a9ab5f5
--- /dev/null
+++ b/tools/hget/pkg/hget/hash.go
@@ -0,0 +1,27 @@
+package hget
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"io"
+	"os"
+)
+
+// GetHash reads r to EOF and returns the hex-encoded SHA256 hash of its contents.
+func GetHash(r io.Reader) (string, error) {
+	h := sha256.New()
+	if _, err := io.Copy(h, r); err != nil {
+		return "", err
+	}
+	return hex.EncodeToString(h.Sum(nil)), nil
+}
+
+// GetHashForFile returns the hex-encoded SHA256 hash of the file at path p.
+func GetHashForFile(p string) (string, error) {
+	f, err := os.Open(p)
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+	return GetHash(f)
+}
diff --git a/tools/hget/pkg/hget/http.go b/tools/hget/pkg/hget/http.go
new file mode 100644
index 0000000000000..4752dc0002581
--- /dev/null
+++ b/tools/hget/pkg/hget/http.go
@@ -0,0 +1,162 @@
+package hget
+
+import (
+	"bufio"
+	"compress/flate"
+	"compress/gzip"
+	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"k8s.io/klog/v2"
+)
+
+// DownloadResult is the result of a download operation.
+// It contains the hash of the downloaded file and the length of the file.
+type DownloadResult struct {
+	// Hash is the hex-encoded SHA256 hash of the downloaded content.
+	Hash string
+	// Length is the number of bytes of content that were hashed.
+	Length int64
+}
+
+// DownloadURL downloads the file from the given URL and writes it to the given writer.
+// It returns the hash of the downloaded file and the length of the file.
+// Both are computed over the decoded content, after any gzip or deflate
+// Content-Encoding has been removed.
+func DownloadURL(ctx context.Context, url string, w io.Writer) (*DownloadResult, error) {
+	log := klog.FromContext(ctx)
+
+	// Set up the HTTP client and request
+	client := &http.Client{}
+	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+	if err != nil {
+		return nil, fmt.Errorf("creating http request: %w", err)
+	}
+
+	// Enable compression, it saves everyone time and money
+	req.Header.Add("Accept-Encoding", "gzip, deflate")
+
+	info := fmt.Sprintf("%s %s", req.Method, req.URL.String())
+
+	startTime := time.Now()
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("doing http request %s: %w", info, err)
+	}
+	defer resp.Body.Close()
+
+	// Check the status before decoding, so that an error response is reported
+	// as such rather than as a decompression failure.
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("unexpected status from http request %s: %d", info, resp.StatusCode)
+	}
+
+	var src io.Reader
+
+	switch resp.Header.Get("Content-Encoding") {
+	case "gzip":
+		gz, err := gzip.NewReader(resp.Body)
+		if err != nil {
+			return nil, fmt.Errorf("creating gzip reader: %w", err)
+		}
+		defer gz.Close()
+		src = gz
+	case "deflate":
+		fl := flate.NewReader(resp.Body)
+		defer fl.Close()
+		src = fl
+
+	case "":
+		// No compression
+		src = resp.Body
+
+	default:
+		return nil, fmt.Errorf("unsupported content encoding: %s", resp.Header.Get("Content-Encoding"))
+	}
+
+	// Calculate hash while downloading
+	hasher := sha256.New()
+	writer := io.MultiWriter(w, hasher)
+
+	// Count the bytes actually written: resp.ContentLength is -1 when the
+	// length is unknown, and otherwise describes the encoded body.
+	n, err := io.Copy(writer, src)
+	if err != nil {
+		return nil, fmt.Errorf("downloading from %s: %w", info, err)
+	}
+
+	elapsed := time.Since(startTime)
+
+	result := &DownloadResult{
+		Hash:   hex.EncodeToString(hasher.Sum(nil)),
+		Length: n,
+	}
+	log.Info("downloaded file", "url", url, "result", result, "elapsed", elapsed, "content-encoding", resp.Header.Get("Content-Encoding"))
+
+	return result, nil
+}
+
+// readLines fetches url and invokes callback once per line of the response
+// body, with the line ending removed. It stops at the first callback error.
+// The returned result covers the bytes read from the body.
+func readLines(ctx context.Context, url string, callback func(line string) error) (*DownloadResult, error) {
+	// Set up the HTTP client and request
+	client := &http.Client{}
+	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+	if err != nil {
+		return nil, fmt.Errorf("creating http request: %w", err)
+	}
+
+	info := fmt.Sprintf("%s %s", req.Method, req.URL.String())
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("doing http request %s: %w", info, err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("unexpected status from http request %s: %d", info, resp.StatusCode)
+	}
+
+	// Calculate hash while downloading
+	hasher := sha256.New()
+	var length int64
+
+	reader := bufio.NewReader(resp.Body)
+	for {
+		line, err := reader.ReadBytes('\n')
+		if len(line) != 0 {
+			length += int64(len(line))
+			if _, err := hasher.Write(line); err != nil {
+				return nil, fmt.Errorf("writing to hasher: %w", err)
+			}
+			// The last line may have no trailing newline, so strip only
+			// actual line-ending characters rather than the final byte.
+			s := strings.TrimRight(string(line), "\r\n")
+			if err := callback(s); err != nil {
+				return nil, fmt.Errorf("callback failed: %w", err)
+			}
+		}
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return nil, fmt.Errorf("reading from %s: %w", info, err)
+		}
+	}
+
+	result := &DownloadResult{
+		Hash:   hex.EncodeToString(hasher.Sum(nil)),
+		Length: length,
+	}
+
+	return result, nil
+}
diff --git a/tools/hget/pkg/hget/index.go b/tools/hget/pkg/hget/index.go
new file mode 100644
index 0000000000000..9029db634bd1d
--- /dev/null
+++ b/tools/hget/pkg/hget/index.go
@@ -0,0 +1,78 @@
+package hget
+
+import (
+	"context"
+	"fmt"
+	"net/url"
+	"path"
+	"strings"
+
+	"k8s.io/klog/v2"
+)
+
+// Index is a collection of downloadable assets, searchable by SHA256 hash.
+type Index struct {
+	assets []*Asset
+}
+
+// Asset is a single file listed in an index.
+type Asset struct {
+	// URL is the location the asset can be downloaded from.
+	URL string
+	// SHA256 is the hash listed for the asset in the index file.
+	SHA256 string
+}
+
+// NewIndex returns an empty Index.
+func NewIndex() *Index {
+	return &Index{}
+}
+
+// AddToIndex downloads the SHA256SUMS-style file at indexURL and adds one
+// asset per "<hash> <path>" line. Asset URLs are formed by joining each path
+// onto the directory of indexURL. Lines that do not have exactly two fields
+// are logged and skipped.
+func (i *Index) AddToIndex(ctx context.Context, indexURL string) error {
+	log := klog.FromContext(ctx)
+
+	baseURL, err := url.Parse(indexURL)
+	if err != nil {
+		return fmt.Errorf("parsing base URL: %w", err)
+	}
+
+	lastToken := path.Base(baseURL.Path)
+	baseURL.Path = strings.TrimSuffix(baseURL.Path, lastToken)
+
+	results, err := readLines(ctx, indexURL, func(line string) error {
+		tokens := strings.Fields(line)
+		if len(tokens) != 2 {
+			log.Info("ignoring unknown line", "line", line)
+			return nil
+		}
+		// sha256sum marks files hashed in binary mode with a leading "*".
+		name := strings.TrimPrefix(tokens[1], "*")
+		assetURL := baseURL.JoinPath(name)
+		asset := &Asset{
+			URL:    assetURL.String(),
+			SHA256: tokens[0],
+		}
+		i.assets = append(i.assets, asset)
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+	log.Info("downloaded index", "results", results)
+	return nil
+}
+
+// Lookup returns the first asset in the index whose hash matches sha256,
+// compared case-insensitively, or an error if no asset matches.
+func (i *Index) Lookup(ctx context.Context, sha256 string) (*Asset, error) {
+	for _, asset := range i.assets {
+		if strings.EqualFold(asset.SHA256, sha256) {
+			return asset, nil
+		}
+	}
+	return nil, fmt.Errorf("asset not found")
+}