|
| 1 | +// Copyright 2026 The Go Authors. All rights reserved. |
| 2 | +// Use of this source code is governed by a BSD-style |
| 3 | +// license that can be found in the LICENSE file. |
| 4 | + |
| 5 | +package test |
| 6 | + |
| 7 | +import ( |
| 8 | + "bytes" |
| 9 | + "io/fs" |
| 10 | + "os" |
| 11 | + "os/exec" |
| 12 | + "path" |
| 13 | + "strconv" |
| 14 | + "strings" |
| 15 | + "sync" |
| 16 | +) |
| 17 | + |
// gitHash is an object hash as printed by git, in hexadecimal form.
type gitHash string // hex blob hash from git (probably SHA-1, but not necessarily)
| 19 | + |
// useGitHash reports whether the CMD_GO_USE_GIT_HASH environment variable
// holds a value that strconv.ParseBool accepts as true. An unset, empty, or
// unparsable value counts as false. The environment is consulted only on the
// first call; later calls return the cached answer.
var useGitHash = sync.OnceValue(func() bool {
	// ParseBool rejects the empty string (and anything else it does not
	// recognize) with an error, which we deliberately treat as "disabled".
	enabled, err := strconv.ParseBool(os.Getenv("CMD_GO_USE_GIT_HASH"))
	return err == nil && enabled
})
| 28 | + |
// gitHashKey is the key used to look up the files in a git repo that
// share a given base name and size.
//
// Bucketing by this key avoids statting every file in the repo when
// trying to find the git hash for a given file: only the files whose
// name and size both match need to be statted.
type gitHashKey struct {
	baseName string // base name of file, since that is what fs.FileInfo.Name returns
	size     int64  // file size in bytes
}
| 39 | + |
// gitHashMap is an index of the files known to git in one repository,
// arranged so that a file's git hash can be found from its fs.FileInfo.
type gitHashMap struct {
	gitRoot string // absolute path to git repo root

	// cands holds the files in the git repo, bucketed by their
	// (base name, size) key. This makes looking for a file faster later,
	// without statting the whole world, yet still permits lookup from an
	// fs.FileInfo that only has a base name & size & Sys info.
	cands map[gitHashKey][]*gitHashCand
}
| 49 | + |
// gitHashCand is one candidate file in the git repo, together with its
// lazily computed file info.
type gitHashCand struct {
	rel  string // the relative git path from "git ls-tree -r"
	hash gitHash

	// statOnce guards the one-time population of stat by getStat.
	statOnce sync.Once
	// stat is the os.Lstat result for the file; it stays nil if Lstat failed.
	stat fs.FileInfo
}
| 57 | + |
| 58 | +func (c *gitHashCand) getStat(m *gitHashMap) fs.FileInfo { |
| 59 | + c.statOnce.Do(func() { |
| 60 | + fullPath := path.Join(m.gitRoot, c.rel) |
| 61 | + info, err := os.Lstat(fullPath) |
| 62 | + if err == nil { |
| 63 | + c.stat = info |
| 64 | + } |
| 65 | + }) |
| 66 | + return c.stat |
| 67 | +} |
| 68 | + |
// getGitHashMap returns the result of buildGitHashMap, which is run at most
// once. The result is nil if buildGitHashMap could not build the map.
var getGitHashMap = sync.OnceValue(buildGitHashMap)
| 70 | + |
| 71 | +func buildGitHashMap() *gitHashMap { |
| 72 | + m := &gitHashMap{ |
| 73 | + cands: make(map[gitHashKey][]*gitHashCand), |
| 74 | + } |
| 75 | + gitRoot, err := exec.Command("git", "rev-parse", "--show-toplevel").Output() |
| 76 | + if err != nil { |
| 77 | + return nil |
| 78 | + } |
| 79 | + m.gitRoot = strings.TrimSpace(string(gitRoot)) |
| 80 | + |
| 81 | + cmd := exec.Command("git", "ls-tree", |
| 82 | + "-r", // recursive |
| 83 | + "--long", // include file sizes |
| 84 | + "-z", // null-separated entries; don't have to deal with C quoting of some filenames |
| 85 | + "HEAD", |
| 86 | + ) |
| 87 | + cmd.Dir = m.gitRoot // effectively git -C <dir>; either way. |
| 88 | + out, err := cmd.Output() |
| 89 | + if err != nil { |
| 90 | + return nil |
| 91 | + } |
| 92 | + // Parse lines of the form: |
| 93 | + // |
| 94 | + // 100644 blob cabbb1732c418125f9c773ce7a28ba34f2708554 639 .gitattributes |
| 95 | + // 100644 blob 2b4a5fccdaf12f98cf8e255affa28cfd7e6a784d 95 .github/CODE_OF_CONDUCT.md |
| 96 | + // |
| 97 | + // .... but null-terminated instead of newline-terminated, so we don't have to deal |
| 98 | + // with C quoting of filenames with certain characters. |
| 99 | + // |
| 100 | + // We don't care about the permissions. |
| 101 | + remain := out |
| 102 | + for len(remain) > 0 { |
| 103 | + line, rest, ok := bytes.Cut(remain, []byte{0}) |
| 104 | + if !ok { |
| 105 | + break |
| 106 | + } |
| 107 | + remain = rest |
| 108 | + meta, nameB, ok := bytes.Cut(line, []byte("\t")) |
| 109 | + |
| 110 | + _, hashAndSize, ok := bytes.Cut(meta, []byte(" blob ")) |
| 111 | + if !ok { |
| 112 | + continue |
| 113 | + } |
| 114 | + hashB, sizeB, ok := bytes.Cut(hashAndSize, []byte(" ")) |
| 115 | + if !ok { |
| 116 | + continue |
| 117 | + } |
| 118 | + size, err := strconv.ParseInt(strings.TrimSpace(string(sizeB)), 10, 64) |
| 119 | + if err != nil { |
| 120 | + continue |
| 121 | + } |
| 122 | + name := strings.TrimSpace(string(nameB)) |
| 123 | + hash := strings.TrimSpace(string(hashB)) |
| 124 | + k := gitHashKey{ |
| 125 | + baseName: path.Base(name), |
| 126 | + size: size, |
| 127 | + } |
| 128 | + m.cands[k] = append(m.cands[k], &gitHashCand{ |
| 129 | + rel: name, |
| 130 | + hash: gitHash(hash), |
| 131 | + }) |
| 132 | + } |
| 133 | + return m |
| 134 | +} |
| 135 | + |
| 136 | +func getGitHash(info fs.FileInfo) (gitHash, bool) { |
| 137 | + if !useGitHash() || info == nil || !info.Mode().IsRegular() { |
| 138 | + return "", false |
| 139 | + } |
| 140 | + k := gitHashKey{ |
| 141 | + baseName: info.Name(), |
| 142 | + size: info.Size(), |
| 143 | + } |
| 144 | + m := getGitHashMap() |
| 145 | + if m == nil { |
| 146 | + return "", false |
| 147 | + } |
| 148 | + for _, cand := range m.cands[k] { |
| 149 | + if os.SameFile(info, cand.getStat(m)) { |
| 150 | + return cand.hash, true |
| 151 | + } |
| 152 | + } |
| 153 | + return "", false |
| 154 | +} |
0 commit comments