Skip to content

Commit 95aa413

Browse files
committed
cmd/go/internal/test: add opt-in file hashing instead of modtime for test caching (w/ git)
Updates golang#58571 Updates #150 (cherry picked from commit 64af022)
1 parent 0c028ef commit 95aa413

File tree

2 files changed

+170
-2
lines changed

2 files changed

+170
-2
lines changed
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
// Copyright 2026 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package test
6+
7+
import (
8+
"bytes"
9+
"io/fs"
10+
"os"
11+
"os/exec"
12+
"path"
13+
"strconv"
14+
"strings"
15+
"sync"
16+
)
17+
18+
// gitHash is an object hash as printed by git, kept in its textual form.
type gitHash string // hex blob hash from git (probably SHA-1, but not necessarily)
19+
20+
// useGitHash reports whether the CMD_GO_USE_GIT_HASH environment variable
// holds a true boolean value (as understood by strconv.ParseBool). An unset,
// empty, or unparsable value means false. The environment is consulted only
// on the first call; the answer is cached for the rest of the process.
var useGitHash = sync.OnceValue(func() bool {
	enabled, err := strconv.ParseBool(os.Getenv("CMD_GO_USE_GIT_HASH"))
	return err == nil && enabled
})
28+
29+
// gitHashKey buckets the files of a git repo by the two cheap properties
// that an fs.FileInfo exposes: the base name and the size.
//
// Looking a file up by this key narrows the search to the few repo files
// sharing both properties, so only those need to be statted rather than
// every file in the repo.
type gitHashKey struct {
	// baseName is the last path element, matching fs.FileInfo.Name.
	baseName string
	// size is the file length in bytes.
	size int64
}
39+
40+
type gitHashMap struct {
41+
gitRoot string // absolute path to git repo root
42+
43+
// cands is a list of files in the git repo, bucketed by their (base name,
44+
// size) bucket key. This makes looking for a file faster later, without
45+
// statting the whole world, yet still permitting lookup only from a
46+
// fs.FileInfo that only has a base name & size & Sys info.
47+
cands map[gitHashKey][]*gitHashCand
48+
}
49+
50+
type gitHashCand struct {
51+
rel string // the relative git path from "git ls-files -r"
52+
hash gitHash
53+
54+
statOnce sync.Once
55+
stat fs.FileInfo
56+
}
57+
58+
func (c *gitHashCand) getStat(m *gitHashMap) fs.FileInfo {
59+
c.statOnce.Do(func() {
60+
fullPath := path.Join(m.gitRoot, c.rel)
61+
info, err := os.Lstat(fullPath)
62+
if err == nil {
63+
c.stat = info
64+
}
65+
})
66+
return c.stat
67+
}
68+
69+
// getGitHashMap returns the result of buildGitHashMap, which is run at most
// once per process; later calls reuse the first result, including a nil
// result from a failed build.
var getGitHashMap = sync.OnceValue(buildGitHashMap)
70+
71+
func buildGitHashMap() *gitHashMap {
72+
m := &gitHashMap{
73+
cands: make(map[gitHashKey][]*gitHashCand),
74+
}
75+
gitRoot, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
76+
if err != nil {
77+
return nil
78+
}
79+
m.gitRoot = strings.TrimSpace(string(gitRoot))
80+
81+
cmd := exec.Command("git", "ls-tree",
82+
"-r", // recursive
83+
"--long", // include file sizes
84+
"-z", // null-separated entries; don't have to deal with C quoting of some filenames
85+
"HEAD",
86+
)
87+
cmd.Dir = m.gitRoot // effectively git -C <dir>; either way.
88+
out, err := cmd.Output()
89+
if err != nil {
90+
return nil
91+
}
92+
// Parse lines of the form:
93+
//
94+
// 100644 blob cabbb1732c418125f9c773ce7a28ba34f2708554 639 .gitattributes
95+
// 100644 blob 2b4a5fccdaf12f98cf8e255affa28cfd7e6a784d 95 .github/CODE_OF_CONDUCT.md
96+
//
97+
// .... but null-terminated instead of newline-terminated, so we don't have to deal
98+
// with C quoting of filenames with certain characters.
99+
//
100+
// We don't care about the permissions.
101+
remain := out
102+
for len(remain) > 0 {
103+
line, rest, ok := bytes.Cut(remain, []byte{0})
104+
if !ok {
105+
break
106+
}
107+
remain = rest
108+
meta, nameB, ok := bytes.Cut(line, []byte("\t"))
109+
110+
_, hashAndSize, ok := bytes.Cut(meta, []byte(" blob "))
111+
if !ok {
112+
continue
113+
}
114+
hashB, sizeB, ok := bytes.Cut(hashAndSize, []byte(" "))
115+
if !ok {
116+
continue
117+
}
118+
size, err := strconv.ParseInt(strings.TrimSpace(string(sizeB)), 10, 64)
119+
if err != nil {
120+
continue
121+
}
122+
name := strings.TrimSpace(string(nameB))
123+
hash := strings.TrimSpace(string(hashB))
124+
k := gitHashKey{
125+
baseName: path.Base(name),
126+
size: size,
127+
}
128+
m.cands[k] = append(m.cands[k], &gitHashCand{
129+
rel: name,
130+
hash: gitHash(hash),
131+
})
132+
}
133+
return m
134+
}
135+
136+
func getGitHash(info fs.FileInfo) (gitHash, bool) {
137+
if !useGitHash() || info == nil || !info.Mode().IsRegular() {
138+
return "", false
139+
}
140+
k := gitHashKey{
141+
baseName: info.Name(),
142+
size: info.Size(),
143+
}
144+
m := getGitHashMap()
145+
if m == nil {
146+
return "", false
147+
}
148+
for _, cand := range m.cands[k] {
149+
if os.SameFile(info, cand.getStat(m)) {
150+
return cand.hash, true
151+
}
152+
}
153+
return "", false
154+
}

src/cmd/go/internal/test/test.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2028,7 +2028,7 @@ func hashOpen(name string) (cache.ActionID, error) {
20282028
hashWriteStat(h, finfo)
20292029
}
20302030
}
2031-
} else if info.Mode().IsRegular() {
2031+
} else if info.Mode().IsRegular() && !useGitHash() {
20322032
// Because files might be very large, do not attempt
20332033
// to hash the entirety of their content. Instead assume
20342034
// the mtime and size recorded in hashWriteStat above
@@ -2061,7 +2061,21 @@ func hashStat(name string) cache.ActionID {
20612061
}
20622062

20632063
func hashWriteStat(h io.Writer, info fs.FileInfo) {
2064-
fmt.Fprintf(h, "stat %d %x %v %v\n", info.Size(), uint64(info.Mode()), info.ModTime(), info.IsDir())
2064+
if !useGitHash() {
2065+
// Classic behavior: use mod time.
2066+
fmt.Fprintf(h, "stat %d %x %v %v\n", info.Size(), uint64(info.Mode()), info.ModTime(), info.IsDir())
2067+
return
2068+
}
2069+
var modTimeOrHash any = info.ModTime()
2070+
switch {
2071+
case info.Mode().IsRegular():
2072+
if hash, ok := getGitHash(info); ok {
2073+
modTimeOrHash = hash
2074+
}
2075+
default:
2076+
modTimeOrHash = nil // including for directories
2077+
}
2078+
fmt.Fprintf(h, "stat %d %x %v %v\n", info.Size(), uint64(info.Mode()), modTimeOrHash, info.IsDir())
20652079
}
20662080

20672081
// testAndInputKey returns the actual cache key for the pair (testID, testInputsID).

0 commit comments

Comments
 (0)