Skip to content

Commit a0206e2

Browse files
committed
Restructuring the cache file/directory layout for better performance.
1 parent 50846d6 commit a0206e2

File tree

1 file changed

+21
-15
lines changed

1 file changed

+21
-15
lines changed

go/pkg/diskcache/diskcache.go

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,11 @@ func New(ctx context.Context, root string, maxCapacityBytes uint64) *DiskCache {
116116
}
117117
heap.Init(res.queue)
118118
_ = os.MkdirAll(root, os.ModePerm)
119+
// We use Git's directory/file naming structure as inspiration:
120+
// https://git-scm.com/book/en/v2/Git-Internals-Git-Objects#:~:text=The%20subdirectory%20is%20named%20with%20the%20first%202%20characters%20of%20the%20SHA%2D1%2C%20and%20the%20filename%20is%20the%20remaining%2038%20characters.
121+
for i := 0; i < 256; i++ {
122+
_ = os.MkdirAll(filepath.Join(root, fmt.Sprintf("%02x", i)), os.ModePerm)
123+
}
119124
_ = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
120125
// We log and continue on all errors, because cache read errors are not critical.
121126
if err != nil {
@@ -125,15 +130,15 @@ func New(ctx context.Context, root string, maxCapacityBytes uint64) *DiskCache {
125130
if d.IsDir() {
126131
return nil
127132
}
128-
fname := d.Name()
129-
k, err := res.getKeyFromFileName(fname)
133+
subdir := filepath.Base(filepath.Dir(path))
134+
k, err := res.getKeyFromFileName(subdir + d.Name())
130135
if err != nil {
131-
log.Errorf("Error parsing cached file name %s: %v", fname, err)
136+
log.Errorf("Error parsing cached file name %s: %v", path, err)
132137
return nil
133138
}
134-
atime, err := GetLastAccessTime(filepath.Join(root, fname))
139+
atime, err := GetLastAccessTime(path)
135140
if err != nil {
136-
log.Errorf("Error getting last accessed time of %s: %v", fname, err)
141+
log.Errorf("Error getting last accessed time of %s: %v", path, err)
137142
return nil
138143
}
139144
it := &qitem{
@@ -142,7 +147,7 @@ func New(ctx context.Context, root string, maxCapacityBytes uint64) *DiskCache {
142147
}
143148
size, err := res.getItemSize(k)
144149
if err != nil {
145-
log.Errorf("Error getting file size of %s: %v", fname, err)
150+
log.Errorf("Error getting file size of %s: %v", path, err)
146151
return nil
147152
}
148153
res.store.Store(k, it)
@@ -175,25 +180,25 @@ func (d *DiskCache) TotalSizeBytes() uint64 {
175180
return uint64(atomic.LoadInt64(&d.sizeBytes))
176181
}
177182

178-
// This function is defined in https://pkg.go.dev/strings#CutPrefix
183+
// This function is defined in https://pkg.go.dev/strings#CutSuffix
179184
// It is copy/pasted here as a hack, because I failed to upgrade the *Reclient* repo to the latest Go 1.20.7.
180-
func CutPrefix(s, prefix string) (after string, found bool) {
181-
if !strings.HasPrefix(s, prefix) {
185+
func CutSuffix(s, suffix string) (before string, found bool) {
186+
if !strings.HasSuffix(s, suffix) {
182187
return s, false
183188
}
184-
return s[len(prefix):], true
189+
return s[:len(s)-len(suffix)], true
185190
}
186191

187192
func (d *DiskCache) getKeyFromFileName(fname string) (key, error) {
188193
pair := strings.Split(fname, ".")
189194
if len(pair) != 2 {
190-
return key{}, fmt.Errorf("Expected file name in the form [ac_]hash/size, got %s", fname)
195+
return key{}, fmt.Errorf("expected file name in the form hash[_ac].size, got %s", fname)
191196
}
192197
size, err := strconv.ParseInt(pair[1], 10, 64)
193198
if err != nil {
194199
return key{}, fmt.Errorf("invalid size in digest %s: %s", fname, err)
195200
}
196-
hash, isAc := CutPrefix(pair[0], "ac_")
201+
hash, isAc := CutSuffix(pair[0], "_ac") // must match the "_ac" suffix that getPath appends for action-cache entries
197202
dg, err := digest.New(hash, size)
198203
if err != nil {
199204
return key{}, fmt.Errorf("invalid digest from file name %s: %v", fname, err)
@@ -202,11 +207,11 @@ func (d *DiskCache) getKeyFromFileName(fname string) (key, error) {
202207
}
203208

204209
func (d *DiskCache) getPath(k key) string {
205-
prefix := ""
210+
suffix := ""
206211
if !k.isCas {
207-
prefix = "ac_"
212+
suffix = "_ac"
208213
}
209-
return filepath.Join(d.root, fmt.Sprintf("%s%s.%d", prefix, k.digest.Hash, k.digest.Size))
214+
return filepath.Join(d.root, k.digest.Hash[:2], fmt.Sprintf("%s%s.%d", k.digest.Hash[2:], suffix, k.digest.Size))
210215
}
211216

212217
func (d *DiskCache) StoreCas(dg digest.Digest, path string) error {
@@ -292,6 +297,7 @@ func (d *DiskCache) gc() {
292297
}
293298
atomic.AddInt64(&d.sizeBytes, -size)
294299
it.mu.Lock()
300+
// We only delete the files, and not the prefix directories: all 256 prefix directories are created up front in New, so they are left in place for reuse.
295301
if err := os.Remove(d.getPath(it.key)); err != nil {
296302
log.Errorf("Error removing file: %v", err)
297303
}

0 commit comments

Comments
 (0)