@@ -97,12 +97,28 @@ type DiskCache struct {
9797 mu sync.Mutex // protects the queue.
9898 store sync.Map // map of keys to qitems.
9999 queue * priorityQueue // keys by last accessed time.
100- sizeBytes int64 // total size.
101100 ctx context.Context
102101 shutdown chan bool
102+ shutdownOnce sync.Once
103103 gcTick uint64
104104 gcReq chan uint64
105- testGcTicks chan uint64
105+ gcDone chan bool
106+ stats * DiskCacheStats
107+ }
108+
109+ type DiskCacheStats struct {
110+ TotalSizeBytes int64
111+ TotalNumFiles int64
112+ NumFilesStored int64
113+ TotalStoredBytes int64
114+ NumFilesGCed int64
115+ TotalGCedSizeBytes int64
116+ NumCacheHits int64
117+ NumCacheMisses int64
118+ TotalCacheHitSizeBytes int64
119+ InitTime time.Duration
120+ TotalGCTime time.Duration
121+ TotalGCDiskOpsTime time.Duration
106122}
107123
108124func New (ctx context.Context , root string , maxCapacityBytes uint64 ) (* DiskCache , error ) {
@@ -115,7 +131,11 @@ func New(ctx context.Context, root string, maxCapacityBytes uint64) (*DiskCache,
115131 },
116132 gcReq : make (chan uint64 , maxConcurrentRequests ),
117133 shutdown : make (chan bool ),
134+ gcDone : make (chan bool ),
135+ stats : & DiskCacheStats {},
118136 }
137+ start := time .Now ()
138+ defer func () { atomic .AddInt64 ((* int64 )(& res .stats .InitTime ), int64 (time .Since (start ))) }()
119139 heap .Init (res .queue )
120140 if err := os .MkdirAll (root , os .ModePerm ); err != nil {
121141 return nil , err
@@ -135,30 +155,31 @@ func New(ctx context.Context, root string, maxCapacityBytes uint64) (*DiskCache,
135155 return filepath .WalkDir (prefixDir , func (path string , d fs.DirEntry , err error ) error {
136156 // We log and continue on all errors, because cache read errors are not critical.
137157 if err != nil {
138- return fmt .Errorf ("error reading cache directory: %v " , err )
158+ return fmt .Errorf ("error reading cache directory: %w " , err )
139159 }
140160 if d .IsDir () {
141161 return nil
142162 }
143163 subdir := filepath .Base (filepath .Dir (path ))
144- k , err := res . getKeyFromFileName (subdir + d .Name ())
164+ k , err := getKeyFromFileName (subdir + d .Name ())
145165 if err != nil {
146- return fmt .Errorf ("error parsing cached file name %s: %v " , path , err )
166+ return fmt .Errorf ("error parsing cached file name %s: %w " , path , err )
147167 }
148- atime , err := getLastAccessTime ( path )
168+ info , err := d . Info ( )
149169 if err != nil {
150- return fmt .Errorf ("error getting last accessed time of %s: %v " , path , err )
170+ return fmt .Errorf ("error getting file info of %s: %w " , path , err )
151171 }
152172 it := & qitem {
153173 key : k ,
154- lat : atime ,
174+ lat : fileInfoToAccessTime ( info ) ,
155175 }
156176 size , err := res .getItemSize (k )
157177 if err != nil {
158- return fmt .Errorf ("error getting file size of %s: %v " , path , err )
178+ return fmt .Errorf ("error getting file size of %s: %w " , path , err )
159179 }
160180 res .store .Store (k , it )
161- atomic .AddInt64 (& res .sizeBytes , size )
181+ atomic .AddInt64 (& res .stats .TotalSizeBytes , size )
182+ atomic .AddInt64 (& res .stats .TotalNumFiles , 1 )
162183 res .mu .Lock ()
163184 heap .Push (res .queue , it )
164185 res .mu .Unlock ()
@@ -180,42 +201,50 @@ func (d *DiskCache) getItemSize(k key) (int64, error) {
180201 fname := d .getPath (k )
181202 info , err := os .Stat (fname )
182203 if err != nil {
183- return 0 , fmt .Errorf ("Error getting info for %s: %v " , fname , err )
204+ return 0 , fmt .Errorf ("error getting info for %s: %w " , fname , err )
184205 }
185206 return info .Size (), nil
186207}
187208
188- // Releases resources and terminates the GC daemon. Should be the last call to the DiskCache.
209+ // Terminates the GC daemon, waiting for it to complete. No further Store* calls to the DiskCache should be made .
189210func (d * DiskCache ) Shutdown () {
190- d .shutdown <- true
191- }
192-
193- func ( d * DiskCache ) TotalSizeBytes () uint64 {
194- return uint64 ( atomic . LoadInt64 ( & d . sizeBytes ) )
211+ d .shutdownOnce . Do ( func () {
212+ d . shutdown <- true
213+ <- d . gcDone
214+ log . Infof ( "DiskCacheStats: %+v" , d . stats )
215+ } )
195216}
196217
197- // This function is defined in https://pkg.go.dev/strings#CutSuffix
198- // It is copy/pasted here as a hack, because I failed to upgrade the *Reclient* repo to the latest Go 1.20.7.
199- func CutSuffix (s , suffix string ) (before string , found bool ) {
200- if ! strings .HasSuffix (s , suffix ) {
201- return s , false
218+ func (d * DiskCache ) GetStats () * DiskCacheStats {
219+ // Return a copy for safety.
220+ return & DiskCacheStats {
221+ TotalSizeBytes : atomic .LoadInt64 (& d .stats .TotalSizeBytes ),
222+ TotalNumFiles : atomic .LoadInt64 (& d .stats .TotalNumFiles ),
223+ NumFilesStored : atomic .LoadInt64 (& d .stats .NumFilesStored ),
224+ TotalStoredBytes : atomic .LoadInt64 (& d .stats .TotalStoredBytes ),
225+ NumFilesGCed : atomic .LoadInt64 (& d .stats .NumFilesGCed ),
226+ TotalGCedSizeBytes : atomic .LoadInt64 (& d .stats .TotalGCedSizeBytes ),
227+ NumCacheHits : atomic .LoadInt64 (& d .stats .NumCacheHits ),
228+ NumCacheMisses : atomic .LoadInt64 (& d .stats .NumCacheMisses ),
229+ TotalCacheHitSizeBytes : atomic .LoadInt64 (& d .stats .TotalCacheHitSizeBytes ),
230+ InitTime : time .Duration (atomic .LoadInt64 ((* int64 )(& d .stats .InitTime ))),
231+ TotalGCTime : time .Duration (atomic .LoadInt64 ((* int64 )(& d .stats .TotalGCTime ))),
202232 }
203- return s [:len (s )- len (suffix )], true
204233}
205234
206- func ( d * DiskCache ) getKeyFromFileName (fname string ) (key , error ) {
235+ func getKeyFromFileName (fname string ) (key , error ) {
207236 pair := strings .Split (fname , "." )
208237 if len (pair ) != 2 {
209- return key {}, fmt .Errorf ("expected file name in the form [ac_]hash/ size, got %s" , fname )
238+ return key {}, fmt .Errorf ("expected file name in the form hash[_ac]. size, got %s" , fname )
210239 }
211240 size , err := strconv .ParseInt (pair [1 ], 10 , 64 )
212241 if err != nil {
213242 return key {}, fmt .Errorf ("invalid size in digest %s: %s" , fname , err )
214243 }
215- hash , isAc := CutSuffix (pair [0 ], "ac_ " )
244+ hash , isAc := strings . CutSuffix (pair [0 ], "_ac " )
216245 dg , err := digest .New (hash , size )
217246 if err != nil {
218- return key {}, fmt .Errorf ("invalid digest from file name %s: %v " , fname , err )
247+ return key {}, fmt .Errorf ("invalid digest from file name %s: %w " , fname , err )
219248 }
220249 return key {digest : dg , isCas : ! isAc }, nil
221250}
@@ -248,7 +277,10 @@ func (d *DiskCache) StoreCas(dg digest.Digest, path string) error {
248277 if err := copyFile (path , d .getPath (it .key ), dg .Size ); err != nil {
249278 return err
250279 }
251- newSize := uint64 (atomic .AddInt64 (& d .sizeBytes , dg .Size ))
280+ newSize := uint64 (atomic .AddInt64 (& d .stats .TotalSizeBytes , dg .Size ))
281+ atomic .AddInt64 (& d .stats .TotalNumFiles , 1 )
282+ atomic .AddInt64 (& d .stats .NumFilesStored , 1 )
283+ atomic .AddInt64 (& d .stats .TotalStoredBytes , dg .Size )
252284 if newSize > d .maxCapacityBytes {
253285 select {
254286 case d .gcReq <- atomic .AddUint64 (& d .gcTick , 1 ):
@@ -281,7 +313,10 @@ func (d *DiskCache) StoreActionCache(dg digest.Digest, ar *repb.ActionResult) er
281313 if err := os .WriteFile (d .getPath (it .key ), bytes , 0644 ); err != nil {
282314 return err
283315 }
284- newSize := uint64 (atomic .AddInt64 (& d .sizeBytes , int64 (size )))
316+ newSize := uint64 (atomic .AddInt64 (& d .stats .TotalSizeBytes , int64 (size )))
317+ atomic .AddInt64 (& d .stats .TotalNumFiles , 1 )
318+ atomic .AddInt64 (& d .stats .NumFilesStored , 1 )
319+ atomic .AddInt64 (& d .stats .TotalStoredBytes , int64 (size ))
285320 if newSize > d .maxCapacityBytes {
286321 select {
287322 case d .gcReq <- atomic .AddUint64 (& d .gcTick , 1 ):
@@ -292,38 +327,40 @@ func (d *DiskCache) StoreActionCache(dg digest.Digest, ar *repb.ActionResult) er
292327}
293328
294329func (d * DiskCache ) gc () {
330+ defer func () { d .gcDone <- true }()
295331 for {
296332 select {
297333 case <- d .shutdown :
298334 return
299335 case <- d .ctx .Done ():
300336 return
301- case t := <- d .gcReq :
337+ case <- d .gcReq :
338+ start := time .Now ()
302339 // Evict old entries until total size is below cap.
303- for uint64 (atomic .LoadInt64 (& d .sizeBytes )) > d .maxCapacityBytes {
340+ for uint64 (atomic .LoadInt64 (& d .stats . TotalSizeBytes )) > d .maxCapacityBytes {
304341 d .mu .Lock ()
305342 it := heap .Pop (d .queue ).(* qitem )
306343 d .mu .Unlock ()
307344 size , err := d .getItemSize (it .key )
308345 if err != nil {
309- log .Errorf ("error getting item size for %v: %v " , it .key , err )
346+ log .Errorf ("error getting item size for %v: %w " , it .key , err )
310347 size = 0
311348 }
312- atomic .AddInt64 (& d .sizeBytes , - size )
349+ atomic .AddInt64 (& d .stats .TotalSizeBytes , - size )
350+ atomic .AddInt64 (& d .stats .TotalNumFiles , - 1 )
351+ atomic .AddInt64 (& d .stats .TotalGCedSizeBytes , size )
352+ atomic .AddInt64 (& d .stats .NumFilesGCed , 1 )
313353 it .mu .Lock ()
354+ diskOpsStart := time .Now ()
314355 // We only delete the files, and not the prefix directories, because the prefixes are not worth worrying about.
315356 if err := os .Remove (d .getPath (it .key )); err != nil {
316- log .Errorf ("Error removing file: %v " , err )
357+ log .Errorf ("Error removing file: %w " , err )
317358 }
359+ atomic .AddInt64 ((* int64 )(& d .stats .TotalGCDiskOpsTime ), int64 (time .Since (diskOpsStart )))
318360 d .store .Delete (it .key )
319361 it .mu .Unlock ()
320362 }
321- if d .testGcTicks != nil {
322- select {
323- case d .testGcTicks <- t :
324- default :
325- }
326- }
363+ atomic .AddInt64 ((* int64 )(& d .stats .TotalGCTime ), int64 (time .Since (start )))
327364 }
328365 }
329366}
@@ -363,6 +400,7 @@ func (d *DiskCache) LoadCas(dg digest.Digest, path string) bool {
363400 k := key {digest : dg , isCas : true }
364401 iUntyped , loaded := d .store .Load (k )
365402 if ! loaded {
403+ atomic .AddInt64 (& d .stats .NumCacheMisses , 1 )
366404 return false
367405 }
368406 it := iUntyped .(* qitem )
@@ -371,58 +409,59 @@ func (d *DiskCache) LoadCas(dg digest.Digest, path string) bool {
371409 it .mu .RUnlock ()
372410 if err != nil {
373411 // It is not possible to prevent a race with GC; hence, we return false on copy errors.
412+ atomic .AddInt64 (& d .stats .NumCacheMisses , 1 )
374413 return false
375414 }
376415
377416 d .mu .Lock ()
378417 d .queue .Bump (it )
379418 d .mu .Unlock ()
419+ atomic .AddInt64 (& d .stats .NumCacheHits , 1 )
420+ atomic .AddInt64 (& d .stats .TotalCacheHitSizeBytes , dg .Size )
380421 return true
381422}
382423
383424func (d * DiskCache ) LoadActionCache (dg digest.Digest ) (ar * repb.ActionResult , loaded bool ) {
384425 k := key {digest : dg , isCas : false }
385426 iUntyped , loaded := d .store .Load (k )
386427 if ! loaded {
428+ atomic .AddInt64 (& d .stats .NumCacheMisses , 1 )
387429 return nil , false
388430 }
389431 it := iUntyped .(* qitem )
390432 it .mu .RLock ()
391433 ar = & repb.ActionResult {}
392- if err := d .loadActionResult (k , ar ); err != nil {
434+ size , err := d .loadActionResult (k , ar )
435+ if err != nil {
393436 // It is not possible to prevent a race with GC; hence, we return false on load errors.
394437 it .mu .RUnlock ()
438+ atomic .AddInt64 (& d .stats .NumCacheMisses , 1 )
395439 return nil , false
396440 }
397441 it .mu .RUnlock ()
398442
399443 d .mu .Lock ()
400444 d .queue .Bump (it )
401445 d .mu .Unlock ()
446+ atomic .AddInt64 (& d .stats .NumCacheHits , 1 )
447+ atomic .AddInt64 (& d .stats .TotalCacheHitSizeBytes , int64 (size ))
402448 return ar , true
403449}
404450
405- func (d * DiskCache ) loadActionResult (k key , ar * repb.ActionResult ) error {
451+ func (d * DiskCache ) loadActionResult (k key , ar * repb.ActionResult ) ( int , error ) {
406452 bytes , err := os .ReadFile (d .getPath (k ))
407453 if err != nil {
408- return err
454+ return 0 , err
409455 }
456+ n := len (bytes )
410457 // Required sanity check: sometimes the read pretends to succeed, but doesn't, if
411458 // the file is being concurrently deleted. Empty ActionResult is advised against in
412459 // the RE-API: https://github.com/bazelbuild/remote-apis/blob/main/build/bazel/remote/execution/v2/remote_execution.proto#L1052
413- if len ( bytes ) == 0 {
414- return fmt .Errorf ("read empty ActionResult for %v" , k .digest )
460+ if n == 0 {
461+ return n , fmt .Errorf ("read empty ActionResult for %v" , k .digest )
415462 }
416463 if err := proto .Unmarshal (bytes , ar ); err != nil {
417- return fmt .Errorf ("error unmarshalling %v as ActionResult: %v" , bytes , err )
418- }
419- return nil
420- }
421-
422- func getLastAccessTime (path string ) (time.Time , error ) {
423- info , err := os .Stat (path )
424- if err != nil {
425- return time.Time {}, err
464+ return n , fmt .Errorf ("error unmarshalling %v as ActionResult: %w" , bytes , err )
426465 }
427- return FileInfoToAccessTime ( info ) , nil
466+ return n , nil
428467}
0 commit comments