Skip to content

Commit c16d3fd

Browse files
nixprimegvisor-bot
authored andcommitted
pgalloc: log async page loading progress and info about awaited loads
PiperOrigin-RevId: 738555942
1 parent 97820ce commit c16d3fd

File tree

2 files changed

+110
-13
lines changed

2 files changed

+110
-13
lines changed

pkg/sentry/pgalloc/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ go_library(
151151
"//pkg/context",
152152
"//pkg/errors/linuxerr",
153153
"//pkg/fd",
154+
"//pkg/gohacks",
154155
"//pkg/goid",
155156
"//pkg/hostarch",
156157
"//pkg/log",

pkg/sentry/pgalloc/save_restore.go

Lines changed: 109 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"gvisor.dev/gvisor/pkg/bitmap"
3030
"gvisor.dev/gvisor/pkg/errors/linuxerr"
3131
"gvisor.dev/gvisor/pkg/fd"
32+
"gvisor.dev/gvisor/pkg/gohacks"
3233
"gvisor.dev/gvisor/pkg/goid"
3334
"gvisor.dev/gvisor/pkg/hostarch"
3435
"gvisor.dev/gvisor/pkg/log"
@@ -362,6 +363,9 @@ func (f *MemoryFile) LoadFrom(ctx context.Context, r io.Reader, opts *LoadOpts)
362363
)
363364
if opts.PagesFile != nil {
364365
aplg = &aplGoroutine{
366+
apl: aplShared{
367+
timeStartWaiters: math.MaxInt64,
368+
},
365369
f: f,
366370
q: aio.NewGoQueue(aplQueueCapacity),
367371
doneCallback: opts.OnAsyncPageLoadDone,
@@ -451,8 +455,8 @@ func (f *MemoryFile) LoadFrom(ctx context.Context, r io.Reader, opts *LoadOpts)
451455
return nil
452456
}
453457

454-
// aplShared holds asynchronous page loading state that is shared with
455-
// users of the MemoryFile.
458+
// aplShared holds asynchronous page loading state that is shared with other
459+
// goroutines.
456460
type aplShared struct {
457461
// minUnloaded is the MemoryFile offset of the first unloaded byte.
458462
minUnloaded atomicbitops.Uint64
@@ -471,6 +475,29 @@ type aplShared struct {
471475
// priority contains possibly-unstarted ranges in unloaded with at least
472476
// one waiter.
473477
priority ringdeque.Deque[memmap.FileRange]
478+
479+
// numWaiters is the current number of waiting waiters.
480+
numWaiters int
481+
482+
// totalWaiters is the number of waiters that have ever waited for pages
483+
// from this MemoryFile.
484+
totalWaiters int
485+
486+
// timeStartWaiters was the value of gohacks.Nanotime() when numWaiters
487+
// most recently transitioned from 0 to 1. If numWaiters is 0,
488+
// timeStartWaiters is MaxInt64.
489+
timeStartWaiters int64
490+
491+
// nsWaitedOne is the duration for which at least one waiter was waiting
492+
// for a load. nsWaitedTotal is the duration for which waiters were waiting
493+
// for loads, summed across all waiters. bytesWaited is the number of bytes
494+
// for which at least one waiter waited.
495+
durWaitedOne time.Duration
496+
durWaitedTotal time.Duration
497+
bytesWaited uint64
498+
499+
// bytesLoaded is the number of bytes that have been loaded so far.
500+
bytesLoaded uint64
474501
}
475502

476503
// aplUnloadedInfo is the value type of aplShared.unloaded.
@@ -488,11 +515,17 @@ type aplUnloadedInfo struct {
488515

489516
type aplWaiter struct {
490517
// wakeup is used by a caller of MemoryFile.awaitLoad() to block until all
491-
// pages in fr are loaded.
518+
// pages in fr are loaded. wakeup is internally synchronized. fr is
519+
// immutable after initialization.
492520
wakeup syncevent.Waiter
493521
fr memmap.FileRange
494522

523+
// timeStart was the value of gohacks.Nanotime() when this waiter started
524+
// waiting. timeStart is immutable after initialization.
525+
timeStart int64
526+
495527
// pending is the number of unloaded bytes that this waiter is waiting for.
528+
// pending is protected by aplShared.mu.
496529
pending uint64
497530
}
498531

@@ -552,27 +585,36 @@ func (apl *aplShared) awaitLoad(f *MemoryFile, fr memmap.FileRange) error {
552585
apl.unloaded.MutateRange(fr, func(ulseg aplUnloadedIterator) bool {
553586
ul := ulseg.ValuePtr()
554587
ulFR := ulseg.Range()
588+
ullen := ulFR.Length()
555589
if len(ul.waiters) == 0 && !ul.started {
556590
apl.priority.PushBack(ulFR)
591+
apl.bytesWaited += ullen
557592
if logAwaitedLoads {
558593
log.Infof("MemoryFile(%p): prioritize %v", f, ulFR)
559594
}
560595
}
561596
ul.waiters = append(ul.waiters, w)
562-
w.pending += ulFR.Length()
597+
w.pending += ullen
563598
return true
564599
})
565600
pending := w.pending != 0
601+
if pending {
602+
w.timeStart = gohacks.Nanotime()
603+
if apl.numWaiters == 0 {
604+
apl.timeStartWaiters = w.timeStart
605+
}
606+
apl.numWaiters++
607+
apl.totalWaiters++
608+
}
566609
apl.mu.Unlock()
567610
if pending {
568-
var startWaitTime time.Time
569611
if logAwaitedLoads {
570-
startWaitTime = time.Now()
571612
log.Infof("MemoryFile(%p): awaitLoad goid %d start: %v (%d bytes)", f, goid.Get(), fr, fr.Length())
572613
}
573614
w.wakeup.WaitAndAckAll()
574615
if logAwaitedLoads {
575-
log.Infof("MemoryFile(%p): awaitLoad goid %d waited %v: %v (%d bytes)", f, goid.Get(), time.Since(startWaitTime), fr, fr.Length())
616+
waitNS := gohacks.Nanotime() - w.timeStart
617+
log.Infof("MemoryFile(%p): awaitLoad goid %d waited %v: %v (%d bytes)", f, goid.Get(), time.Duration(waitNS), fr, fr.Length())
576618
}
577619
}
578620
return apl.err
@@ -854,9 +896,47 @@ func (g *aplGoroutine) main() {
854896
}
855897
defer dropDelayedDecRefs()
856898

857-
timeStart := time.Now()
858-
loadedBytes := uint64(0)
859-
log.Debugf("MemoryFile(%p): async page loading started", f)
899+
timeStart := gohacks.Nanotime()
900+
if log.IsLogging(log.Debug) {
901+
log.Debugf("MemoryFile(%p): async page loading started", f)
902+
progressTicker := time.NewTicker(5 * time.Second)
903+
progressStopC := make(chan struct{})
904+
defer func() { close(progressStopC) }()
905+
go func() {
906+
timeLast := timeStart
907+
bytesLoadedLast := uint64(0)
908+
for {
909+
select {
910+
case <-progressStopC:
911+
progressTicker.Stop()
912+
return
913+
case <-progressTicker.C:
914+
// Take a snapshot of our progress.
915+
apl.mu.Lock()
916+
totalWaiters := apl.totalWaiters
917+
timeStartWaiters := apl.timeStartWaiters
918+
durWaitedOne := apl.durWaitedOne
919+
durWaitedTotal := apl.durWaitedTotal
920+
bytesWaited := apl.bytesWaited
921+
bytesLoaded := apl.bytesLoaded
922+
apl.mu.Unlock()
923+
now := gohacks.Nanotime()
924+
durTotal := time.Duration(now - timeStart)
925+
// apl can have at least one waiter for a very long time
926+
// due to new waiters enqueueing before old ones are
927+
// served; avoid apparent jumps in durWaitedOne.
928+
if timeStartWaiters < now {
929+
durWaitedOne += time.Duration(now - timeStartWaiters)
930+
}
931+
durDelta := time.Duration(now - timeLast)
932+
bytesLoadedDelta := bytesLoaded - bytesLoadedLast
933+
log.Infof("MemoryFile(%p): async page loading in progress for %s (%d bytes, %.3f MB/s); since last update %s ago: %d bytes, %.3f MB/s; %d waiters waited %v~%v for %d bytes", f, durTotal.Round(time.Millisecond), bytesLoaded, float64(bytesLoaded)*1e-6/durTotal.Seconds(), durDelta.Round(time.Millisecond), bytesLoadedDelta, float64(bytesLoadedDelta)*1e-6/durDelta.Seconds(), totalWaiters, durWaitedOne.Round(time.Millisecond), durWaitedTotal.Round(time.Millisecond), bytesWaited)
934+
timeLast = now
935+
bytesLoadedLast = bytesLoaded
936+
}
937+
}
938+
}()
939+
}
860940
for {
861941
// Enqueue as many reads as possible.
862942
if !g.canEnqueue() {
@@ -956,8 +1036,11 @@ func (g *aplGoroutine) main() {
9561036
// so async page loading has completed successfully.
9571037
apl.minUnloaded.Store(math.MaxUint64)
9581038
f.asyncPageLoad.Store(nil)
959-
dur := time.Since(timeStart)
960-
log.Infof("MemoryFile(%p): async page loading completed in %s (%d bytes, %f bytes/second)", f, dur, loadedBytes, float64(loadedBytes)/dur.Seconds())
1039+
timeFinish := gohacks.Nanotime()
1040+
durTotal := time.Duration(timeFinish - timeStart)
1041+
apl.mu.Lock()
1042+
log.Infof("MemoryFile(%p): async page loading completed in %s (%d bytes, %.3f MB/s); %d waiters waited %v~%v for %d bytes", f, durTotal.Round(time.Millisecond), apl.bytesLoaded, float64(apl.bytesLoaded)*1e-6/durTotal.Seconds(), apl.totalWaiters, apl.durWaitedOne.Round(time.Millisecond), apl.durWaitedTotal.Round(time.Millisecond), apl.bytesWaited)
1043+
apl.mu.Unlock()
9611044
return
9621045
}
9631046
panic(fmt.Sprintf("unknown events in lfStatus: %#x", ev))
@@ -1001,6 +1084,7 @@ func (g *aplGoroutine) main() {
10011084
return
10021085
}
10031086
haveWaiters := false
1087+
now := int64(0)
10041088
for _, fr := range op.frs() {
10051089
// All pages in fr have been started and were split around fr
10061090
// when they were started (above), and apl.unloaded never
@@ -1009,7 +1093,7 @@ func (g *aplGoroutine) main() {
10091093
for ulseg := apl.unloaded.FindSegment(fr.Start); ulseg.Ok() && ulseg.Start() < fr.End; ulseg = apl.unloaded.Remove(ulseg).NextSegment() {
10101094
ul := ulseg.ValuePtr()
10111095
ullen := ulseg.Range().Length()
1012-
loadedBytes += ullen
1096+
apl.bytesLoaded += ullen
10131097
if !ul.started {
10141098
panic(fmt.Sprintf("completion of %v includes pages %v that were never started", fr, ulseg.Range()))
10151099
}
@@ -1018,6 +1102,18 @@ func (g *aplGoroutine) main() {
10181102
w.pending -= ullen
10191103
if w.pending == 0 {
10201104
wakeups = append(wakeups, w)
1105+
if now == 0 {
1106+
now = gohacks.Nanotime()
1107+
}
1108+
// This definition of "wait time" skips the time
1109+
// taken for w to wake up (bad), but avoids having
1110+
// to lock apl.mu again in apl.awaitLoad() (good).
1111+
apl.durWaitedTotal += time.Duration(now - w.timeStart)
1112+
apl.numWaiters--
1113+
if apl.numWaiters == 0 {
1114+
apl.durWaitedOne += time.Duration(now - apl.timeStartWaiters)
1115+
apl.timeStartWaiters = math.MaxInt64
1116+
}
10211117
}
10221118
}
10231119
}

0 commit comments

Comments
 (0)