@@ -71,6 +71,17 @@ type machine struct {
 	// vCPUsByID are the machine vCPUs, can be indexed by the vCPU's ID.
 	vCPUsByID []*vCPU
 
+	// vCPUList is a list of vCPUs, ordered by most-recently-used.
+	// The most recently used vCPUs are at the end of the list.
+	vCPUList vCPUList
+
+	// numRecentVCPUs tracks the number of vCPUs considered recently used.
+	numRecentVCPUs atomicbitops.Int32
+
+	// recentVCPUThreshold is the maximum number of vCPUs to track as
+	// recently used before triggering a reordering of vCPUList.
+	recentVCPUThreshold int32
+
 	// usedVCPUs is the number of vCPUs that have been used from the
 	// vCPUsByID pool.
 	usedVCPUs int
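Note: the vCPUList type above (and the vCPUEntry embedded into vCPU in the next hunk) comes from gVisor's generated intrusive-list code, which is not part of this diff. The following hand-written sketch only illustrates the list API the patch relies on (Front, Next, PushFront, PushBack, Remove, PushBackList); the real types are produced by the ilist template and may differ in detail.

    // Illustrative only: a minimal intrusive list equivalent to the
    // generated vCPUList/vCPUEntry API assumed by this patch.
    type vCPUEntry struct {
        next, prev *vCPU
    }

    // Next returns the following vCPU in the list, or nil at the end.
    // Field promotion makes this callable as c.Next() on a *vCPU.
    func (e *vCPUEntry) Next() *vCPU { return e.next }

    type vCPUList struct {
        head, tail *vCPU
    }

    // Front returns the head of the list; under the MRU discipline this is
    // the least recently used vCPU.
    func (l *vCPUList) Front() *vCPU { return l.head }

    // PushFront inserts c at the head of the list.
    func (l *vCPUList) PushFront(c *vCPU) {
        c.next, c.prev = l.head, nil
        if l.head != nil {
            l.head.prev = c
        } else {
            l.tail = c
        }
        l.head = c
    }

    // PushBack inserts c at the tail of the list.
    func (l *vCPUList) PushBack(c *vCPU) {
        c.next, c.prev = nil, l.tail
        if l.tail != nil {
            l.tail.next = c
        } else {
            l.head = c
        }
        l.tail = c
    }

    // Remove unlinks c from the list.
    func (l *vCPUList) Remove(c *vCPU) {
        if c.prev != nil {
            c.prev.next = c.next
        } else {
            l.head = c.next
        }
        if c.next != nil {
            c.next.prev = c.prev
        } else {
            l.tail = c.prev
        }
        c.next, c.prev = nil, nil
    }

    // PushBackList appends every element of other to l and empties other.
    func (l *vCPUList) PushBackList(other *vCPUList) {
        if other.head == nil {
            return
        }
        if l.tail == nil {
            l.head, l.tail = other.head, other.tail
        } else {
            l.tail.next = other.head
            other.head.prev = l.tail
            l.tail = other.tail
        }
        other.head, other.tail = nil, nil
    }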
@@ -213,6 +224,9 @@ type vCPU struct {
 
 	// dieState holds state related to vCPU death.
 	dieState dieState
+
+	recentlyUsed atomicbitops.Bool
+	vCPUEntry
 }
 
 type dieState struct {
@@ -241,6 +255,7 @@ func (m *machine) createVCPU(id int) *vCPU {
 	}
 	c.CPU.Init(&m.kernel, c.id, c)
 	m.vCPUsByID[c.id] = c
+	m.vCPUList.PushFront(c)
 
 	// Ensure the signal mask is correct.
 	if err := c.setSignalMask(); err != nil {
@@ -532,6 +547,10 @@ func (m *machine) Get() *vCPU {
 	runtime.UnlockOSThread()
 	m.mu.Lock()
 
+	if m.numRecentVCPUs.Load() > m.recentVCPUThreshold {
+		m.resortRecentlyUsedListLocked()
+	}
+
 	for {
 		runtime.LockOSThread()
 		tid = hosttid.Current()
@@ -557,10 +576,12 @@ func (m *machine) Get() *vCPU {
 	}
 
 	// Scan for an available vCPU.
-	for origTID, c := range m.vCPUsByTID {
+	for c := m.vCPUList.Front(); c != nil; c = c.Next() {
+		origTID := c.tid.Load()
 		if c.state.CompareAndSwap(vCPUReady, vCPUUser) {
 			delete(m.vCPUsByTID, origTID)
 			m.vCPUsByTID[tid] = c
+			c.setRecentlyUsed(true)
 			m.mu.Unlock()
 			c.loadSegments(tid)
 			getVCPUCounter.Increment(&getVCPUAcquisitionUnused)
@@ -569,7 +590,7 @@ func (m *machine) Get() *vCPU {
 	}
 
 	// Scan for something not in user mode.
-	for origTID, c := range m.vCPUsByTID {
+	for c := m.vCPUList.Front(); c != nil; c = c.Next() {
 		if !c.state.CompareAndSwap(vCPUGuest, vCPUGuest|vCPUWaiter) {
 			continue
 		}
@@ -587,8 +608,10 @@ func (m *machine) Get() *vCPU {
 		}
 
 		// Steal the vCPU.
+		origTID := c.tid.Load()
 		delete(m.vCPUsByTID, origTID)
 		m.vCPUsByTID[tid] = c
+		c.setRecentlyUsed(true)
 		m.mu.Unlock()
 		c.loadSegments(tid)
 		getVCPUCounter.Increment(&getVCPUAcquisitionStolen)
@@ -636,13 +659,54 @@ func (m *machine) dropPageTables(pt *pagetables.PageTables) {
 	}
 }
 
+// getMaxVCPU determines the maximum number of vCPUs to use.
+func (m *machine) getMaxVCPU() {
+	maxVCPUs, errno := hostsyscall.RawSyscall(unix.SYS_IOCTL, uintptr(m.fd), KVM_CHECK_EXTENSION, _KVM_CAP_MAX_VCPUS)
+	if errno != 0 {
+		m.maxVCPUs = _KVM_NR_VCPUS
+	} else {
+		m.maxVCPUs = int(maxVCPUs)
+	}
+
+	// The goal here is to avoid vCPU contention for reasonable workloads,
+	// though "reasonable" isn't well defined in this case. Let's say that
+	// CPU overcommit with a factor of 2 is still acceptable. We allocate
+	// one set of vCPUs for each Go runtime processor (P) and two more
+	// sets of vCPUs to run user code.
+	rCPUs := runtime.GOMAXPROCS(0)
+	if 3*rCPUs < m.maxVCPUs {
+		m.maxVCPUs = 3 * rCPUs
+	}
+	m.recentVCPUThreshold = int32(m.maxVCPUs * 2 / 3)
+}
+
+// resortRecentlyUsedListLocked reorders m.vCPUList so that the most
+// recently used vCPUs are located at the back. It also resets the
+// recentlyUsed flag on every vCPU.
+//
+// Precondition: callers must hold m.mu for writing.
+func (m *machine) resortRecentlyUsedListLocked() {
+	var activeList vCPUList
+	for cur := m.vCPUList.Front(); cur != nil; {
+		next := cur.Next()
+		if cur.recentlyUsed.Load() {
+			m.vCPUList.Remove(cur)
+			activeList.PushBack(cur)
+			cur.setRecentlyUsed(false)
+		}
+		cur = next
+	}
+	m.vCPUList.PushBackList(&activeList)
+}
+
 // lock marks the vCPU as in user mode.
 //
 // This should only be called directly when known to be safe, i.e. when
 // the vCPU is owned by the current TID with no chance of theft.
 //
 //go:nosplit
 func (c *vCPU) lock() {
+	c.setRecentlyUsed(true)
 	atomicbitops.OrUint32(&c.state, vCPUUser)
 }
 
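To make the sizing in getMaxVCPU concrete, here is a worked example with assumed inputs (a host KVM cap of 512 vCPUs and GOMAXPROCS of 8; both values are hypothetical):

    package main

    import "fmt"

    // Hypothetical inputs: hostCap stands in for the KVM_CAP_MAX_VCPUS
    // query and rCPUs for runtime.GOMAXPROCS(0).
    func main() {
        hostCap := 512
        rCPUs := 8

        maxVCPUs := hostCap
        if 3*rCPUs < maxVCPUs {
            maxVCPUs = 3 * rCPUs // one set of vCPUs per P, two more sets for user code
        }
        threshold := maxVCPUs * 2 / 3

        // Prints "maxVCPUs=24 recentVCPUThreshold=16": Get() triggers a
        // resort of the vCPU list only once more than 16 vCPUs are
        // marked recently used.
        fmt.Printf("maxVCPUs=%d recentVCPUThreshold=%d\n", maxVCPUs, threshold)
    }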
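The effect of resortRecentlyUsedListLocked is a stable partition: vCPUs whose recentlyUsed flag is set move to the back in their existing relative order, while cold vCPUs stay in front. Since Get() scans from Front(), the scans visit the coldest vCPUs first, so theft tends to land on vCPUs that have been idle the longest. A standalone toy trace of that partition (slice-based, with hypothetical vCPU IDs, not the patch's list type):

    package main

    import "fmt"

    // resort stable-partitions ids: entries not in used keep their order
    // at the front, used entries move to the back, and each used flag is
    // cleared, mirroring what resortRecentlyUsedListLocked does to
    // m.vCPUList.
    func resort(ids []string, used map[string]bool) []string {
        var cold, hot []string
        for _, id := range ids {
            if used[id] {
                hot = append(hot, id)
                used[id] = false // setRecentlyUsed(false)
            } else {
                cold = append(cold, id)
            }
        }
        return append(cold, hot...)
    }

    func main() {
        // createVCPU pushes to the front, so the newest vCPU starts at the head.
        list := []string{"v3", "v2", "v1", "v0"}
        used := map[string]bool{"v1": true, "v3": true}
        fmt.Println(resort(list, used)) // [v2 v0 v3 v1]: cold vCPUs stay in front
    }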
@@ -697,6 +761,17 @@ func (c *vCPU) NotifyInterrupt() {
 // pid is used below in bounce.
 var pid = unix.Getpid()
 
+func (c *vCPU) setRecentlyUsed(v bool) {
+	old := c.recentlyUsed.Swap(v)
+	if v != old {
+		if v {
+			c.machine.numRecentVCPUs.Add(1)
+		} else {
+			c.machine.numRecentVCPUs.Add(-1)
+		}
+	}
+}
+
 // bounce forces a return to the kernel or to host mode.
 //
 // This effectively unwinds the state machine.
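setRecentlyUsed relies on Swap returning the previous flag value, so numRecentVCPUs is adjusted exactly once per real transition even when several threads race on the same vCPU. A self-contained sketch of that swap-then-count pattern, written against the standard library's sync/atomic (the patch uses gVisor's atomicbitops wrappers instead):

    package main

    import (
        "fmt"
        "sync"
        "sync/atomic"
    )

    func main() {
        var flag atomic.Bool
        var count atomic.Int32

        // set mirrors setRecentlyUsed: Swap returns the old value, so only
        // the caller that actually flips the flag touches the counter.
        set := func(v bool) {
            if old := flag.Swap(v); old != v {
                if v {
                    count.Add(1)
                } else {
                    count.Add(-1)
                }
            }
        }

        var wg sync.WaitGroup
        for i := 0; i < 100; i++ {
            wg.Add(1)
            go func() { defer wg.Done(); set(true) }()
        }
        wg.Wait()
        fmt.Println(count.Load()) // always 1, no matter how many racing setters
    }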