diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index 2aac1a1ef7..6f7ec1d807 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -17,6 +17,18 @@ go_template_instance(
     },
 )
 
+go_template_instance(
+    name = "vcpu_list",
+    out = "vcpu_list.go",
+    package = "kvm",
+    prefix = "vCPU",
+    template = "//pkg/ilist:generic_list",
+    types = {
+        "Element": "*vCPU",
+        "Linker": "*vCPU",
+    },
+)
+
 config_setting(
     name = "debug_build",
     values = {
@@ -67,6 +79,7 @@ go_library(
         "physical_map_amd64.go",
         "physical_map_arm64.go",
         "seccomp_mmap_unsafe.go",
+        "vcpu_list.go",
         "virtual_map.go",
     ],
     visibility = ["//pkg/sentry:internal"],
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index a89dea1706..d1b3be9dc2 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -71,6 +71,17 @@ type machine struct {
 	// vCPUsByID are the machine vCPUs, can be indexed by the vCPU's ID.
 	vCPUsByID []*vCPU
 
+	// vCPUList is a list of vCPUs, ordered by most-recently-used.
+	// The most recently used vCPUs are at the end of the list.
+	vCPUList vCPUList
+
+	// numRecentVCPUs tracks the number of vCPUs considered recently used.
+	numRecentVCPUs atomicbitops.Int32
+
+	// recentVCPUThreshold is the maximum number of vCPUs to track as
+	// recently used before triggering a reordering of vCPUList.
+	recentVCPUThreshold int32
+
 	// usedVCPUs is the number of vCPUs that have been used from the
 	// vCPUsByID pool.
 	usedVCPUs int
@@ -213,6 +224,9 @@ type vCPU struct {
 
 	// dieState holds state related to vCPU death.
 	dieState dieState
+
+	recentlyUsed atomicbitops.Bool
+	vCPUEntry
 }
 
 type dieState struct {
@@ -241,6 +255,7 @@ func (m *machine) createVCPU(id int) *vCPU {
 	}
 	c.CPU.Init(&m.kernel, c.id, c)
 	m.vCPUsByID[c.id] = c
+	m.vCPUList.PushFront(c)
 
 	// Ensure the signal mask is correct.
 	if err := c.setSignalMask(); err != nil {
@@ -532,6 +547,10 @@ func (m *machine) Get() *vCPU {
 	runtime.UnlockOSThread()
 	m.mu.Lock()
 
+	if m.numRecentVCPUs.Load() > m.recentVCPUThreshold {
+		m.resortRecentlyUsedListLocked()
+	}
+
 	for {
 		runtime.LockOSThread()
 		tid = hosttid.Current()
@@ -557,10 +576,12 @@ func (m *machine) Get() *vCPU {
 		}
 
 		// Scan for an available vCPU.
-		for origTID, c := range m.vCPUsByTID {
+		for c := m.vCPUList.Front(); c != nil; c = c.Next() {
+			origTID := c.tid.Load()
 			if c.state.CompareAndSwap(vCPUReady, vCPUUser) {
 				delete(m.vCPUsByTID, origTID)
 				m.vCPUsByTID[tid] = c
+				c.setRecentlyUsed(true)
 				m.mu.Unlock()
 				c.loadSegments(tid)
 				getVCPUCounter.Increment(&getVCPUAcquisitionUnused)
@@ -569,7 +590,7 @@ func (m *machine) Get() *vCPU {
 		}
 
 		// Scan for something not in user mode.
-		for origTID, c := range m.vCPUsByTID {
+		for c := m.vCPUList.Front(); c != nil; c = c.Next() {
 			if !c.state.CompareAndSwap(vCPUGuest, vCPUGuest|vCPUWaiter) {
 				continue
 			}
@@ -587,8 +608,10 @@ func (m *machine) Get() *vCPU {
 			}
 
 			// Steal the vCPU.
+			origTID := c.tid.Load()
 			delete(m.vCPUsByTID, origTID)
 			m.vCPUsByTID[tid] = c
+			c.setRecentlyUsed(true)
 			m.mu.Unlock()
 			c.loadSegments(tid)
 			getVCPUCounter.Increment(&getVCPUAcquisitionStolen)
@@ -636,6 +659,51 @@ func (m *machine) dropPageTables(pt *pagetables.PageTables) {
 	}
 }
 
+// getMaxVCPU determines the maximum number of vCPUs and the resort threshold.
+func (m *machine) getMaxVCPU() {
+	maxVCPUs, errno := hostsyscall.RawSyscall(unix.SYS_IOCTL, uintptr(m.fd), KVM_CHECK_EXTENSION, _KVM_CAP_MAX_VCPUS)
+	if errno != 0 {
+		m.maxVCPUs = _KVM_NR_VCPUS
+	} else {
+		m.maxVCPUs = int(maxVCPUs)
+	}
+
+	// The goal here is to avoid vCPU contention for reasonable workloads.
+	// But "reasonable" isn't defined well in this case. Let's say that CPU
+	// overcommit with factor 2 is still acceptable. We allocate a set of
+	// vCPUs for each Go runtime processor (P) and two sets of vCPUs to run
+	// user code.
+	rCPUs := runtime.GOMAXPROCS(0)
+	if 3*rCPUs < m.maxVCPUs {
+		m.maxVCPUs = 3 * rCPUs
+	}
+	m.recentVCPUThreshold = int32(m.maxVCPUs * 2 / 3)
+}
+
+// resortRecentlyUsedListLocked reorders m.vCPUList so that the most
+// recently used vCPUs are located at the back. It also resets the
+// `vCPU.recentlyUsed` flag for all vCPUs.
+//
+// Precondition: callers must hold m.mu for writing.
+func (m *machine) resortRecentlyUsedListLocked() {
+	var activeList vCPUList
+	cur := m.vCPUList.Front()
+	next := cur.Next()
+	for {
+		if cur.recentlyUsed.Load() {
+			m.vCPUList.Remove(cur)
+			activeList.PushBack(cur)
+			cur.setRecentlyUsed(false)
+		}
+		cur = next
+		if cur == nil {
+			break
+		}
+		next = cur.Next()
+	}
+	m.vCPUList.PushBackList(&activeList)
+}
+
 // lock marks the vCPU as in user mode.
 //
 // This should only be called directly when known to be safe, i.e. when
@@ -643,6 +711,7 @@ func (m *machine) dropPageTables(pt *pagetables.PageTables) {
 //
 //go:nosplit
 func (c *vCPU) lock() {
+	c.setRecentlyUsed(true)
 	atomicbitops.OrUint32(&c.state, vCPUUser)
 }
 
@@ -697,6 +766,17 @@ func (c *vCPU) NotifyInterrupt() {
 // pid is used below in bounce.
 var pid = unix.Getpid()
 
+func (c *vCPU) setRecentlyUsed(v bool) {
+	old := c.recentlyUsed.Swap(v)
+	if v != old {
+		if v {
+			c.machine.numRecentVCPUs.Add(1)
+		} else {
+			c.machine.numRecentVCPUs.Add(-1)
+		}
+	}
+}
+
 // bounce forces a return to the kernel or to host mode.
 //
 // This effectively unwinds the state machine.
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index 342013d90b..a4925a45e8 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -21,7 +21,6 @@ import (
 	"fmt"
 	"math/big"
 	"reflect"
-	"runtime"
 
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/abi/linux"
@@ -495,26 +494,6 @@ func (m *machine) mapUpperHalf(pageTable *pagetables.PageTables) {
 	}
 }
 
-// getMaxVCPU get max vCPU number
-func (m *machine) getMaxVCPU() {
-	maxVCPUs, errno := hostsyscall.RawSyscall(unix.SYS_IOCTL, uintptr(m.fd), KVM_CHECK_EXTENSION, _KVM_CAP_MAX_VCPUS)
-	if errno != 0 {
-		m.maxVCPUs = _KVM_NR_VCPUS
-	} else {
-		m.maxVCPUs = int(maxVCPUs)
-	}
-
-	// The goal here is to avoid vCPU contentions for reasonable workloads.
-	// But "reasonable" isn't defined well in this case. Let's say that CPU
-	// overcommit with factor 2 is still acceptable. We allocate a set of
-	// vCPU for each goruntime processor (P) and two sets of vCPUs to run
-	// user code.
-	rCPUs := runtime.GOMAXPROCS(0)
-	if 3*rCPUs < m.maxVCPUs {
-		m.maxVCPUs = 3 * rCPUs
-	}
-}
-
 func archPhysicalRegions(physicalRegions []physicalRegion) []physicalRegion {
 	return physicalRegions
 }
diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go
index bcc7fb7760..101039bd29 100644
--- a/pkg/sentry/platform/kvm/machine_arm64.go
+++ b/pkg/sentry/platform/kvm/machine_arm64.go
@@ -19,12 +19,10 @@ package kvm
 
 import (
 	"fmt"
-	"runtime"
 
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/hostarch"
-	"gvisor.dev/gvisor/pkg/hostsyscall"
 	"gvisor.dev/gvisor/pkg/ring0"
 	"gvisor.dev/gvisor/pkg/ring0/pagetables"
 	"gvisor.dev/gvisor/pkg/sentry/platform"
@@ -182,19 +180,3 @@ func (c *vCPU) fault(signal int32, info *linux.SignalInfo) (hostarch.AccessType,
 
 	return accessType, platform.ErrContextSignal
 }
-
-// getMaxVCPU get max vCPU number
-func (m *machine) getMaxVCPU() {
-	rmaxVCPUs := runtime.NumCPU()
-	smaxVCPUs, errno := hostsyscall.RawSyscall(unix.SYS_IOCTL, uintptr(m.fd), KVM_CHECK_EXTENSION, _KVM_CAP_MAX_VCPUS)
-	// compare the max vcpu number from runtime and syscall, use smaller one.
-	if errno != 0 {
-		m.maxVCPUs = rmaxVCPUs
-	} else {
-		if rmaxVCPUs < int(smaxVCPUs) {
-			m.maxVCPUs = rmaxVCPUs
-		} else {
-			m.maxVCPUs = int(smaxVCPUs)
-		}
-	}
-}
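
Reviewer note: the core of this change is resortRecentlyUsedListLocked, a single pass that splices every recently-used vCPU out into a temporary list (preserving relative order), clears its flag, and appends that list at the back, so that Get()'s front-to-back scans visit cold vCPUs before hot ones. The standalone sketch below models the same partition with Go's container/list instead of gVisor's generated intrusive vCPUList; fakeVCPU and resort are illustrative names only and are not part of the gVisor tree.

package main

import (
	"container/list"
	"fmt"
	"sync/atomic"
)

// fakeVCPU stands in for kvm.vCPU; only the recentlyUsed flag matters here.
type fakeVCPU struct {
	id           int
	recentlyUsed atomic.Bool
}

// resort mirrors resortRecentlyUsedListLocked: splice recently used entries
// into a temporary list in traversal order, clear their flags, then append
// the temporary list at the back so cold entries stay at the front.
func resort(l *list.List) {
	active := list.New()
	for e := l.Front(); e != nil; {
		next := e.Next() // grab Next before Remove detaches e
		v := e.Value.(*fakeVCPU)
		if v.recentlyUsed.Load() {
			l.Remove(e)
			active.PushBack(v)
			v.recentlyUsed.Store(false)
		}
		e = next
	}
	l.PushBackList(active)
}

func main() {
	l := list.New()
	for i := 0; i < 6; i++ {
		l.PushBack(&fakeVCPU{id: i})
	}
	// Pretend vCPUs 0 and 5 were locked since the last resort.
	l.Front().Value.(*fakeVCPU).recentlyUsed.Store(true)
	l.Back().Value.(*fakeVCPU).recentlyUsed.Store(true)

	resort(l)
	for e := l.Front(); e != nil; e = e.Next() {
		fmt.Print(e.Value.(*fakeVCPU).id, " ")
	}
	fmt.Println() // prints: 1 2 3 4 0 5
}

The patch instantiates //pkg/ilist:generic_list for *vCPU (with vCPUEntry embedded in vCPU) rather than using a container/list-style structure, which keeps Remove O(1) without per-element allocations or separate element handles.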
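Reviewer note: the resort is triggered from Get() once numRecentVCPUs exceeds recentVCPUThreshold, which getMaxVCPU now derives from the vCPU budget: at most 3*GOMAXPROCS vCPUs (one set per runtime P plus two sets for user code), capped by what KVM reports, with the threshold set to two thirds of that. A minimal sketch of just that arithmetic follows; the KVM_CHECK_EXTENSION ioctl is replaced by an assumed constant (kvmCap) since this example does not open /dev/kvm.

package main

import (
	"fmt"
	"runtime"
)

func main() {
	// Assumed value for the KVM_CAP_MAX_VCPUS query; the real code issues
	// KVM_CHECK_EXTENSION and falls back to _KVM_NR_VCPUS on error.
	const kvmCap = 512
	maxVCPUs := kvmCap

	// One set of vCPUs per runtime P plus two sets for user code,
	// capped by the KVM limit: at most 3*GOMAXPROCS vCPUs.
	rCPUs := runtime.GOMAXPROCS(0)
	if 3*rCPUs < maxVCPUs {
		maxVCPUs = 3 * rCPUs
	}

	// Resort the MRU list once more than two thirds of the vCPUs
	// have been marked recently used.
	recentVCPUThreshold := int32(maxVCPUs * 2 / 3)
	fmt.Printf("GOMAXPROCS=%d maxVCPUs=%d recentVCPUThreshold=%d\n",
		rCPUs, maxVCPUs, recentVCPUThreshold)
}

For example, with GOMAXPROCS=8 and a large KVM cap this yields maxVCPUs=24 and recentVCPUThreshold=16, so a reordering only happens after two thirds of the pool has been touched since the last resort.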