Skip to content

Commit 2590c1d

Browse files
authored
Merge pull request #988 from luomingmeng/dev/support-l3cache-package-align
support l3cache package align
2 parents 839c3e8 + 1cb56e1 commit 2590c1d

File tree

12 files changed

+1927
-76
lines changed

12 files changed

+1927
-76
lines changed

pkg/agent/qrm-plugins/commonstate/pool.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ func IsSystemPool(poolName string) bool {
6464
return strings.HasPrefix(poolName, PoolNamePrefixSystem)
6565
}
6666

67+
// IsShareNUMABindingPool reports whether poolName denotes a share pool with
// NUMA binding, identified by the presence of the NUMAPoolInfix marker in
// the name.
func IsShareNUMABindingPool(poolName string) bool {
	return strings.Contains(poolName, NUMAPoolInfix)
}
70+
6771
func GetPoolType(poolName string) string {
6872
if IsIsolationPool(poolName) {
6973
return PoolNamePrefixIsolation

pkg/agent/qrm-plugins/cpu/dynamicpolicy/calculator/cpu_assignment.go

Lines changed: 153 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,95 @@ func (a *cpuAccumulator) sort(ids []int, getCPUs func(ids ...int) machine.CPUSet
141141
})
142142
}
143143

144+
// getBestMatchCPUsNeededL3Cache returns the L3 cache ID that best matches the number of CPUs needed.
145+
// It directly selects the L3 cache with the closest match to the required number of CPUs,
146+
// preferring caches with CPU count equal to or slightly greater than the requirement.
147+
func (a *cpuAccumulator) getBestMatchCPUsNeededL3Cache() (int, bool) {
148+
l3Caches := a.cpuDetails.L3Caches().ToSliceInt()
149+
if len(l3Caches) == 0 {
150+
return 0, false
151+
}
152+
153+
var bestL3CacheID int
154+
bestMatchFound := false
155+
var bestMatchDiff int = -1 // -1 indicates no match found yet
156+
157+
for _, l3CacheID := range l3Caches {
158+
cpusInL3Cache := a.cpuDetails.CPUsInL3Caches(l3CacheID)
159+
cpuCount := cpusInL3Cache.Size()
160+
161+
// Exact match - return immediately
162+
if cpuCount == a.numCPUsNeeded {
163+
return l3CacheID, true
164+
}
165+
166+
// For caches with more CPUs than needed, prefer the one with the smallest excess
167+
if cpuCount > a.numCPUsNeeded {
168+
diff := cpuCount - a.numCPUsNeeded
169+
if !bestMatchFound || diff < bestMatchDiff {
170+
bestL3CacheID = l3CacheID
171+
bestMatchDiff = diff
172+
bestMatchFound = true
173+
}
174+
}
175+
}
176+
177+
// If we found a cache with more CPUs than needed, return it
178+
if bestMatchFound {
179+
return bestL3CacheID, true
180+
}
181+
182+
// If no cache with more CPUs was found, find the one with the most CPUs
183+
// (closest match when all caches have fewer CPUs than needed)
184+
for _, l3CacheID := range l3Caches {
185+
cpusInL3Cache := a.cpuDetails.CPUsInL3Caches(l3CacheID)
186+
cpuCount := cpusInL3Cache.Size()
187+
188+
if !bestMatchFound || cpuCount > bestMatchDiff {
189+
bestL3CacheID = l3CacheID
190+
bestMatchDiff = cpuCount
191+
bestMatchFound = true
192+
}
193+
}
194+
195+
return bestL3CacheID, bestMatchFound
196+
}
197+
198+
// tryAlignL3Caches handles remaining CPU allocation with L3 cache topology awareness.
199+
//
200+
// This method implements fine-grained CPU allocation based on L3 cache topology.
201+
// When the requested CPU count doesn't align with complete L3 cache sizes,
202+
// it intelligently selects the most suitable L3 cache to minimize cache contention
203+
// and maximize memory locality for the workload.
204+
//
205+
// Algorithm:
206+
// 1. Directly selects the L3 cache that best matches the remaining CPU requirement
207+
// 2. If remaining need >= cache size: allocate entire cache and recurse
208+
// 3. If remaining need < cache size: restrict allocation to this cache only
209+
func (a *cpuAccumulator) tryAlignL3Caches() {
210+
l3Cache, found := a.getBestMatchCPUsNeededL3Cache()
211+
if !found {
212+
return
213+
}
214+
215+
cpusInL3Cache := a.cpuDetails.CPUsInL3Caches(l3Cache)
216+
if a.numCPUsNeeded >= cpusInL3Cache.Size() {
217+
// Cache is smaller than remaining need - take entire cache for efficiency
218+
klog.V(4).InfoS("tryAlignL3Caches: claiming entire L3 cache (partial)", "l3Cache", l3Cache, "cacheSize", cpusInL3Cache.Size(), "remainingNeed", a.numCPUsNeeded)
219+
a.take(cpusInL3Cache)
220+
if a.isSatisfied() {
221+
return
222+
}
223+
// Continue with remaining allocation from other caches
224+
a.tryAlignL3Caches()
225+
} else {
226+
// Cache is larger than remaining need - restrict to this cache for optimal locality
227+
// This ensures all allocated CPUs share the same L3 cache, minimizing memory latency
228+
klog.V(4).InfoS("tryAlignL3Caches: restricting allocation to L3 cache", "l3Cache", l3Cache, "cacheSize", cpusInL3Cache.Size(), "remainingNeed", a.numCPUsNeeded)
229+
a.cpuDetails = a.cpuDetails.KeepOnly(cpusInL3Cache)
230+
}
231+
}
232+
144233
// Sort all sockets with free CPUs using the sort() algorithm defined above.
145234
func (a *cpuAccumulator) sortAvailableSockets() []int {
146235
sockets := a.cpuDetails.Sockets().ToSliceNoSortInt()
@@ -224,42 +313,94 @@ func (a *cpuAccumulator) isFailed() bool {
224313
return a.numCPUsNeeded > a.cpuDetails.CPUs().Size()
225314
}
226315

227-
// TakeByTopology tries to allocate those required cpus in the same socket or cores
316+
// TakeByTopology implements a topology-aware CPU allocation strategy that prioritizes
317+
// hardware locality and cache efficiency for optimal workload performance.
318+
//
319+
// This function implements a multi-tier allocation strategy designed to minimize
320+
// cross-socket communication and maximize cache utilization. The allocation follows
321+
// a hierarchical approach from largest to smallest topology units.
322+
//
323+
// Parameters:
324+
// - info: Machine topology information including NUMA, socket, core, and cache hierarchy
325+
// - availableCPUs: Set of CPUs available for allocation
326+
// - cpuRequirement: Number of CPUs needed for the workload
327+
// - alignByL3Caches: Whether to consider L3 cache topology in allocation decisions
328+
//
329+
// Returns:
330+
// - CPUSet: The allocated set of CPUs with optimal topology placement
331+
// - error: Error if allocation fails due to insufficient resources
332+
//
333+
// Allocation Strategy (Topology-Aware Best-Fit):
334+
//
335+
// Phase 1: Socket-Level Allocation (Highest Locality)
336+
// - Attempts to allocate entire CPU sockets when the requirement matches or exceeds socket size
337+
// - Provides maximum memory bandwidth and minimal cross-socket latency
338+
//
339+
// Phase 2: L3 Cache-Aware Allocation (Conditional)
340+
// - Activated when alignByL3Caches is true
341+
// - Prioritizes allocation within shared L3 cache domains to minimize cache contention
342+
// - Uses tryAlignL3Caches() for intelligent cache-aligned distribution
343+
//
344+
// Phase 3: Core-Level Allocation (Medium Locality)
345+
// - Allocates complete CPU cores to avoid hyperthreading contention
346+
// - Preferred for workloads sensitive to thread interference
347+
//
348+
// Phase 4: Thread-Level Allocation (Fine-Grained)
349+
// - Allocates individual hyperthreads from partially utilized cores
350+
// - Prefers cores on sockets already allocated to maintain NUMA affinity
228351
func TakeByTopology(info *machine.KatalystMachineInfo, availableCPUs machine.CPUSet,
229-
cpuRequirement int,
352+
cpuRequirement int, alignByL3Caches bool,
230353
) (machine.CPUSet, error) {
354+
// Initialize accumulator with topology-aware state
231355
acc := newCPUAccumulator(info, availableCPUs, cpuRequirement)
356+
357+
// Fast-path: Handle edge cases immediately
232358
if acc.isSatisfied() {
359+
// Zero CPU requirement - return empty set immediately
233360
return acc.result.Clone(), nil
234361
}
235362
if acc.isFailed() {
236-
return machine.NewCPUSet(), fmt.Errorf("not enough cpus available to satisfy request")
363+
// Insufficient resources - fail fast with descriptive error
364+
return machine.NewCPUSet(), fmt.Errorf("insufficient CPUs: requested %d, available %d",
365+
cpuRequirement, availableCPUs.Size())
237366
}
238367

239-
// Algorithm: topology-aware best-fit
240-
// 1. Acquire whole sockets, if available and the container requires at
241-
// least a socket's-worth of CPUs.
368+
// Phase 1: Socket-level allocation for maximum locality
369+
// This phase attempts to allocate entire CPU sockets when beneficial
242370
acc.takeFullSockets()
243371
if acc.isSatisfied() {
372+
klog.V(4).InfoS("TakeByTopology: allocated at socket level", "allocated", acc.result.Size())
244373
return acc.result.Clone(), nil
245374
}
246375

247-
// 2. Acquire whole cores, if available and the container requires at least
248-
// a core's-worth of CPUs.
376+
// Phase 2: L3 cache topology optimization (if enabled)
377+
// This phase considers cache topology to minimize memory latency
378+
if alignByL3Caches {
379+
acc.tryAlignL3Caches()
380+
if acc.isSatisfied() {
381+
klog.V(4).InfoS("TakeByTopology: allocated with L3 cache alignment", "allocated", acc.result.Size())
382+
return acc.result.Clone(), nil
383+
}
384+
}
385+
386+
// Phase 3: Core-level allocation to avoid HT contention
387+
// Allocates complete cores for workloads sensitive to thread interference
249388
acc.takeFullCores()
250389
if acc.isSatisfied() {
390+
klog.V(4).InfoS("TakeByTopology: allocated at core level", "allocated", acc.result.Size())
251391
return acc.result.Clone(), nil
252392
}
253393

254-
// 3. Acquire single threads, preferring to fill partially-allocated cores
255-
// on the same sockets as the whole cores we have already taken in this
256-
// allocation.
394+
// Phase 4: Thread-level allocation for remaining needs
395+
// Allocates individual threads from partially utilized cores
257396
acc.takeRemainingCPUs()
258397
if acc.isSatisfied() {
398+
klog.V(4).InfoS("TakeByTopology: allocated at thread level", "allocated", acc.result.Size())
259399
return acc.result.Clone(), nil
260400
}
261401

262-
return machine.NewCPUSet(), fmt.Errorf("failed to allocate cpus")
402+
// Exhaustive allocation failed - no combination satisfies requirement
403+
return machine.NewCPUSet(), fmt.Errorf("topology-aware allocation failed: requested %d CPUs, exhausted all allocation strategies", cpuRequirement)
263404
}
264405

265406
// TakeByNUMABalance tries to make the allocated cpu spread on different

0 commit comments

Comments
 (0)