@@ -548,13 +548,19 @@ func (p *DynamicPolicy) applyCgroupConfigs(resp *advisorapi.ListAndWatchResponse
548548 return nil
549549}
550550
551- // generateBlockCPUSet generates BlockCPUSet from cpu-advisor response
552- // and the logic contains three main steps
553- // 1. handle blocks for static pools
554- // 2. handle blocks with specified NUMA ids (probably be blocks for
555- // numa_binding dedicated_cores containers and reclaimed_cores containers colocated with them)
556- // 3. handle blocks without specified NUMA id (probably be blocks for
557- // not numa_binding dedicated_cores containers and pools of shared_cores and reclaimed_cores containers)
551+ // generateBlockCPUSet generates BlockCPUSet from cpu-advisor response.
552+ // The logic contains the following main steps:
553+ // 1. Handle blocks for static pools and forbidden pools
554+ // 2. Handle blocks with specified NUMA IDs (for NUMA-bound dedicated_cores containers
555+ // and reclaimed_cores containers colocated with them)
556+ // 3. Handle blocks without specified NUMA ID (for non-NUMA-bound containers including
557+ // dedicated_cores, shared_cores and reclaimed_cores containers)
558+ //
559+ // For each block, the function allocates CPU sets based on:
560+ // - Already allocated CPUs for dedicated cores
561+ // - Available CPUs considering already allocated static/forbidden pools
562+ // - NUMA topology awareness for better performance
563+ // - CPU allocation strategies that minimize CPU migrations
558564func (p * DynamicPolicy ) generateBlockCPUSet (resp * advisorapi.ListAndWatchResponse ) (advisorapi.BlockCPUSet , error ) {
559565 if resp == nil {
560566 return nil , fmt .Errorf ("got nil resp" )
@@ -569,8 +575,9 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons
569575 topology := machineInfo .CPUTopology
570576 availableCPUs := topology .CPUDetails .CPUs ()
571577
572- // walk through static pools to construct blockCPUSet (for static pool),
573- // and calculate availableCPUs after deducting static pools
578+ // Walk through static pools to construct blockCPUSet (for static pool),
579+ // and calculate availableCPUs after deducting static pools.
580+ // Static pools are predefined pools that should not be changed during runtime.
574581 blockCPUSet := advisorapi .NewBlockCPUSet ()
575582 for _ , poolName := range state .StaticPools .List () {
576583 allocationInfo := p .state .GetAllocationInfo (poolName , commonstate .FakedContainerName )
@@ -588,8 +595,8 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons
588595 availableCPUs = availableCPUs .Difference (blockCPUSet [blockID ])
589596 }
590597
591- // walk through forbidden pools to construct blockCPUSet (for forbidden pool),
592- // and calculate availableCPUs after deducting forbidden pools
598+ // Walk through forbidden pools and deduct their CPUs from availableCPUs.
599+ // Forbidden pools are reserved pools that should not be allocated to any containers.
593600 for _ , poolName := range state .ForbiddenPools .List () {
594601 allocationInfo := p .state .GetAllocationInfo (poolName , commonstate .FakedContainerName )
595602 if allocationInfo == nil {
@@ -599,20 +606,30 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons
599606 availableCPUs = availableCPUs .Difference (allocationInfo .AllocationResult .Clone ())
600607 }
601608
602- // walk through all blocks with specified NUMA ids
603- // for each block, add them into blockCPUSet (if not exist) and renew availableCPUs
609+ // Process blocks with specified NUMA IDs (for NUMA-bound containers)
610+ // These are typically dedicated_cores containers with NUMA binding and
611+ // reclaimed_cores containers colocated with them
604612 for numaID , blocks := range numaToBlocks {
605613 if numaID == commonstate .FakedNUMAID {
606614 continue
607615 }
608616
617+ withNUMABindingShareOrDedicatedPod := false
609618 numaAvailableCPUs := availableCPUs .Intersection (topology .CPUDetails .CPUsInNUMANodes (numaID ))
619+
620+ // First handle blocks for NUMA-bound dedicated_cores containers
621+ // Reuse already allocated CPU sets when possible to minimize CPU migration
610622 for _ , block := range blocks {
611623 if block == nil {
612624 general .Warningf ("got nil block" )
613625 continue
614626 }
615627
628+ entry , ok := block .OwnerPoolEntryMap [commonstate .PoolNameDedicated ]
629+ if ! ok {
630+ continue
631+ }
632+
616633 blockID := block .BlockId
617634
618635 if _ , found := blockCPUSet [blockID ]; found {
@@ -626,6 +643,65 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons
626643 blockID , err )
627644 }
628645
646+ allocationInfo := p .state .GetAllocationInfo (entry .EntryName , entry .SubEntryName )
647+ if allocationInfo == nil {
648+ continue
649+ }
650+
651+ alreadyAllocatedCPUs , ok := allocationInfo .TopologyAwareAssignments [numaID ]
652+ if ! ok {
653+ continue
654+ }
655+
656+ alreadyAllocatedCPUs = alreadyAllocatedCPUs .Intersection (numaAvailableCPUs )
657+ if alreadyAllocatedCPUs .Size () >= blockResult {
658+ cpuset , err := calculator .TakeByTopology (machineInfo , alreadyAllocatedCPUs , blockResult , true )
659+ if err != nil {
660+ return nil , fmt .Errorf ("allocate cpuset for NUMA Aware block: %s in NUMA: %d failed with error: %v, numaAvailableCPUs: %d(%s), blockResult: %d" ,
661+ blockID , numaID , err , numaAvailableCPUs .Size (), numaAvailableCPUs .String (), blockResult )
662+ }
663+
664+ blockCPUSet [blockID ] = cpuset
665+ availableCPUs = availableCPUs .Difference (cpuset )
666+ } else {
667+ cpuset , err := calculator .TakeByTopology (machineInfo , numaAvailableCPUs .Difference (alreadyAllocatedCPUs ), blockResult - alreadyAllocatedCPUs .Size (), true )
668+ if err != nil {
669+ return nil , fmt .Errorf ("allocate cpuset for NUMA Aware block: %s in NUMA: %d failed with error: %v, numaAvailableCPUs: %d(%s), blockResult: %d" ,
670+ blockID , numaID , err , numaAvailableCPUs .Size (), numaAvailableCPUs .String (), blockResult )
671+ }
672+
673+ blockCPUSet [blockID ] = cpuset .Union (alreadyAllocatedCPUs )
674+ availableCPUs = availableCPUs .Difference (cpuset )
675+ }
676+
677+ withNUMABindingShareOrDedicatedPod = true
678+ }
679+
680+ // Then handle blocks for NUMA-bound shared_cores containers and reclaimed_cores containers colocated with them
681+ // These containers can share NUMA nodes with dedicated_cores containers
682+ for _ , block := range blocks {
683+ if block == nil {
684+ general .Warningf ("got nil block" )
685+ continue
686+ }
687+
688+ _ , ok := block .OwnerPoolEntryMap [commonstate .PoolNameDedicated ]
689+ if ok {
690+ continue
691+ }
692+
693+ blockID := block .BlockId
694+ if _ , found := blockCPUSet [blockID ]; found {
695+ general .Warningf ("block: %v already allocated" , blockID )
696+ continue
697+ }
698+
699+ blockResult , err := general .CovertUInt64ToInt (block .Result )
700+ if err != nil {
701+ return nil , fmt .Errorf ("parse block: %s result failed with error: %v" ,
702+ blockID , err )
703+ }
704+
629705 cpuset , err := calculator .TakeByTopology (machineInfo , numaAvailableCPUs , blockResult , false )
630706 if err != nil {
631707 return nil , fmt .Errorf ("allocate cpuset for NUMA Aware block: %s in NUMA: %d failed with error: %v, numaAvailableCPUs: %d(%s), blockResult: %d" ,
@@ -635,11 +711,26 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons
635711 blockCPUSet [blockID ] = cpuset
636712 numaAvailableCPUs = numaAvailableCPUs .Difference (cpuset )
637713 availableCPUs = availableCPUs .Difference (cpuset )
714+
715+ _ , ok = block .OwnerPoolEntryMap [commonstate .PoolNameShare ]
716+ if ok {
717+ withNUMABindingShareOrDedicatedPod = true
718+ }
719+ }
720+
721+ // Finally, if there are NUMA-bound containers on this NUMA node,
722+ // deduct all numaAvailableCPUs from availableCPUs to ensure that
723+ // NUMA-bound pods don't share the same NUMA node with non-NUMA-bound pods
724+ if withNUMABindingShareOrDedicatedPod {
725+ // Remove every CPU still tracked in numaAvailableCPUs from availableCPUs,
726+ // so the non-NUMA-bound blocks handled later cannot be placed on this NUMA node
727+ availableCPUs = availableCPUs .Difference (numaAvailableCPUs )
638728 }
639729 }
640730
641- // walk through all blocks without specified NUMA id
642- // for each block, add them into blockCPUSet (if not exist) and renew availableCPUs
731+ // Walk through all blocks without specified NUMA ID (non-NUMA-bound containers)
732+ // For each block, allocate CPUs using NUMA balance strategy to minimize
733+ // memory access latency and CPU migrations
643734 for _ , block := range numaToBlocks [commonstate .FakedNUMAID ] {
644735 if block == nil {
645736 general .Warningf ("got nil block" )
@@ -659,17 +750,16 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons
659750 blockID , err )
660751 }
661752
662- // use NUMA balance strategy to aviod changing memset as much as possible
663- // for blocks with faked NUMA id
664- var cpuset machine.CPUSet
665- cpuset , availableCPUs , err = calculator .TakeByNUMABalance (machineInfo , availableCPUs , blockResult )
753+ // Use NUMA balance strategy to avoid changing memory affinity (memset) as much as possible
754+ // for blocks with faked NUMA ID (non-NUMA-bound containers)
755+ resultCPUSet , _ , err := calculator .TakeByNUMABalance (machineInfo , availableCPUs , blockResult )
666756 if err != nil {
667757 return nil , fmt .Errorf ("allocate cpuset for non NUMA Aware block: %s failed with error: %v, availableCPUs: %d(%s), blockResult: %d" ,
668758 blockID , err , availableCPUs .Size (), availableCPUs .String (), blockResult )
669759 }
670760
671- blockCPUSet [blockID ] = cpuset
672- availableCPUs = availableCPUs .Difference (cpuset )
761+ blockCPUSet [blockID ] = resultCPUSet
762+ availableCPUs = availableCPUs .Difference (resultCPUSet )
673763 }
674764
675765 return blockCPUSet , nil
0 commit comments