@@ -810,13 +810,19 @@ func (p *DynamicPolicy) applyAllSubCgroupQuotaToUnLimit(containerRelativePath st
810810 })
811811}
812812
813- // generateBlockCPUSet generates BlockCPUSet from cpu-advisor response
814- // and the logic contains three main steps
815- // 1. handle blocks for static pools
816- // 2. handle blocks with specified NUMA ids (probably be blocks for
817- // numa_binding dedicated_cores containers and reclaimed_cores containers colocated with them)
818- // 3. handle blocks without specified NUMA id (probably be blocks for
819- // not numa_binding dedicated_cores containers and pools of shared_cores and reclaimed_cores containers)
813+ // generateBlockCPUSet generates BlockCPUSet from cpu-advisor response.
814+ // The logic contains the following main steps:
815+ // 1. Handle blocks for static pools, and deduct the CPUs of forbidden pools from the available CPUs
816+ // 2. Handle blocks with specified NUMA IDs (for NUMA-bound dedicated_cores and shared_cores
817+ // containers, and reclaimed_cores containers colocated with them)
818+ // 3. Handle blocks without specified NUMA ID (for non-NUMA-bound containers including
819+ // dedicated_cores, shared_cores and reclaimed_cores containers)
820+ //
821+ // For each block, the function allocates CPU sets based on:
822+ // - Already allocated CPUs for dedicated cores
823+ // - Available CPUs considering already allocated static/forbidden pools
824+ // - NUMA topology awareness for better performance
825+ // - CPU allocation strategies that minimize CPU migrations
820826func (p * DynamicPolicy ) generateBlockCPUSet (resp * advisorapi.ListAndWatchResponse ) (advisorapi.BlockCPUSet , error ) {
821827 if resp == nil {
822828 return nil , fmt .Errorf ("got nil resp" )
@@ -831,8 +837,9 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons
831837 topology := machineInfo .CPUTopology
832838 availableCPUs := topology .CPUDetails .CPUs ()
833839
834- // walk through static pools to construct blockCPUSet (for static pool),
835- // and calculate availableCPUs after deducting static pools
840+ // Walk through static pools to construct blockCPUSet (for static pool),
841+ // and calculate availableCPUs after deducting static pools.
842+ // Static pools are predefined pools that should not be changed during runtime.
836843 blockCPUSet := advisorapi .NewBlockCPUSet ()
837844 for _ , poolName := range state .StaticPools .List () {
838845 allocationInfo := p .state .GetAllocationInfo (poolName , commonstate .FakedContainerName )
@@ -850,8 +857,8 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons
850857 availableCPUs = availableCPUs .Difference (blockCPUSet [blockID ])
851858 }
852859
853- // walk through forbidden pools to construct blockCPUSet (for forbidden pool),
854- // and calculate availableCPUs after deducting forbidden pools
860+ // Walk through forbidden pools and deduct their CPUs from availableCPUs.
861+ // Forbidden pools are reserved pools that should not be allocated to any containers.
855862 for _ , poolName := range state .ForbiddenPools .List () {
856863 allocationInfo := p .state .GetAllocationInfo (poolName , commonstate .FakedContainerName )
857864 if allocationInfo == nil {
@@ -861,20 +868,30 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons
861868 availableCPUs = availableCPUs .Difference (allocationInfo .AllocationResult .Clone ())
862869 }
863870
864- // walk through all blocks with specified NUMA ids
865- // for each block, add them into blockCPUSet (if not exist) and renew availableCPUs
871+ // Process blocks with specified NUMA IDs (for NUMA-bound containers).
872+ // For each NUMA node, blocks owned by the dedicated pool are handled first, then the
873+ // remaining blocks (e.g. NUMA-bound shared_cores and reclaimed_cores colocated with them).
866874 for numaID , blocks := range numaToBlocks {
867875 if numaID == commonstate .FakedNUMAID {
868876 continue
869877 }
870878
879+ withNUMABindingShareOrDedicatedPod := false
871880 numaAvailableCPUs := availableCPUs .Intersection (topology .CPUDetails .CPUsInNUMANodes (numaID ))
881+
882+ // First handle blocks for NUMA-bound dedicated_cores containers
883+ // Reuse already allocated CPU sets when possible to minimize CPU migration
872884 for _ , block := range blocks {
873885 if block == nil {
874886 general .Warningf ("got nil block" )
875887 continue
876888 }
877889
890+ entry , ok := block .OwnerPoolEntryMap [commonstate .PoolNameDedicated ]
891+ if ! ok {
892+ continue
893+ }
894+
878895 blockID := block .BlockId
879896
880897 if _ , found := blockCPUSet [blockID ]; found {
@@ -888,6 +905,64 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons
888905 blockID , err )
889906 }
890907
908+ allocationInfo := p .state .GetAllocationInfo (entry .EntryName , entry .SubEntryName )
909+ if allocationInfo == nil {
910+ continue
911+ }
912+
913+ alreadyAllocatedCPUs , ok := allocationInfo .TopologyAwareAssignments [numaID ]
914+ if ! ok {
915+ continue
916+ }
917+
918+ var cpuset machine.CPUSet
919+ alreadyAllocatedCPUs = alreadyAllocatedCPUs .Intersection (numaAvailableCPUs )
920+ if alreadyAllocatedCPUs .Size () >= blockResult {
921+ cpuset , err = calculator .TakeByTopology (machineInfo , alreadyAllocatedCPUs , blockResult , true )
922+ if err != nil {
923+ return nil , fmt .Errorf ("allocate cpuset for NUMA Aware block: %s in NUMA: %d failed with error: %v, numaAvailableCPUs: %d(%s), blockResult: %d" ,
924+ blockID , numaID , err , numaAvailableCPUs .Size (), numaAvailableCPUs .String (), blockResult )
925+ }
926+ } else {
927+ cpuset , err = calculator .TakeByTopology (machineInfo , numaAvailableCPUs .Difference (alreadyAllocatedCPUs ), blockResult - alreadyAllocatedCPUs .Size (), true )
928+ if err != nil {
929+ return nil , fmt .Errorf ("allocate cpuset for NUMA Aware block: %s in NUMA: %d failed with error: %v, numaAvailableCPUs: %d(%s), blockResult: %d" ,
930+ blockID , numaID , err , numaAvailableCPUs .Size (), numaAvailableCPUs .String (), blockResult )
931+ }
932+ cpuset = cpuset .Union (alreadyAllocatedCPUs )
933+ }
934+
935+ blockCPUSet [blockID ] = cpuset
936+ numaAvailableCPUs = numaAvailableCPUs .Difference (cpuset )
937+ availableCPUs = availableCPUs .Difference (cpuset )
938+ withNUMABindingShareOrDedicatedPod = true
939+ }
940+
941+ // Then handle blocks for NUMA-bound shared_cores containers and reclaimed_cores containers colocated with them
942+ // These containers can share NUMA nodes with dedicated_cores containers
943+ for _ , block := range blocks {
944+ if block == nil {
945+ general .Warningf ("got nil block" )
946+ continue
947+ }
948+
949+ _ , ok := block .OwnerPoolEntryMap [commonstate .PoolNameDedicated ]
950+ if ok {
951+ continue
952+ }
953+
954+ blockID := block .BlockId
955+ if _ , found := blockCPUSet [blockID ]; found {
956+ general .Warningf ("block: %v already allocated" , blockID )
957+ continue
958+ }
959+
960+ blockResult , err := general .CovertUInt64ToInt (block .Result )
961+ if err != nil {
962+ return nil , fmt .Errorf ("parse block: %s result failed with error: %v" ,
963+ blockID , err )
964+ }
965+
891966 cpuset , err := calculator .TakeByTopology (machineInfo , numaAvailableCPUs , blockResult , false )
892967 if err != nil {
893968 return nil , fmt .Errorf ("allocate cpuset for NUMA Aware block: %s in NUMA: %d failed with error: %v, numaAvailableCPUs: %d(%s), blockResult: %d" ,
@@ -897,11 +972,26 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons
897972 blockCPUSet [blockID ] = cpuset
898973 numaAvailableCPUs = numaAvailableCPUs .Difference (cpuset )
899974 availableCPUs = availableCPUs .Difference (cpuset )
975+
976+ _ , ok = block .OwnerPoolEntryMap [commonstate .PoolNameShare ]
977+ if ok {
978+ withNUMABindingShareOrDedicatedPod = true
979+ }
980+ }
981+
982+ // Finally, if there are NUMA-bound containers on this NUMA node,
983+ // deduct all numaAvailableCPUs from availableCPUs to ensure that
984+ // NUMA-bound pods don't share the same NUMA node with non-NUMA-bound pods
985+ if withNUMABindingShareOrDedicatedPod {
986+ // At this point numaAvailableCPUs holds only the CPUs of this NUMA node that are still
987+ // unassigned; they are all still part of availableCPUs, so remove them from it here.
988+ availableCPUs = availableCPUs .Difference (numaAvailableCPUs )
900989 }
901990 }
902991
903- // walk through all blocks without specified NUMA id
904- // for each block, add them into blockCPUSet (if not exist) and renew availableCPUs
992+ // Walk through all blocks without specified NUMA ID (non-NUMA-bound containers)
993+ // For each block, allocate CPUs using NUMA balance strategy to minimize
994+ // memory access latency and CPU migrations
905995 for _ , block := range numaToBlocks [commonstate .FakedNUMAID ] {
906996 if block == nil {
907997 general .Warningf ("got nil block" )
@@ -921,17 +1011,16 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons
9211011 blockID , err )
9221012 }
9231013
924- // use NUMA balance strategy to aviod changing memset as much as possible
925- // for blocks with faked NUMA id
926- var cpuset machine.CPUSet
927- cpuset , availableCPUs , err = calculator .TakeByNUMABalance (machineInfo , availableCPUs , blockResult )
1014+ // Use NUMA balance strategy to avoid changing memory affinity (memset) as much as possible
1015+ // for blocks with faked NUMA ID (non-NUMA-bound containers)
1016+ resultCPUSet , _ , err := calculator .TakeByNUMABalance (machineInfo , availableCPUs , blockResult )
9281017 if err != nil {
9291018 return nil , fmt .Errorf ("allocate cpuset for non NUMA Aware block: %s failed with error: %v, availableCPUs: %d(%s), blockResult: %d" ,
9301019 blockID , err , availableCPUs .Size (), availableCPUs .String (), blockResult )
9311020 }
9321021
933- blockCPUSet [blockID ] = cpuset
934- availableCPUs = availableCPUs .Difference (cpuset )
1022+ blockCPUSet [blockID ] = resultCPUSet
1023+ availableCPUs = availableCPUs .Difference (resultCPUSet )
9351024 }
9361025
9371026 return blockCPUSet , nil
0 commit comments