diff --git a/go.mod b/go.mod index 7daa6e248c..2ef112de2f 100644 --- a/go.mod +++ b/go.mod @@ -175,6 +175,7 @@ require ( ) replace ( + github.com/kubewharf/katalyst-api => github.com/yehlemias/katalyst-api v0.0.0-20260123040154-7734ccb2ac3f k8s.io/api => k8s.io/api v0.24.6 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6 k8s.io/apimachinery => k8s.io/apimachinery v0.24.6 diff --git a/go.sum b/go.sum index 9c5313ce5a..4b929ca8b0 100644 --- a/go.sum +++ b/go.sum @@ -574,8 +574,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kubewharf/katalyst-api v0.5.9-0.20260108125536-85e136f5902c h1:ohKHA5TOlW9487menKnKH2M14LeIq1xQ1yW4xp8x9o8= -github.com/kubewharf/katalyst-api v0.5.9-0.20260108125536-85e136f5902c/go.mod h1:BZMVGVl3EP0eCn5xsDgV41/gjYkoh43abIYxrB10e3k= github.com/kubewharf/kubelet v1.24.6-kubewharf.9 h1:jOTYZt7h/J7I8xQMKMUcJjKf5UFBv37jHWvNp5VRFGc= github.com/kubewharf/kubelet v1.24.6-kubewharf.9/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c= github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8= @@ -937,6 +935,8 @@ github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5 github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xlab/treeprint v0.0.0-20181112141820-a009c3971eca/go.mod h1:ce1O1j6UtZfjr22oyGxGLbauSBp2YVXpARAosm7dHBg= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/yehlemias/katalyst-api v0.0.0-20260123040154-7734ccb2ac3f h1:W8u1tV9FcEUze0qAA0IfMAzUOWPHoGoHv1yqAfy/Qs0= +github.com/yehlemias/katalyst-api 
v0.0.0-20260123040154-7734ccb2ac3f/go.mod h1:BZMVGVl3EP0eCn5xsDgV41/gjYkoh43abIYxrB10e3k= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= diff --git a/pkg/agent/qrm-plugins/commonstate/pool.go b/pkg/agent/qrm-plugins/commonstate/pool.go index 5e68b7cead..badee9fec4 100644 --- a/pkg/agent/qrm-plugins/commonstate/pool.go +++ b/pkg/agent/qrm-plugins/commonstate/pool.go @@ -23,6 +23,7 @@ import ( apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts" "github.com/kubewharf/katalyst-core/pkg/util/machine" + qosutil "github.com/kubewharf/katalyst-core/pkg/util/qos" ) // notice that pool-name may not have direct mapping relations with qos-level, for instance @@ -46,10 +47,11 @@ const ( // FakedContainerName represents a placeholder since pool entry has no container-level // FakedNUMAID represents a placeholder since pools like shared/reclaimed will not contain a specific numa const ( - FakedContainerName = "" - FakedNUMAID = -1 - NameSeparator = "#" - NUMAPoolInfix = "-NUMA" + FakedContainerName = "" + FakedNUMAID = -1 + NameSeparator = "#" + NUMAPoolInfix = "-NUMA" + NUMAAffinityPoolInfix = "-Affinity" ) const ( @@ -68,6 +70,11 @@ func IsShareNUMABindingPool(poolName string) bool { return strings.Contains(poolName, NUMAPoolInfix) } +// IsShareNUMAAffinityPool checks whether the pool is numa affinity pool +func IsShareNUMAAffinityPool(poolName string) bool { + return strings.Contains(poolName, NUMAAffinityPoolInfix) || IsShareNUMABindingPool(poolName) +} + func GetPoolType(poolName string) string { if IsIsolationPool(poolName) { return PoolNamePrefixIsolation @@ -101,8 +108,8 @@ func GetSpecifiedPoolName(qosLevel, cpusetEnhancementValue string) string { } } -// 
GetSpecifiedNUMABindingNUMAID parses the numa id for AllocationInfo -func GetSpecifiedNUMABindingNUMAID(annotations map[string]string) (int, error) { +// GetSpecifiedNUMAID parses the numa id for AllocationInfo +func GetSpecifiedNUMAID(annotations map[string]string) (int, error) { if _, ok := annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint]; !ok { return FakedNUMAID, nil } @@ -118,15 +125,17 @@ func GetSpecifiedNUMABindingNUMAID(annotations map[string]string) (int, error) { return numaSet.ToSliceNoSortInt()[0], nil } -// GetSpecifiedNUMABindingPoolName get numa_binding pool name -// for numa_binding shared_cores according to enhancements and NUMA hint -func GetSpecifiedNUMABindingPoolName(qosLevel string, annotations map[string]string) (string, error) { +// GetSpecifiedNUMAPoolName get numa pool name: +// - NUMAPoolInfix for numa_binding shared_cores, e.g. share-NUMA0 +// - NUMAAffinityPoolInfix for numa_affinity shared_cores, e.g. share-Affinity0 +func GetSpecifiedNUMAPoolName(qosLevel string, annotations map[string]string) (string, error) { if qosLevel != apiconsts.PodAnnotationQoSLevelSharedCores || - annotations[apiconsts.PodAnnotationMemoryEnhancementNumaBinding] != apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable { - return EmptyOwnerPoolName, fmt.Errorf("GetSpecifiedNUMABindingPoolName is only for numa_binding shared_cores") + (annotations[apiconsts.PodAnnotationMemoryEnhancementNumaBinding] != apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable && + annotations[apiconsts.PodAnnotationCPUEnhancementNumaAffinity] != apiconsts.PodAnnotationCPUEnhancementNumaAffinityEnable) { + return EmptyOwnerPoolName, fmt.Errorf("GetSpecifiedNUMAPoolName is only for numa_binding or numa_affinity shared_cores") } - numaID, err := GetSpecifiedNUMABindingNUMAID(annotations) + numaID, err := GetSpecifiedNUMAID(annotations) if err != nil { return EmptyOwnerPoolName, err } @@ -141,5 +150,10 @@ func GetSpecifiedNUMABindingPoolName(qosLevel string, annotations 
map[string]str return EmptyOwnerPoolName, fmt.Errorf("empty specifiedPoolName") } - return GetNUMAPoolName(specifiedPoolName, numaID), nil + numaPoolInfix := NUMAAffinityPoolInfix + if qosutil.AnnotationsIndicateNUMABinding(annotations) { + numaPoolInfix = NUMAPoolInfix + } + + return GetNUMAPoolName(specifiedPoolName, numaPoolInfix, numaID), nil } diff --git a/pkg/agent/qrm-plugins/commonstate/state.go b/pkg/agent/qrm-plugins/commonstate/state.go index 8bb71a6704..b3620476a2 100644 --- a/pkg/agent/qrm-plugins/commonstate/state.go +++ b/pkg/agent/qrm-plugins/commonstate/state.go @@ -136,17 +136,17 @@ func (am *AllocationMeta) GetSpecifiedPoolName() string { return GetSpecifiedPoolName(am.QoSLevel, am.Annotations[consts.PodAnnotationCPUEnhancementCPUSet]) } -// GetSpecifiedNUMABindingNUMAID parses the numa id for AllocationInfo -func (am *AllocationMeta) GetSpecifiedNUMABindingNUMAID() (int, error) { +// GetSpecifiedNUMAID parses the numa id for AllocationInfo +func (am *AllocationMeta) GetSpecifiedNUMAID() (int, error) { if am == nil { return FakedNUMAID, fmt.Errorf("empty am") } - return GetSpecifiedNUMABindingNUMAID(am.Annotations) + return GetSpecifiedNUMAID(am.Annotations) } -// SetSpecifiedNUMABindingNUMAID set the numa id for AllocationInfo -func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaID uint64) { +// SetSpecifiedNUMAID set the numa id for AllocationInfo +func (am *AllocationMeta) SetSpecifiedNUMAID(numaID uint64) { if am == nil { return } @@ -158,14 +158,14 @@ func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaID uint64) { am.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint] = machine.NewCPUSet(int(numaID)).String() } -// GetSpecifiedNUMABindingPoolName get numa_binding pool name -// for numa_binding shared_cores according to enhancements and NUMA hint -func (am *AllocationMeta) GetSpecifiedNUMABindingPoolName() (string, error) { - return GetSpecifiedNUMABindingPoolName(am.QoSLevel, am.Annotations) +// GetSpecifiedNUMAPoolName get 
numa pool name +// for numa_binding or numa_affinity shared_cores according to enhancements and NUMA hint +func (am *AllocationMeta) GetSpecifiedNUMAPoolName() (string, error) { + return GetSpecifiedNUMAPoolName(am.QoSLevel, am.Annotations) } -func GetNUMAPoolName(candidateSpecifiedPoolName string, targetNUMANode int) string { - return fmt.Sprintf("%s%s%d", candidateSpecifiedPoolName, NUMAPoolInfix, targetNUMANode) +func GetNUMAPoolName(candidateSpecifiedPoolName, poolInfix string, targetNUMANode int) string { + return fmt.Sprintf("%s%s%d", candidateSpecifiedPoolName, poolInfix, targetNUMANode) } // CheckMainContainer returns true if the AllocationInfo is for main container @@ -316,3 +316,84 @@ func (am *AllocationMeta) CheckDedicatedPool() bool { } return am.OwnerPoolName == PoolNameDedicated } + +// CheckNUMAAffinity returns true if the AllocationInfo is for pod with numa-affinity enhancement +func (am *AllocationMeta) CheckNUMAAffinity() bool { + if am == nil { + return false + } + return am.Annotations[consts.PodAnnotationCPUEnhancementNumaAffinity] == + consts.PodAnnotationCPUEnhancementNumaAffinityEnable || am.CheckNUMABinding() +} + +// CheckDedicatedNUMAAffinity returns true if the AllocationInfo is for pod with +// dedicated-qos and numa-affinity enhancement +func (am *AllocationMeta) CheckDedicatedNUMAAffinity() bool { + return am.CheckDedicated() && am.CheckNUMAAffinity() +} + +// CheckSharedNUMAAffinity returns true if the AllocationInfo is for pod with +// shared-qos and numa-affinity enhancement +func (am *AllocationMeta) CheckSharedNUMAAffinity() bool { + return am.CheckShared() && am.CheckNUMAAffinity() +} + +// CheckSharedOrDedicatedNUMAAffinity returns true if the AllocationInfo is for pod with +// shared-qos or dedicated-qos and numa-affinity enhancement +func (am *AllocationMeta) CheckSharedOrDedicatedNUMAAffinity() bool { + return am.CheckSharedNUMAAffinity() || am.CheckDedicatedNUMAAffinity() +} + +// CheckNonBindingNUMAAffinity returns true 
if the AllocationInfo is for pod with +// numa-affinity enhancement and without numa-binding enhancement +func (am *AllocationMeta) CheckNonBindingNUMAAffinity() bool { + if am == nil { + return false + } + return am.Annotations[consts.PodAnnotationCPUEnhancementNumaAffinity] == + consts.PodAnnotationCPUEnhancementNumaAffinityEnable && !am.CheckNUMABinding() +} + +// CheckNonBindingSharedNUMAAffinity returns true if the AllocationInfo is for pod with +// shared-qos and numa-affinity enhancement and without numa-binding enhancement +func (am *AllocationMeta) CheckNonBindingSharedNUMAAffinity() bool { + return am.CheckShared() && am.CheckNonBindingNUMAAffinity() +} + +// CheckNonBindingDedicatedNUMAAffinity returns true if the AllocationInfo is for pod with +// dedicated-qos and numa-affinity enhancement and without numa-binding enhancement +func (am *AllocationMeta) CheckNonBindingDedicatedNUMAAffinity() bool { + return am.CheckDedicated() && am.CheckNonBindingNUMAAffinity() +} + +// CheckNonBindingSharedOrDedicatedNUMAAffinity returns true if the AllocationInfo is for pod with +// shared-qos or dedicated-qos and numa-affinity enhancement and without numa-binding enhancement +func (am *AllocationMeta) CheckNonBindingSharedOrDedicatedNUMAAffinity() bool { + return am.CheckNonBindingSharedNUMAAffinity() || am.CheckNonBindingDedicatedNUMAAffinity() +} + +// CheckSharedNumaBindingOrDedicatedNUMAAffinity returns true if the AllocationInfo is for pod with +// shared numa-bind or dedicated numa-affinity. +func (am *AllocationMeta) CheckSharedNumaBindingOrDedicatedNUMAAffinity() bool { + return am.CheckSharedNUMABinding() || am.CheckDedicatedNUMAAffinity() +} + +// CheckReclaimedNUMAAffinity returns true if the AllocationInfo is for pod with +// reclaimed numa-bind or dedicated numa-affinity. 
+func (am *AllocationMeta) CheckReclaimedNUMAAffinity() bool { + return am.CheckReclaimed() && am.CheckNUMAAffinity() +} + +// CheckReclaimedActualNUMAAffinity returns true if the AllocationInfo is for pod with +// reclaimed-qos and numa-affinity enhancement and numa hint is not empty, which means +// the container is allocated on a specific NUMA node +func (am *AllocationMeta) CheckReclaimedActualNUMAAffinity() bool { + return am.CheckReclaimedNUMAAffinity() && am.CheckActualNUMABinding() +} + +// CheckReclaimedNonActualNUMAAffinity returns true if the AllocationInfo is for pod with +// reclaimed-qos and numa-affinity enhancement and numa hint is empty, which means +// the container can be allocated on multi NUMA nodes +func (am *AllocationMeta) CheckReclaimedNonActualNUMAAffinity() bool { + return am.CheckReclaimed() && !am.CheckActualNUMABinding() +} diff --git a/pkg/agent/qrm-plugins/commonstate/util.go b/pkg/agent/qrm-plugins/commonstate/util.go index 515564783c..bb713ca699 100644 --- a/pkg/agent/qrm-plugins/commonstate/util.go +++ b/pkg/agent/qrm-plugins/commonstate/util.go @@ -60,6 +60,58 @@ func CheckNUMABindingAntiAffinity(meta *AllocationMeta, annotations map[string]s return false } +func CheckNUMABindingWithAffinity(meta *AllocationMeta, annotations map[string]string) bool { + if meta == nil { + return false + } else if len(annotations) == 0 { + return false + } + + // if qos level is same as candidate, filter out this numa + if meta.GetQoSLevel() != annotations[consts.PodAnnotationQoSLevelKey] { + return false + } + + if meta.CheckNUMANotShare() || + qos.AnnotationsIndicateNUMANotShare(annotations) { + return false + } + + if meta.CheckNUMABinding() { + // considering isolation, use specified pool instead of actual pool name here + candidateSpecifiedPoolName := GetSpecifiedPoolName(consts.PodAnnotationQoSLevelSharedCores, + annotations[consts.PodAnnotationCPUEnhancementCPUSet]) + aiSpecifiedPoolName := meta.GetSpecifiedPoolName() + + // shared_cores 
with numa binding doesn't support two share type pools with same specified name existing at same NUMA + return candidateSpecifiedPoolName == aiSpecifiedPoolName + } + return false +} + +// CheckNonCPUAffinityNUMA returns true, if the AllocationMeta indicates that the meta is numa affinity candidate. +func CheckNonCPUAffinityNUMA(meta *AllocationMeta) bool { + if meta == nil { + return false + } + + return meta.CheckNUMAAffinity() +} + +// CheckNonBindingCPUAffinityNUMA returns true, if the AllocationMeta indicates that the meta is numa affinity candidate. +// Now we consume that different numa affinity cpu pool can share the same numa node. +func CheckNonBindingCPUAffinityNUMA(meta *AllocationMeta, annotations map[string]string) bool { + if meta == nil { + return false + } + + if meta.CheckNUMANotShare() || qos.AnnotationsIndicateNUMANotShare(annotations) { + return false + } + + return meta.CheckNonBindingNUMAAffinity() +} + // GenerateGenericContainerAllocationMeta generates a generic container's allocation metadata. // This function populates the AllocationMeta struct using data from the resource request and other parameters. 
// Parameters: diff --git a/pkg/agent/qrm-plugins/commonstate/util_test.go b/pkg/agent/qrm-plugins/commonstate/util_test.go index e930659087..62192aa33f 100644 --- a/pkg/agent/qrm-plugins/commonstate/util_test.go +++ b/pkg/agent/qrm-plugins/commonstate/util_test.go @@ -19,6 +19,8 @@ package commonstate import ( "testing" + "github.com/stretchr/testify/assert" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" "github.com/kubewharf/katalyst-api/pkg/consts" @@ -312,3 +314,234 @@ func TestCheckNUMABindingSharedCoresAntiAffinity(t *testing.T) { }) } } + +func TestCheckNonCPUAffinityNUMA(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + meta *AllocationMeta + want bool + }{ + { + name: "nil meta", + meta: nil, + want: false, + }, + { + name: "empty annotations", + meta: &AllocationMeta{ + Annotations: map[string]string{}, + }, + want: false, + }, + { + name: "only cpu numa affinity", + meta: &AllocationMeta{ + Annotations: map[string]string{ + consts.PodAnnotationCPUEnhancementNumaAffinity: consts.PodAnnotationCPUEnhancementNumaAffinityEnable, + }, + }, + want: true, + }, + { + name: "only memory numa binding", + meta: &AllocationMeta{ + Annotations: map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + want: true, + }, + { + name: "both cpu and memory numa settings", + meta: &AllocationMeta{ + Annotations: map[string]string{ + consts.PodAnnotationCPUEnhancementNumaAffinity: consts.PodAnnotationCPUEnhancementNumaAffinityEnable, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + want: true, + }, + { + name: "incorrect cpu numa affinity value", + meta: &AllocationMeta{ + Annotations: map[string]string{ + consts.PodAnnotationCPUEnhancementNumaAffinity: "false", + }, + }, + want: false, + }, + { + name: "incorrect memory numa binding value", + meta: &AllocationMeta{ + Annotations: 
map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: "false", + }, + }, + want: false, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + if got := CheckNonCPUAffinityNUMA(tt.meta); got != tt.want { + t.Errorf("CheckNonCPUAffinityNUMA() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestCheckNonBindingCPUAffinityNUMA(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + meta *AllocationMeta + annotations map[string]string + want bool + }{ + { + name: "nil meta", + meta: nil, + annotations: nil, + want: false, + }, + { + name: "meta has numa not share", + meta: &AllocationMeta{ + Annotations: map[string]string{ + consts.PodAnnotationCPUEnhancementNUMAShare: consts.PodAnnotationCPUEnhancementNUMAShareDisable, + }, + }, + annotations: nil, + want: false, + }, + { + name: "annotations has numa not share", + meta: &AllocationMeta{ + Annotations: map[string]string{}, + }, + annotations: map[string]string{ + consts.PodAnnotationCPUEnhancementNUMAShare: consts.PodAnnotationCPUEnhancementNUMAShareDisable, + }, + want: false, + }, + { + name: "both have numa not share", + meta: &AllocationMeta{ + Annotations: map[string]string{ + consts.PodAnnotationCPUEnhancementNUMAShare: consts.PodAnnotationCPUEnhancementNUMAShareDisable, + }, + }, + annotations: map[string]string{ + consts.PodAnnotationCPUEnhancementNUMAShare: consts.PodAnnotationCPUEnhancementNUMAShareDisable, + }, + want: false, + }, + { + name: "only cpu numa affinity", + meta: &AllocationMeta{ + Annotations: map[string]string{ + consts.PodAnnotationCPUEnhancementNumaAffinity: consts.PodAnnotationCPUEnhancementNumaAffinityEnable, + }, + }, + annotations: nil, + want: true, + }, + { + name: "cpu numa affinity and memory numa binding", + meta: &AllocationMeta{ + Annotations: map[string]string{ + consts.PodAnnotationCPUEnhancementNumaAffinity: consts.PodAnnotationCPUEnhancementNumaAffinityEnable, + 
consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + annotations: nil, + want: false, + }, + { + name: "no relevant annotations", + meta: &AllocationMeta{ + Annotations: map[string]string{}, + }, + annotations: nil, + want: false, + }, + { + name: "incorrect annotation values", + meta: &AllocationMeta{ + Annotations: map[string]string{ + consts.PodAnnotationCPUEnhancementNumaAffinity: "false", + }, + }, + annotations: nil, + want: false, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + if got := CheckNonBindingCPUAffinityNUMA(tt.meta, tt.annotations); got != tt.want { + t.Errorf("CheckNonBindingCPUAffinityNUMA() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestCheckNUMABindingWithAffinity(t *testing.T) { + t.Parallel() + // case 1: meta is nil + assert.False(t, CheckNUMABindingWithAffinity(nil, map[string]string{})) + + // case 2: annotations is empty + meta := &AllocationMeta{} + assert.False(t, CheckNUMABindingWithAffinity(meta, map[string]string{})) + + // case 3: QoS level not match + meta = &AllocationMeta{} + annotations := map[string]string{ + consts.PodAnnotationQoSLevelKey: "shared_cores", + } + meta.QoSLevel = "dedicated_cores" + assert.False(t, CheckNUMABindingWithAffinity(meta, annotations)) + + // case 4: NUMANotShare + meta = &AllocationMeta{} + meta.QoSLevel = "shared_cores" + meta.Annotations = map[string]string{ + consts.PodAnnotationCPUEnhancementNUMAShare: consts.PodAnnotationCPUEnhancementNUMAShareDisable, + } + assert.False(t, CheckNUMABindingWithAffinity(meta, annotations)) + + // case 5: NonNumaBinding + meta = &AllocationMeta{} + meta.Annotations = map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: "false", + } + assert.False(t, CheckNUMABindingWithAffinity(meta, annotations)) + + // case 6: PoolNameMatch + meta = &AllocationMeta{} + meta.QoSLevel = "shared_cores" + meta.Annotations = 
map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationCPUEnhancementCPUSet: "pool1", + } + annotations = map[string]string{ + consts.PodAnnotationQoSLevelKey: "shared_cores", + consts.PodAnnotationCPUEnhancementCPUSet: "pool1", + } + assert.True(t, CheckNUMABindingWithAffinity(meta, annotations)) + + // case 7: PoolNameNotMatch + meta = &AllocationMeta{} + annotations = map[string]string{ + consts.PodAnnotationCPUEnhancementCPUSet: "pool2", + } + assert.False(t, CheckNUMABindingWithAffinity(meta, annotations)) +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/calculator/cpu_assignment.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/calculator/cpu_assignment.go index e68287c6b9..c1a4deb34f 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/calculator/cpu_assignment.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/calculator/cpu_assignment.go @@ -19,6 +19,7 @@ package calculator import ( "errors" "fmt" + "math" "sort" "k8s.io/klog/v2" @@ -403,6 +404,27 @@ func TakeByTopology(info *machine.KatalystMachineInfo, availableCPUs machine.CPU return machine.NewCPUSet(), fmt.Errorf("topology-aware allocation failed: requested %d CPUs, exhausted all allocation strategies", cpuRequirement) } +// TakeByTopologyWithSpreading implements a topology-aware CPU allocation strategy that +// spreads the allocated CPUs across NUMA nodes. 
+func TakeByTopologyWithSpreading(info *machine.KatalystMachineInfo, + availableCPUs map[int]machine.CPUSet, + cpuRequirement int, alignByL3Caches bool, +) (machine.CPUSet, error) { + alignedAvailableCPUs := machine.CPUSet{} + for _, availableCPUsInNuma := range availableCPUs { + // allocate cpu for numa affinity pod, prefer to allocate cpus spread across NUMA nodes, + // if cpu requirement cannot be divided evenly among numa nodes, + // round up to ensure pod request can be satisfied + requestCPUsInNuma := math.Ceil(float64(cpuRequirement) / float64(len(availableCPUs))) + result, err := TakeByTopology(info, availableCPUsInNuma, int(requestCPUsInNuma), alignByL3Caches) + if err != nil { + return machine.NewCPUSet(), err + } + alignedAvailableCPUs.Union(result) + } + return alignedAvailableCPUs, nil +} + // TakeByNUMABalance tries to make the allocated cpu spread on different // sockets, and it uses cpu Cores as the basic allocation unit func TakeByNUMABalance(info *machine.KatalystMachineInfo, availableCPUs machine.CPUSet, diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/helper.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/helper.go index 8170cd8776..2094a3bed2 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/helper.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/helper.go @@ -34,7 +34,7 @@ func NewBlockCPUSet() BlockCPUSet { return make(BlockCPUSet) } -func (ci *CalculationInfo) IsSharedNUMABindingPool() bool { +func (ci *CalculationInfo) IsSharedNUMAAffinityPool() bool { if ci == nil { return false } @@ -50,7 +50,7 @@ func (ce *CalculationEntries) IsPoolEntry() bool { return len(ce.Entries) == 1 && ce.Entries[commonstate.FakedContainerName] != nil } -func (ce *CalculationEntries) IsSharedNUMABindingPoolEntry() bool { +func (ce *CalculationEntries) IsSharedNUMAAffinityPoolEntry() bool { if !ce.IsPoolEntry() { return false } @@ -59,7 +59,7 @@ func (ce *CalculationEntries) IsSharedNUMABindingPoolEntry() bool { 
ce.Entries[commonstate.FakedContainerName].CalculationResultsByNumas[commonstate.FakedNUMAID] == nil } -func (lwr *ListAndWatchResponse) GetSharedBindingNUMAs() (sets.Int, error) { +func (lwr *ListAndWatchResponse) GetSharedNUMAAffinityNUMAs() (sets.Int, error) { if lwr == nil { return sets.NewInt(), fmt.Errorf("got nil ListAndWatchResponse") } @@ -68,7 +68,7 @@ func (lwr *ListAndWatchResponse) GetSharedBindingNUMAs() (sets.Int, error) { for _, entry := range lwr.Entries { - if !entry.IsSharedNUMABindingPoolEntry() { + if !entry.IsSharedNUMAAffinityPoolEntry() { continue } diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_suppression.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_suppression.go index a93009ea79..f210dcce3a 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_suppression.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_suppression.go @@ -114,17 +114,17 @@ func (p *CPUPressureSuppression) GetEvictPods(_ context.Context, request *plugin now := time.Now() evictPods := make([]*v1alpha1.EvictPod, 0) - nonActualNUMABindingPods, err := p.evictNonActualNUMABindingPods(now, filteredPods, poolCPUSet, dynamicConfig.CPUPressureEvictionConfiguration) + nonActualNUMAAffinityPods, err := p.evictNonActualNUMAAffinityPods(now, filteredPods, poolCPUSet, dynamicConfig.CPUPressureEvictionConfiguration) if err != nil { return nil, err } - evictPods = append(evictPods, nonActualNUMABindingPods...) + evictPods = append(evictPods, nonActualNUMAAffinityPods...) - actualNUMABindingPods, err := p.evictActualNUMABindingPods(now, filteredPods, poolCPUSet, dynamicConfig.CPUPressureEvictionConfiguration) + actualNUMAAffinityPods, err := p.evictActualNUMAAffinityPods(now, filteredPods, poolCPUSet, dynamicConfig.CPUPressureEvictionConfiguration) if err != nil { return nil, err } - evictPods = append(evictPods, actualNUMABindingPods...) 
+ evictPods = append(evictPods, actualNUMAAffinityPods...) // clear inactive filtered pod from lastToleranceTime filteredPodsMap := native.GetPodKeyMap(filteredPods, native.GenerateUniqObjectNameKey) @@ -138,17 +138,17 @@ func (p *CPUPressureSuppression) GetEvictPods(_ context.Context, request *plugin return &pluginapi.GetEvictPodsResponse{EvictPods: evictPods}, nil } -func (p *CPUPressureSuppression) evictNonActualNUMABindingPods(now time.Time, filteredPods []*v1.Pod, poolCPUSet machine.CPUSet, +func (p *CPUPressureSuppression) evictNonActualNUMAAffinityPods(now time.Time, filteredPods []*v1.Pod, poolCPUSet machine.CPUSet, evictionConfiguration *eviction.CPUPressureEvictionConfiguration, ) ([]*v1alpha1.EvictPod, error) { - nonActualNUMABindingCPUSet := machine.NewCPUSet() - nonActualNUMABindingNUMAs := p.state.GetMachineState().GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMABinding)) - for _, numaID := range nonActualNUMABindingNUMAs.ToSliceNoSortInt() { - nonActualNUMABindingCPUSet = nonActualNUMABindingCPUSet.Union(poolCPUSet.Intersection(p.metaServer.CPUDetails.CPUsInNUMANodes(numaID))) + nonActualNUMAAffinityCPUSet := machine.NewCPUSet() + nonActualNUMAAffinityNUMAs := p.state.GetMachineState().GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMAAffinity)) + for _, numaID := range nonActualNUMAAffinityNUMAs.ToSliceNoSortInt() { + nonActualNUMAAffinityCPUSet = nonActualNUMAAffinityCPUSet.Union(poolCPUSet.Intersection(p.metaServer.CPUDetails.CPUsInNUMANodes(numaID))) } // get reclaim metrics - reclaimMetrics, err := helper.GetReclaimMetrics(nonActualNUMABindingCPUSet, p.conf.ReclaimRelativeRootCgroupPath, p.metaServer.MetricsFetcher) + reclaimMetrics, err := helper.GetReclaimMetrics(nonActualNUMAAffinityCPUSet, p.conf.ReclaimRelativeRootCgroupPath, p.metaServer.MetricsFetcher) if err != nil { return nil, fmt.Errorf("get reclaim metrics failed: %s", err) } 
@@ -163,11 +163,11 @@ func (p *CPUPressureSuppression) evictNonActualNUMABindingPods(now time.Time, fi }) general.InfoS("filterPods", "cpuSet", - nonActualNUMABindingCPUSet.String(), "podCount", len(filterPods)) + nonActualNUMAAffinityCPUSet.String(), "podCount", len(filterPods)) return p.evictPodsByReclaimMetrics(now, filterPods, reclaimMetrics, evictionConfiguration) } -func (p *CPUPressureSuppression) evictActualNUMABindingPods(now time.Time, filteredPods []*v1.Pod, poolCPUSet machine.CPUSet, +func (p *CPUPressureSuppression) evictActualNUMAAffinityPods(now time.Time, filteredPods []*v1.Pod, poolCPUSet machine.CPUSet, evictionConfiguration *eviction.CPUPressureEvictionConfiguration, ) ([]*v1alpha1.EvictPod, error) { var evictPods []*v1alpha1.EvictPod @@ -176,10 +176,10 @@ func (p *CPUPressureSuppression) evictActualNUMABindingPods(now time.Time, filte continue } - actualNUMABindingCPUSet := poolCPUSet.Intersection(p.metaServer.CPUDetails.CPUsInNUMANodes(numaID)) + actualNUMAAffinityCPUSet := poolCPUSet.Intersection(p.metaServer.CPUDetails.CPUsInNUMANodes(numaID)) // get reclaim metrics - reclaimMetrics, err := helper.GetReclaimMetrics(actualNUMABindingCPUSet, + reclaimMetrics, err := helper.GetReclaimMetrics(actualNUMAAffinityCPUSet, reclaimRelativeRootCgroupPath, p.metaServer.MetricsFetcher) if err != nil { return nil, fmt.Errorf("get reclaim metrics failed: %s", err) @@ -195,7 +195,7 @@ func (p *CPUPressureSuppression) evictActualNUMABindingPods(now time.Time, filte }) general.InfoS("filterPods", "numaID", numaID, "cpuSet", - actualNUMABindingCPUSet.String(), "podCount", len(filterPods)) + actualNUMAAffinityCPUSet.String(), "podCount", len(filterPods)) pods, err := p.evictPodsByReclaimMetrics(now, filterPods, reclaimMetrics, evictionConfiguration) if err != nil { return nil, err diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_usage_numa.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_usage_numa.go 
index 06708473e7..50fa660722 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_usage_numa.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_usage_numa.go @@ -309,10 +309,10 @@ func (p *NumaCPUPressureEviction) update(_ context.Context) { // numa -> pod -> ring for numaID := 0; numaID < p.metaServer.NumNUMANodes; numaID++ { numaSize := p.metaServer.NUMAToCPUs.CPUSizeInNUMAs(numaID) - snbEntries := machineState[numaID].PodEntries.GetFilteredPodEntries(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedNUMABinding)) + podEntries := machineState[numaID].PodEntries.GetFilteredPodEntries(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedNUMAAffinity)) sum := 0.0 - for podUID, containerEntries := range snbEntries { + for podUID, containerEntries := range podEntries { for containerName := range containerEntries { val, err := p.metaServer.GetContainerMetric(podUID, containerName, metricName) if err != nil { diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/canonical/optimizer.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/canonical/optimizer.go index 60a97f2494..dd3bd797c1 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/canonical/optimizer.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/canonical/optimizer.go @@ -121,7 +121,7 @@ func (o *canonicalHintOptimizer) populateHintsByPreferPolicy(numaNodes []int, pr } availableCPUQuantity := machineState[nodeID].GetAvailableCPUQuantity(o.reservedCPUs) if !cpuutil.CPUIsSufficient(request, availableCPUQuantity) { - general.Warningf("numa_binding shared_cores container skip NUMA: %d available: %.3f request: %.3f", + general.Warningf("numa_affinity shared_cores container skip NUMA: %d available: %.3f request: %.3f", nodeID, availableCPUQuantity, request) continue } diff --git 
a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/memorybandwidth/optimizer.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/memorybandwidth/optimizer.go index 3a1dd010d5..0c3a9631f1 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/memorybandwidth/optimizer.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/memorybandwidth/optimizer.go @@ -173,7 +173,7 @@ func (o *memoryBandwidthOptimizer) getNUMAAllocatedMemBW(machineState state.NUMA numaAllocatedMemBW := make(map[int]int) podUIDToMemBWReq := make(map[string]int) - podUIDToBindingNUMAs := make(map[string]sets.Int) + podUIDToAffinityNUMAs := make(map[string]sets.Int) for numaID, numaState := range machineState { if numaState == nil { @@ -186,7 +186,7 @@ func (o *memoryBandwidthOptimizer) getNUMAAllocatedMemBW(machineState state.NUMA continue } - if !(allocationInfo.CheckNUMABinding() && allocationInfo.CheckMainContainer()) { + if !(allocationInfo.CheckNUMAAffinity() && allocationInfo.CheckMainContainer()) { continue } @@ -205,19 +205,19 @@ func (o *memoryBandwidthOptimizer) getNUMAAllocatedMemBW(machineState state.NUMA podUIDToMemBWReq[allocationInfo.PodUid] = containerMemoryBandwidthRequest } - if podUIDToBindingNUMAs[podUID] == nil { // Use podUID from outer loop key - podUIDToBindingNUMAs[podUID] = sets.NewInt() + if podUIDToAffinityNUMAs[podUID] == nil { // Use podUID from outer loop key + podUIDToAffinityNUMAs[podUID] = sets.NewInt() } - podUIDToBindingNUMAs[podUID].Insert(numaID) + podUIDToAffinityNUMAs[podUID].Insert(numaID) } } } - for podUID, numaSet := range podUIDToBindingNUMAs { + for podUID, numaSet := range podUIDToAffinityNUMAs { podMemBWReq, found := podUIDToMemBWReq[podUID] if !found { // This might happen if we couldn't get fullPodAllocationInfo earlier - general.Warningf("pod: %s is found in podUIDToBindingNUMAs, but not found in podUIDToMemBWReq, skipping", podUID) + general.Warningf("pod: %s is found in 
podUIDToAffinityNUMAs, but not found in podUIDToMemBWReq, skipping", podUID) continue } diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/metricbased/optimizer.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/metricbased/optimizer.go index 1fe590ce2d..0d8b8261fc 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/metricbased/optimizer.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/metricbased/optimizer.go @@ -231,7 +231,7 @@ func (o *metricBasedHintOptimizer) getNUMAMetric(numa int, resourceName string, return 0.0, fmt.Errorf("invalid machineState") } - snbEntries := machineState[numa].PodEntries.GetFilteredPodEntries(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedNUMABinding)) + snbEntries := machineState[numa].PodEntries.GetFilteredPodEntries(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedNUMAAffinity)) sum := 0.0 for podUID, containerEntries := range snbEntries { diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go index 3446fcaa4f..f61e1662df 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go @@ -47,10 +47,12 @@ import ( "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/registry" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/irqtuner" irqtuingcontroller "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/irqtuner/controller" + cpureactor "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/reactor" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/validator" cpuutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/util" 
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util/reactor" "github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/featuregatenegotiation" "github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/periodicalhandler" "github.com/kubewharf/katalyst-core/pkg/config" @@ -139,8 +141,10 @@ type DynamicPolicy struct { reservedReclaimedCPUSet machine.CPUSet reservedReclaimedTopologyAwareAssignments map[int]machine.CPUSet - sharedCoresNUMABindingHintOptimizer hintoptimizer.HintOptimizer - dedicatedCoresNUMABindingHintOptimizer hintoptimizer.HintOptimizer + sharedCoresNUMAAffinityHintOptimizer hintoptimizer.HintOptimizer + dedicatedCoresNUMAAffinityHintOptimizer hintoptimizer.HintOptimizer + + numaAllocationReactor reactor.AllocationReactor } func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration, @@ -295,6 +299,14 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration return false, agent.ComponentStub{}, fmt.Errorf("dynamic policy new plugin wrapper failed with error: %v", err) } + if conf.EnableNUMAAllocationReactor { + policyImplement.numaAllocationReactor = cpureactor.NewNUMAPodAllocationReactor( + reactor.NewPodAllocationReactor( + agentCtx.MetaServer.PodFetcher, + agentCtx.Client.KubeClient, + )) + } + return true, &agent.PluginWrapper{GenericPlugin: pluginWrapper}, nil } @@ -452,14 +464,14 @@ func (p *DynamicPolicy) Start() (err error) { go wait.BackoffUntil(communicateWithCPUAdvisorServer, wait.NewExponentialBackoffManager(800*time.Millisecond, 30*time.Second, 2*time.Minute, 2.0, 0, &clock.RealClock{}), true, p.stopCh) - err = p.sharedCoresNUMABindingHintOptimizer.Run(p.stopCh) + err = p.sharedCoresNUMAAffinityHintOptimizer.Run(p.stopCh) if err != nil { - return fmt.Errorf("sharedCoresNUMABindingHintOptimizer.Run failed with error: %v", err) + return fmt.Errorf("sharedCoresNUMAAffinityHintOptimizer.Run failed with error: %v", 
err) } - err = p.dedicatedCoresNUMABindingHintOptimizer.Run(p.stopCh) + err = p.dedicatedCoresNUMAAffinityHintOptimizer.Run(p.stopCh) if err != nil { - return fmt.Errorf("dedicatedCoresNUMABindingHintOptimizer.Run failed with error: %v", err) + return fmt.Errorf("dedicatedCoresNUMAAffinityHintOptimizer.Run failed with error: %v", err) } return nil @@ -511,9 +523,9 @@ func (p *DynamicPolicy) GetResourcesAllocation(_ context.Context, // rumpUpPooledCPUs is the total available cpu cores minus those that are reserved rumpUpPooledCPUs := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, func(ai *state.AllocationInfo) bool { - return ai.CheckDedicated() || ai.CheckSharedNUMABinding() + return ai.CheckDedicated() || ai.CheckSharedNUMAAffinity() }, - state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckDedicatedNUMABinding)) + state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckDedicatedNUMAAffinity)) rumpUpPooledCPUsTopologyAwareAssignments, err := machine.GetNumaAwareAssignments(p.machineInfo.CPUTopology, rumpUpPooledCPUs) if err != nil { return nil, fmt.Errorf("GetNumaAwareAssignments err: %v", err) @@ -546,7 +558,7 @@ func (p *DynamicPolicy) GetResourcesAllocation(_ context.Context, initTs, tsErr := time.Parse(util.QRMTimeFormat, allocationInfo.InitTimestamp) if tsErr != nil { - if allocationInfo.CheckShared() && !allocationInfo.CheckNUMABinding() { + if allocationInfo.CheckShared() && !allocationInfo.CheckNUMAAffinity() { general.Errorf("pod: %s/%s, container: %s init timestamp parsed failed with error: %v, re-ramp-up it", allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, tsErr) @@ -1139,13 +1151,13 @@ func (p *DynamicPolicy) initAdvisorClientConn() (err error) { func (p *DynamicPolicy) initHintOptimizers() error { var err error - p.sharedCoresNUMABindingHintOptimizer, err = registry.SharedCoresHintOptimizerRegistry.HintOptimizer(p.conf.SharedCoresHintOptimizerPolicies, + 
p.sharedCoresNUMAAffinityHintOptimizer, err = registry.SharedCoresHintOptimizerRegistry.HintOptimizer(p.conf.SharedCoresHintOptimizerPolicies, p.generateHintOptimizerFactoryOptions()) if err != nil { return fmt.Errorf("SharedCoresHintOptimizerRegistry.HintOptimizer failed with error: %v", err) } - p.dedicatedCoresNUMABindingHintOptimizer, err = registry.DedicatedCoresHintOptimizerRegistry.HintOptimizer(p.conf.DedicatedCoresHintOptimizerPolicies, + p.dedicatedCoresNUMAAffinityHintOptimizer, err = registry.DedicatedCoresHintOptimizerRegistry.HintOptimizer(p.conf.DedicatedCoresHintOptimizerPolicies, p.generateHintOptimizerFactoryOptions()) if err != nil { return fmt.Errorf("DedicatedCoresHintOptimizerRegistry.HintOptimizer failed with error: %v", err) @@ -1269,9 +1281,9 @@ func (p *DynamicPolicy) initReclaimPool() error { machineState := p.state.GetMachineState() availableCPUs := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, func(ai *state.AllocationInfo) bool { - return ai.CheckDedicated() || ai.CheckSharedNUMABinding() + return ai.CheckDedicated() || ai.CheckSharedNUMAAffinity() }, - state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckDedicatedNUMABinding)).Difference(noneResidentCPUs) + state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckDedicatedNUMAAffinity)).Difference(noneResidentCPUs) var initReclaimedCPUSetSize int if availableCPUs.Size() >= p.reservedReclaimedCPUsSize { @@ -1333,27 +1345,27 @@ func (p *DynamicPolicy) getContainerRequestedCores(allocationInfo *state.Allocat return cpuutil.GetContainerRequestedCores(p.metaServer, allocationInfo) } -func (p *DynamicPolicy) checkNonBindingShareCoresCpuResource(req *pluginapi.ResourceRequest) (bool, error) { +func (p *DynamicPolicy) checkNonCPUAffinityShareCoresCpuResource(req *pluginapi.ResourceRequest) (bool, error) { _, reqFloat64, err := util.GetPodAggregatedRequestResource(req) if err != nil { return false, fmt.Errorf("GetQuantityFromResourceReq failed with error: %v", 
err) } - shareCoresAllocatedInt := state.GetNonBindingSharedRequestedQuantityFromPodEntries(p.state.GetPodEntries(), map[string]float64{req.PodUid: reqFloat64}, p.getContainerRequestedCores) + shareCoresAllocatedInt := state.GetNonCPUAffinitySharedRequestedQuantityFromPodEntries(p.state.GetPodEntries(), map[string]float64{req.PodUid: reqFloat64}, p.getContainerRequestedCores) machineState := p.state.GetMachineState() pooledCPUs := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckDedicated), - state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedOrDedicatedNUMABinding)) + state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedOrDedicatedNUMAAffinity)) - general.Infof("[checkNonBindingShareCoresCpuResource] node cpu allocated: %d, allocatable: %d", shareCoresAllocatedInt, pooledCPUs.Size()) + general.Infof("[checkNonCPUAffinityShareCoresCpuResource] node cpu allocated: %d, allocatable: %d", shareCoresAllocatedInt, pooledCPUs.Size()) if shareCoresAllocatedInt > pooledCPUs.Size() { - general.Warningf("[checkNonBindingShareCoresCpuResource] no enough cpu resource for non-binding share cores pod: %s/%s, container: %s (request: %.02f, node allocated: %d, node allocatable: %d)", + general.Warningf("[checkNonCPUAffinityShareCoresCpuResource] no enough cpu resource for non-cpu--affinity share cores pod: %s/%s, container: %s (request: %.02f, node allocated: %d, node allocatable: %d)", req.PodNamespace, req.PodName, req.ContainerName, reqFloat64, shareCoresAllocatedInt, pooledCPUs.Size()) return false, nil } - general.InfoS("checkNonBindingShareCoresCpuResource cpu successfully", + general.InfoS("checkNonCPUAffinityShareCoresCpuResource cpu successfully", "podNamespace", req.PodNamespace, "podName", req.PodName, "containerName", req.ContainerName, diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go 
b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go index 1a56ec2364..6117ff2da9 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go @@ -869,14 +869,14 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons } // Process blocks with specified NUMA IDs (for NUMA-bound containers) - // These are typically dedicated_cores containers with NUMA binding and + // These are typically dedicated_cores containers with NUMA affinity and // reclaimed_cores containers colocated with them for numaID, blocks := range numaToBlocks { if numaID == commonstate.FakedNUMAID { continue } - withNUMABindingShareOrDedicatedPod := false + withNUMAAffinityShareOrDedicatedPod := false numaAvailableCPUs := availableCPUs.Intersection(topology.CPUDetails.CPUsInNUMANodes(numaID)) // First handle blocks for NUMA-bound dedicated_cores containers @@ -935,7 +935,7 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons blockCPUSet[blockID] = cpuset numaAvailableCPUs = numaAvailableCPUs.Difference(cpuset) availableCPUs = availableCPUs.Difference(cpuset) - withNUMABindingShareOrDedicatedPod = true + withNUMAAffinityShareOrDedicatedPod = true } // Then handle blocks for NUMA-bound shared_cores containers and reclaimed_cores containers colocated with them @@ -974,8 +974,8 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons availableCPUs = availableCPUs.Difference(cpuset) for poolName := range block.OwnerPoolEntryMap { - if commonstate.IsIsolationPool(poolName) || commonstate.IsShareNUMABindingPool(poolName) { - withNUMABindingShareOrDedicatedPod = true + if commonstate.IsIsolationPool(poolName) || commonstate.IsShareNUMAAffinityPool(poolName) { + withNUMAAffinityShareOrDedicatedPod = true break } } @@ -984,7 +984,7 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons // 
Finally, if there are NUMA-bound containers on this NUMA node, // deduct all numaAvailableCPUs from availableCPUs to ensure that // NUMA-bound pods don't share the same NUMA node with non-NUMA-bound pods - if withNUMABindingShareOrDedicatedPod { + if withNUMAAffinityShareOrDedicatedPod { // Because numaAvailableCPUs is a subset of availableCPUs, // we need to deduct all numaAvailableCPUs from availableCPUs availableCPUs = availableCPUs.Difference(numaAvailableCPUs) @@ -1043,8 +1043,8 @@ func (p *DynamicPolicy) applyBlocks(blockCPUSet advisorapi.BlockCPUSet, resp *ad dedicatedCPUSet := machine.NewCPUSet() pooledUnionDedicatedCPUSet := machine.NewCPUSet() - // calculate NUMAs without actual numa_binding reclaimed pods - nonReclaimActualBindingNUMAs := p.state.GetMachineState().GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMABinding)) + // calculate NUMAs without actual numa_affinity reclaimed pods + nonReclaimActualAffinityNUMAs := p.state.GetMachineState().GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMAAffinity)) // deal with blocks of dedicated_cores and pools for entryName, entry := range resp.Entries { @@ -1137,23 +1137,23 @@ func (p *DynamicPolicy) applyBlocks(blockCPUSet advisorapi.BlockCPUSet, resp *ad } // revise reclaim pool size to avoid reclaimed_cores and numa_binding dedicated_cores containers - // in NUMAs without cpuset actual binding - err := p.reviseReclaimPool(newEntries, nonReclaimActualBindingNUMAs, pooledUnionDedicatedCPUSet) + // in NUMAs without cpuset actual affinity + err := p.reviseReclaimPool(newEntries, nonReclaimActualAffinityNUMAs, pooledUnionDedicatedCPUSet) if err != nil { return err } // calculate rampUpCPUs - sharedBindingNUMAs, err := resp.GetSharedBindingNUMAs() + sharedNUMAAffinityNUMAs, err := resp.GetSharedNUMAAffinityNUMAs() if err != nil { - return fmt.Errorf("GetSharedBindingNUMAs failed with error: %v", err) + 
return fmt.Errorf("GetSharedNUMAAffinityNUMAs failed with error: %v", err) } - sharedBindingNUMACPUs := p.machineInfo.CPUDetails.CPUsInNUMANodes(sharedBindingNUMAs.UnsortedList()...) + sharedNUMAAffinityNUMACPUs := p.machineInfo.CPUDetails.CPUsInNUMANodes(sharedNUMAAffinityNUMAs.UnsortedList()...) // rampUpCPUs include reclaim pool in NUMAs without NUMA_binding cpus rampUpCPUs := p.machineInfo.CPUDetails.CPUs(). Difference(p.reservedCPUs). Difference(dedicatedCPUSet). - Difference(sharedBindingNUMACPUs) + Difference(sharedNUMAAffinityNUMACPUs) rampUpCPUsTopologyAwareAssignments, err := machine.GetNumaAwareAssignments(p.machineInfo.CPUTopology, rampUpCPUs) if err != nil { @@ -1227,6 +1227,8 @@ func (p *DynamicPolicy) applyBlocks(blockCPUSet advisorapi.BlockCPUSet, resp *ad return err } + // since share cores with numa-binding and share cores with numa-affinity are put into different pools, + // we need to check which type it is and set corresponding qosLevel and annotations. if allocationInfo.CheckSharedNUMABinding() { poolEntry.QoSLevel = apiconsts.PodAnnotationQoSLevelSharedCores // set SharedNUMABinding declarations to pool entry containing SharedNUMABinding containers, @@ -1234,6 +1236,13 @@ func (p *DynamicPolicy) applyBlocks(blockCPUSet advisorapi.BlockCPUSet, resp *ad poolEntry.Annotations = general.MergeMap(poolEntry.Annotations, map[string]string{ apiconsts.PodAnnotationMemoryEnhancementNumaBinding: apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable, }) + } else if allocationInfo.CheckSharedNUMAAffinity() { + // set SharedNUMAAffinity declarations to pool entry containing SharedNUMAAffinity containers, + // in order to differentiate them from non-affinity share cores pools during GetFilteredPoolsCPUSetMap. 
+ poolEntry.QoSLevel = apiconsts.PodAnnotationQoSLevelSharedCores + poolEntry.Annotations = general.MergeMap(poolEntry.Annotations, map[string]string{ + apiconsts.PodAnnotationCPUEnhancementNumaAffinity: apiconsts.PodAnnotationCPUEnhancementNumaAffinityEnable, + }) } general.Infof("put pod: %s/%s container: %s to pool: %s, set its allocation result from %s to %s", @@ -1252,7 +1261,7 @@ func (p *DynamicPolicy) applyBlocks(blockCPUSet advisorapi.BlockCPUSet, resp *ad return err } - err = p.updateReclaimAllocationResultByPoolEntry(newEntries[podUID][containerName], poolEntry, nonReclaimActualBindingNUMAs) + err = p.updateReclaimAllocationResultByPoolEntry(newEntries[podUID][containerName], poolEntry, nonReclaimActualAffinityNUMAs) if err != nil { return err } @@ -1333,7 +1342,7 @@ func (p *DynamicPolicy) applyNUMAHeadroom(resp *advisorapi.ListAndWatchResponse) return nil } -func (p *DynamicPolicy) reviseReclaimPool(newEntries state.PodEntries, nonReclaimActualBindingNUMAs, pooledUnionDedicatedCPUSet machine.CPUSet) error { +func (p *DynamicPolicy) reviseReclaimPool(newEntries state.PodEntries, nonReclaimActualAffinityNUMAs, pooledUnionDedicatedCPUSet machine.CPUSet) error { forbiddenCPUs, err := state.GetUnitedPoolsCPUs(state.ForbiddenPools, p.state.GetPodEntries()) if err != nil { return fmt.Errorf("GetUnitedPoolsCPUs for forbidden pools failed with error: %v", err) @@ -1381,7 +1390,7 @@ func (p *DynamicPolicy) reviseReclaimPool(newEntries state.PodEntries, nonReclai // revise reclaim pool for RNB NUMAs for _, numaID := range p.machineInfo.CPUDetails.NUMANodes().ToSliceInt() { - if nonReclaimActualBindingNUMAs.Contains(numaID) { + if nonReclaimActualAffinityNUMAs.Contains(numaID) { continue } @@ -1394,15 +1403,15 @@ func (p *DynamicPolicy) reviseReclaimPool(newEntries state.PodEntries, nonReclai } // revise reclaim pool for non-RNB NUMAs - nonReclaimActualBindingNUMAsAllocation := machine.NewCPUSet() + nonReclaimActualAffinityNUMAsAllocation := machine.NewCPUSet() 
for _, numaID := range p.machineInfo.CPUDetails.NUMANodes().ToSliceInt() { - if nonReclaimActualBindingNUMAs.Contains(numaID) { - nonReclaimActualBindingNUMAsAllocation = nonReclaimActualBindingNUMAsAllocation.Union(reclaimPool.TopologyAwareAssignments[numaID]) + if nonReclaimActualAffinityNUMAs.Contains(numaID) { + nonReclaimActualAffinityNUMAsAllocation = nonReclaimActualAffinityNUMAsAllocation.Union(reclaimPool.TopologyAwareAssignments[numaID]) } } - if nonReclaimActualBindingNUMAsAllocation.IsEmpty() { - for _, numaID := range nonReclaimActualBindingNUMAsAllocation.ToSliceInt() { + if nonReclaimActualAffinityNUMAsAllocation.IsEmpty() { + for _, numaID := range nonReclaimActualAffinityNUMAsAllocation.ToSliceInt() { reclaimPool.AllocationResult = reclaimPool.AllocationResult.Union(p.reservedReclaimedTopologyAwareAssignments[numaID]) reclaimPool.OriginalAllocationResult = reclaimPool.OriginalAllocationResult.Union(p.reservedReclaimedTopologyAwareAssignments[numaID]) reclaimPool.TopologyAwareAssignments[numaID] = p.reservedReclaimedTopologyAwareAssignments[numaID].Clone() diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go index 67b9be4a6d..7f7b4a093f 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go @@ -50,15 +50,13 @@ func (p *DynamicPolicy) sharedCoresAllocationHandler(ctx context.Context, return nil, fmt.Errorf("sharedCoresAllocationHandler got nil req") } - switch req.Annotations[apiconsts.PodAnnotationMemoryEnhancementNumaBinding] { - case apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable: - return p.sharedCoresWithNUMABindingAllocationHandler(ctx, req, persistCheckpoint) - default: - return p.sharedCoresWithoutNUMABindingAllocationHandler(ctx, req, persistCheckpoint) + if qosutil.AnnotationsIndicateNUMABinding(req.Annotations) || 
qosutil.AnnotationsIndicateNUMAAffinity(req.Annotations) { + return p.sharedCoresWithNUMAAffinityAllocationHandler(ctx, req, persistCheckpoint) } + return p.sharedCoresWithoutNUMAAffinityAllocationHandler(ctx, req, persistCheckpoint) } -func (p *DynamicPolicy) sharedCoresWithoutNUMABindingAllocationHandler(_ context.Context, +func (p *DynamicPolicy) sharedCoresWithoutNUMAAffinityAllocationHandler(_ context.Context, req *pluginapi.ResourceRequest, persistCheckpoint bool, ) (*pluginapi.ResourceAllocationResponse, error) { if req == nil { @@ -73,7 +71,7 @@ func (p *DynamicPolicy) sharedCoresWithoutNUMABindingAllocationHandler(_ context machineState := p.state.GetMachineState() pooledCPUs := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckDedicated), - state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedOrDedicatedNUMABinding)) + state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedOrDedicatedNUMAAffinity)) // cores that are forbidden from user binding need to be deducted from the pool. 
forbiddenCPUs, err := state.GetUnitedPoolsCPUs(state.ForbiddenPools, p.state.GetPodEntries()) if err != nil { @@ -246,8 +244,8 @@ func (p *DynamicPolicy) reclaimedCoresAllocationHandler(ctx context.Context, } machineState := p.state.GetMachineState() - // calculate NUMAs without actual numa_binding reclaimed pods - nonReclaimActualBindingNUMAs := machineState.GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMABinding)) + // calculate NUMAs without actual numa_affinity reclaimed pods + nonReclaimActualAffinityNUMAs := machineState.GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMAAffinity)) if allocationInfo != nil { general.Infof("pod: %s/%s, container: %s with old allocation result: %s, allocate by reclaimedCPUSet: %s", @@ -263,17 +261,17 @@ func (p *DynamicPolicy) reclaimedCoresAllocationHandler(ctx context.Context, RequestQuantity: reqFloat64, } - // calculate NUMAs without non-actual numa_binding reclaimed pods - reclaimActualBindingNUMAs := machineState.GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedNonActualNUMABinding)) - // set reclaimed numa_binding NUMA ID to allocationInfo - if req.Hint != nil && len(req.Hint.Nodes) == 1 && (reclaimActualBindingNUMAs.Contains(int(req.Hint.Nodes[0])) || - !nonReclaimActualBindingNUMAs.Equals(machine.NewCPUSet(int(req.Hint.Nodes[0])))) { - allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0]) + // calculate NUMAs without non-actual numa_affinity reclaimed pods + reclaimActualAffinityNUMAs := machineState.GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedNonActualNUMAAffinity)) + // set reclaimed numa_affinity NUMA ID to allocationInfo + if req.Hint != nil && len(req.Hint.Nodes) == 1 && (reclaimActualAffinityNUMAs.Contains(int(req.Hint.Nodes[0])) || + 
!nonReclaimActualAffinityNUMAs.Equals(machine.NewCPUSet(int(req.Hint.Nodes[0])))) { + allocationInfo.SetSpecifiedNUMAID(req.Hint.Nodes[0]) } } // update reclaimed allocation result by pool entry - err = p.updateReclaimAllocationResultByPoolEntry(allocationInfo, reclaimedAllocationInfo, nonReclaimActualBindingNUMAs) + err = p.updateReclaimAllocationResultByPoolEntry(allocationInfo, reclaimedAllocationInfo, nonReclaimActualAffinityNUMAs) if err != nil { return nil, err } @@ -282,14 +280,14 @@ func (p *DynamicPolicy) reclaimedCoresAllocationHandler(ctx context.Context, // if one of subsequent steps is failed, we will delete current allocationInfo from podEntries in defer function of allocation function. p.state.SetAllocationInfo(allocationInfo.PodUid, allocationInfo.ContainerName, allocationInfo, persistCheckpoint) - // update reclaim non-actual numa_binding reclaim cores allocations if it needs to transfer a non-RNB numa to RNB numa + // update reclaim non-actual numa_affinity reclaim cores allocations if it needs to transfer a non-RNB numa to RNB numa podEntries := p.state.GetPodEntries() if allocationInfo.CheckActualNUMABinding() && - nonReclaimActualBindingNUMAs.Intersection(allocationInfo.AllocationResult).Size() > 0 { - updatedNonReclaimActualBindingNUMAs := nonReclaimActualBindingNUMAs.Difference(allocationInfo.AllocationResult) - err := p.updateNonActualNUMABindingReclaimCoresAllocations(podEntries, updatedNonReclaimActualBindingNUMAs, reclaimedAllocationInfo) + nonReclaimActualAffinityNUMAs.Intersection(allocationInfo.AllocationResult).Size() > 0 { + updatedNonReclaimActualAffinityNUMAs := nonReclaimActualAffinityNUMAs.Difference(allocationInfo.AllocationResult) + err := p.updateNonActualNUMAAffinityReclaimCoresAllocations(podEntries, updatedNonReclaimActualAffinityNUMAs, reclaimedAllocationInfo) if err != nil { - general.Errorf("pod: %s/%s, container: %s updateNonActualNUMABindingReclaimCoresAllocations failed with error: %v", + general.Errorf("pod: %s/%s, 
container: %s updateNonActualNUMAAffinityReclaimCoresAllocations failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) return nil, err } @@ -313,14 +311,14 @@ func (p *DynamicPolicy) reclaimedCoresAllocationHandler(ctx context.Context, return resp, nil } -// updateReclaimAllocationResultByPoolEntry updates non-actual numa binding reclaimed allocation result by pool entry -func (p *DynamicPolicy) updateNonActualNUMABindingReclaimCoresAllocations(podEntries state.PodEntries, - nonReclaimActualBindingNUMAs machine.CPUSet, poolEntry *state.AllocationInfo, +// updateReclaimAllocationResultByPoolEntry updates non-actual numa affinity reclaimed allocation result by pool entry +func (p *DynamicPolicy) updateNonActualNUMAAffinityReclaimCoresAllocations(podEntries state.PodEntries, + nonReclaimActualAffinityNUMAs machine.CPUSet, poolEntry *state.AllocationInfo, ) error { - nonActualNUMABindingAllocations := podEntries.GetFilteredPodEntries(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedNonActualNUMABinding)) - for _, containerEntries := range nonActualNUMABindingAllocations { + nonActualNUMAAffinityAllocations := podEntries.GetFilteredPodEntries(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedNonActualNUMAAffinity)) + for _, containerEntries := range nonActualNUMAAffinityAllocations { for _, allocationInfo := range containerEntries { - err := p.updateReclaimAllocationResultByPoolEntry(allocationInfo, poolEntry, nonReclaimActualBindingNUMAs) + err := p.updateReclaimAllocationResultByPoolEntry(allocationInfo, poolEntry, nonReclaimActualAffinityNUMAs) if err != nil { return fmt.Errorf("updateReclaimAllocationResultByPoolEntry with error: %v", err) } @@ -340,22 +338,22 @@ func (p *DynamicPolicy) dedicatedCoresAllocationHandler(ctx context.Context, return nil, fmt.Errorf("not support inplace update resize for dedicated cores") } - switch 
req.Annotations[apiconsts.PodAnnotationMemoryEnhancementNumaBinding] { - case apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable: - return p.dedicatedCoresWithNUMABindingAllocationHandler(ctx, req, persistCheckpoint) - default: - return p.dedicatedCoresWithoutNUMABindingAllocationHandler(ctx, req, persistCheckpoint) + if qosutil.AnnotationsIndicateNUMAAffinity(req.Annotations) || + qosutil.AnnotationsIndicateNUMABinding(req.Annotations) { + return p.dedicatedCoresWithNUMAAffinityAllocationHandler(ctx, req, persistCheckpoint) } + + return p.dedicatedCoresWithoutNUMAAffinityAllocationHandler(ctx, req, persistCheckpoint) } -func (p *DynamicPolicy) dedicatedCoresWithoutNUMABindingAllocationHandler(_ context.Context, +func (p *DynamicPolicy) dedicatedCoresWithoutNUMAAffinityAllocationHandler(_ context.Context, _ *pluginapi.ResourceRequest, persistCheckpoint bool, ) (*pluginapi.ResourceAllocationResponse, error) { - // todo: support dedicated_cores without NUMA binding - return nil, fmt.Errorf("not support dedicated_cores without NUMA binding") + // todo: support dedicated_cores without NUMA affinity + return nil, fmt.Errorf("not support dedicated_cores without NUMA affinity") } -func (p *DynamicPolicy) dedicatedCoresWithNUMABindingAllocationHandler(ctx context.Context, +func (p *DynamicPolicy) dedicatedCoresWithNUMAAffinityAllocationHandler(ctx context.Context, req *pluginapi.ResourceRequest, persistCheckpoint bool, ) (*pluginapi.ResourceAllocationResponse, error) { if req.ContainerType == pluginapi.ContainerType_SIDECAR { @@ -389,7 +387,7 @@ func (p *DynamicPolicy) dedicatedCoresWithNUMABindingAllocationHandler(ctx conte return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err) } - result, err := p.allocateNumaBindingCPUs(podAggregatedRequest, req.Hint, machineState, req.Annotations) + result, err := p.allocateNumaAffinityCPUs(podAggregatedRequest, req.Hint, machineState, req.Annotations) if err != nil { general.ErrorS(err, "unable to 
allocate CPUs", "podNamespace", req.PodNamespace, @@ -447,7 +445,7 @@ func (p *DynamicPolicy) dedicatedCoresWithNUMABindingAllocationHandler(ctx conte return nil, fmt.Errorf("numa binding without numa exclusive allocation result numa node size is %d, "+ "not equal to 1", len(req.Hint.Nodes)) } - allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0]) + allocationInfo.SetSpecifiedNUMAID(req.Hint.Nodes[0]) } // update pod entries directly. @@ -476,6 +474,17 @@ func (p *DynamicPolicy) dedicatedCoresWithNUMABindingAllocationHandler(ctx conte req.PodNamespace, req.PodName, req.ContainerName, err) return nil, fmt.Errorf("PackResourceAllocationResponseByAllocationInfo failed with error: %v", err) } + + // if numa affinity is indicated, but numa binding is not, update numa allocation result + if qosutil.AnnotationsIndicateNUMAAffinity(req.Annotations) && !qosutil.AnnotationsIndicateNUMABinding(req.Annotations) { + err = p.numaAllocationReactor.UpdateAllocation(ctx, allocationInfo) + if err != nil { + general.Errorf("pod: %s/%s, container: %s UpdateNUMAAllocationResult failed with error: %v", + req.PodNamespace, req.PodName, req.ContainerName, err) + return nil, fmt.Errorf("UpdateNUMAAllocationResult failed with error: %v", err) + } + } + return resp, nil } @@ -535,7 +544,7 @@ func (p *DynamicPolicy) allocationSidecarHandler(_ context.Context, return resp, nil } -func (p *DynamicPolicy) sharedCoresWithNUMABindingAllocationHandler(ctx context.Context, +func (p *DynamicPolicy) sharedCoresWithNUMAAffinityAllocationHandler(ctx context.Context, req *pluginapi.ResourceRequest, persistCheckpoint bool, ) (*pluginapi.ResourceAllocationResponse, error) { if req.ContainerType == pluginapi.ContainerType_SIDECAR { @@ -543,8 +552,8 @@ func (p *DynamicPolicy) sharedCoresWithNUMABindingAllocationHandler(ctx context. 
} // there is no need to delete old allocationInfo for the container if it exists, - // allocateSharedNumaBindingCPUs will re-calculate pool size and avoid counting same entry twice - allocationInfo, err := p.allocateSharedNumaBindingCPUs(req, req.Hint, persistCheckpoint) + // allocateSharedNumaAffinityCPUs will re-calculate pool size and avoid counting same entry twice + allocationInfo, err := p.allocateSharedNumaAffinityCPUs(req, req.Hint, persistCheckpoint) if err != nil || allocationInfo == nil { general.ErrorS(err, "unable to allocate CPUs", "podNamespace", req.PodNamespace, @@ -560,7 +569,7 @@ func (p *DynamicPolicy) sharedCoresWithNUMABindingAllocationHandler(ctx context. "result", allocationInfo.AllocationResult.String()) // there is no need to call SetPodEntries and SetMachineState, - // since they are already done in doAndCheckPutAllocationInfo of allocateSharedNumaBindingCPUs + // since they are already done in doAndCheckPutAllocationInfo of allocateSharedNumaAffinityCPUs resp, err := cpuutil.PackAllocationResponse(allocationInfo, string(v1.ResourceCPU), util.OCIPropertyNameCPUSetCPUs, false, true, req) if err != nil { @@ -568,35 +577,52 @@ func (p *DynamicPolicy) sharedCoresWithNUMABindingAllocationHandler(ctx context. 
req.PodNamespace, req.PodName, req.ContainerName, err) return nil, fmt.Errorf("PackResourceAllocationResponseByAllocationInfo failed with error: %v", err) } + + // if numa affinity is indicated, but numa binding is not, update numa allocation result + if qosutil.AnnotationsIndicateNUMAAffinity(req.Annotations) && !qosutil.AnnotationsIndicateNUMABinding(req.Annotations) { + err = p.numaAllocationReactor.UpdateAllocation(ctx, allocationInfo) + if err != nil { + general.Errorf("pod: %s/%s, container: %s UpdateNUMAAllocationResult failed with error: %v", + req.PodNamespace, req.PodName, req.ContainerName, err) + return nil, fmt.Errorf("UpdateNUMAAllocationResult failed with error: %v", err) + } + } + return resp, nil } -func (p *DynamicPolicy) allocateNumaBindingCPUs(numCPUs int, hint *pluginapi.TopologyHint, +func (p *DynamicPolicy) allocateNumaAffinityCPUs(numCPUs int, hint *pluginapi.TopologyHint, machineState state.NUMANodeMap, reqAnnotations map[string]string, ) (machine.CPUSet, error) { + numaBinding := qosutil.AnnotationsIndicateNUMABinding(reqAnnotations) + numaAffinity := qosutil.AnnotationsIndicateNUMAAffinity(reqAnnotations) + numaExclusive := qosutil.AnnotationsIndicateNUMAExclusive(reqAnnotations) + if hint == nil { return machine.NewCPUSet(), fmt.Errorf("hint is nil") } else if len(hint.Nodes) == 0 { return machine.NewCPUSet(), fmt.Errorf("hint is empty") - } else if qosutil.AnnotationsIndicateNUMABinding(reqAnnotations) && - !qosutil.AnnotationsIndicateNUMAExclusive(reqAnnotations) && - len(hint.Nodes) > 1 { + } else if numaBinding && !numaExclusive && len(hint.Nodes) > 1 { return machine.NewCPUSet(), fmt.Errorf("NUMA not exclusive binding container has request larger than 1 NUMA") } result := machine.NewCPUSet() alignedAvailableCPUs := machine.CPUSet{} + alignedAvailableCPUsInNuma := map[int]machine.CPUSet{} for _, numaNode := range hint.Nodes { - alignedAvailableCPUs = 
alignedAvailableCPUs.Union(machineState[int(numaNode)].GetAvailableCPUSet(p.reservedCPUs)) + alignedAvailableCPUsInNuma[int(numaNode)] = machineState[int(numaNode)].GetAvailableCPUSet(p.reservedCPUs) + alignedAvailableCPUs = alignedAvailableCPUs.Union(alignedAvailableCPUsInNuma[int(numaNode)]) } var alignedCPUs machine.CPUSet - if qosutil.AnnotationsIndicateNUMAExclusive(reqAnnotations) { + if numaExclusive { // todo: currently we hack dedicated_cores with NUMA binding take up whole NUMA, // and we will modify strategy here if assumption above breaks. alignedCPUs = alignedAvailableCPUs.Clone() - } else { + } else if numaBinding { + // allocate cpu for numa binding pod (may also indicate numa affinity); + // numa affinity pods without numa binding are handled in the numaAffinity branch below var err error alignedCPUs, err = calculator.TakeByTopology(p.machineInfo, alignedAvailableCPUs, numCPUs, true) if err != nil { @@ -607,6 +633,18 @@ func (p *DynamicPolicy) allocateNumaBindingCPUs(numCPUs int, hint *pluginapi.Top return machine.NewCPUSet(), fmt.Errorf("take cpu for NUMA not exclusive binding container failed with err: %v", err) } + } else if numaAffinity { + // allocate cpu for numa affinity pod, prefer to allocate cpus spread across NUMA nodes + var err error + alignedCPUs, err = calculator.TakeByTopologyWithSpreading(p.machineInfo, alignedAvailableCPUsInNuma, numCPUs, true) + if err != nil { + general.ErrorS(err, "take cpu for NUMA affinity container failed", + "hints", hint.Nodes, + "alignedAvailableCPUs", alignedAvailableCPUs.String()) + + return machine.NewCPUSet(), + fmt.Errorf("take cpu for NUMA affinity container failed with err: %v", err) + } } general.InfoS("allocate by hints", @@ -627,7 +665,7 @@ func (p *DynamicPolicy) allocateNumaBindingCPUs(numCPUs int, hint *pluginapi.Top return result, nil } -func (p *DynamicPolicy) allocateSharedNumaBindingCPUs(req *pluginapi.ResourceRequest, +func (p *DynamicPolicy) allocateSharedNumaAffinityCPUs(req *pluginapi.ResourceRequest, hint 
*pluginapi.TopologyHint, persistCheckpoint bool, ) (*state.AllocationInfo, error) { if req == nil { @@ -637,7 +675,7 @@ func (p *DynamicPolicy) allocateSharedNumaBindingCPUs(req *pluginapi.ResourceReq } else if len(hint.Nodes) == 0 { return nil, fmt.Errorf("hint is empty") } else if len(hint.Nodes) > 1 { - return nil, fmt.Errorf("shared_cores with numa_binding container has request larger than 1 NUMA") + return nil, fmt.Errorf("shared_cores with numa_affinity container has request larger than 1 NUMA") } reqInt, reqFloat64, err := util.GetQuantityFromResourceReq(req) @@ -645,7 +683,7 @@ func (p *DynamicPolicy) allocateSharedNumaBindingCPUs(req *pluginapi.ResourceReq return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err) } - general.InfoS("allocateSharedNumaBindingCPUs by hints", + general.InfoS("allocateSharedNumaAffinityCPUs by hints", "hints", hint.Nodes, "numCPUsInt", reqInt, "numCPUsFloat64", reqFloat64) @@ -657,7 +695,7 @@ func (p *DynamicPolicy) allocateSharedNumaBindingCPUs(req *pluginapi.ResourceReq InitTimestamp: time.Now().Format(util.QRMTimeFormat), RequestQuantity: reqFloat64, } - allocationInfo.SetSpecifiedNUMABindingNUMAID(hint.Nodes[0]) + allocationInfo.SetSpecifiedNUMAID(hint.Nodes[0]) if util.PodInplaceUpdateResizing(req) { originAllocationInfo := p.state.GetAllocationInfo(allocationInfo.PodUid, allocationInfo.ContainerName) @@ -842,14 +880,14 @@ func (p *DynamicPolicy) calcPoolResizeRequest(originAllocation, allocation *stat allocation.PodNamespace, allocation.PodName, allocation.ContainerName, originPodAggregatedRequest, podAggregatedRequest) } - // only support share cores inplace update resize now (include non-binding share cores and share cores with NUMA binding) - if allocation.CheckSharedNUMABinding() { + // only support share cores inplace update resize now + if allocation.CheckSharedNUMAAffinity() { // check snb numa migrate for inplace update resize - originTargetNumaID, err := 
state.GetSharedNUMABindingTargetNuma(originAllocation) + originTargetNumaID, err := state.GetSharedNUMAAffinityTargetNuma(originAllocation) if err != nil { return "", 0, 0, fmt.Errorf("failed to get origin target NUMA") } - targetNumaID, err = state.GetSharedNUMABindingTargetNuma(allocation) + targetNumaID, err = state.GetSharedNUMAAffinityTargetNuma(allocation) if err != nil { return "", 0, 0, fmt.Errorf("failed to get target NUMA") } @@ -864,9 +902,9 @@ func (p *DynamicPolicy) calcPoolResizeRequest(originAllocation, allocation *stat } // get snb pool name - poolName, err = allocation.GetSpecifiedNUMABindingPoolName() + poolName, err = allocation.GetSpecifiedNUMAPoolName() if err != nil { - return "", 0, 0, fmt.Errorf("GetSpecifiedNUMABindingPoolName for %s/%s/%s failed with error: %v", + return "", 0, 0, fmt.Errorf("GetSpecifiedNUMAPoolName for %s/%s/%s failed with error: %v", allocation.PodNamespace, allocation.PodName, allocation.ContainerName, err) } } @@ -920,7 +958,7 @@ func (p *DynamicPolicy) adjustAllocationEntries(persistCheckpoint bool) error { // adjustPoolsAndIsolatedEntries works for the following steps // 1. calculate pools and isolated cpusets according to expectant quantities -// 2. make reclaimed overlap with numa-binding +// 2. make reclaimed overlap with numa-affinity // 3. apply them to local state // 4. 
clean pools func (p *DynamicPolicy) adjustPoolsAndIsolatedEntries( @@ -952,13 +990,13 @@ func (p *DynamicPolicy) adjustPoolsAndIsolatedEntries( return fmt.Errorf("generatePoolsAndIsolation failed with error: %v", err) } - err = p.reclaimOverlapNUMABinding(poolsCPUSet, entries) + err = p.reclaimOverlapNUMAAffinity(poolsCPUSet, entries) if err != nil { - return fmt.Errorf("reclaimOverlapNUMABinding failed with error: %v", err) + return fmt.Errorf("reclaimOverlapNUMAAffinity failed with error: %v", err) } err = p.applyPoolsAndIsolatedInfo(poolsCPUSet, isolatedCPUSet, entries, - machineState, state.GetSharedBindingNUMAsFromQuantityMap(poolsQuantityMap), persistCheckpoint) + machineState, state.GetSharedAffinityNUMAsFromQuantityMap(poolsQuantityMap), persistCheckpoint) if err != nil { return fmt.Errorf("applyPoolsAndIsolatedInfo failed with error: %v", err) } @@ -971,10 +1009,10 @@ func (p *DynamicPolicy) adjustPoolsAndIsolatedEntries( return nil } -// reclaimOverlapNUMABinding unions calculated reclaim pool in empty NUMAs -// with the intersection of previous reclaim pool and non-ramp-up dedicated_cores numa_binding containers -func (p *DynamicPolicy) reclaimOverlapNUMABinding(poolsCPUSet map[string]machine.CPUSet, entries state.PodEntries) error { - // reclaimOverlapNUMABinding only works with cpu advisor and reclaim enabled +// reclaimOverlapNUMAAffinity unions calculated reclaim pool in empty NUMAs +// with the intersection of previous reclaim pool and non-ramp-up dedicated_cores numa_affinity containers +func (p *DynamicPolicy) reclaimOverlapNUMAAffinity(poolsCPUSet map[string]machine.CPUSet, entries state.PodEntries) error { + // reclaimOverlapNUMAAffinity only works with cpu advisor and reclaim enabled if !(p.enableCPUAdvisor && p.dynamicConfig.GetDynamicConfiguration().EnableReclaim) { return nil } @@ -993,10 +1031,10 @@ func (p *DynamicPolicy) reclaimOverlapNUMABinding(poolsCPUSet map[string]machine } for _, allocationInfo := range containerEntries { - if 
!(allocationInfo != nil && allocationInfo.CheckDedicatedNUMABinding() && allocationInfo.CheckMainContainer()) { + if !(allocationInfo != nil && allocationInfo.CheckDedicatedNUMAAffinity() && allocationInfo.CheckMainContainer()) { continue } else if allocationInfo.RampUp { - general.Infof("dedicated numa_binding pod: %s/%s container: %s is in ramp up, not to overlap reclaim pool with it", + general.Infof("dedicated numa_affinity pod: %s/%s container: %s is in ramp up, not to overlap reclaim pool with it", allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName) continue } @@ -1006,7 +1044,7 @@ func (p *DynamicPolicy) reclaimOverlapNUMABinding(poolsCPUSet map[string]machine } if poolsCPUSet[commonstate.PoolNameReclaim].IsEmpty() { - return fmt.Errorf("reclaim pool is empty after overlapping with dedicated_cores numa_binding containers") + return fmt.Errorf("reclaim pool is empty after overlapping with dedicated_cores numa_affinity containers") } general.Infof("nonOverlapReclaimCPUSet: %s, finalReclaimCPUSet: %s", nonOverlapReclaimCPUSet.String(), poolsCPUSet[commonstate.PoolNameReclaim].String()) @@ -1014,27 +1052,27 @@ func (p *DynamicPolicy) reclaimOverlapNUMABinding(poolsCPUSet map[string]machine } // applyPoolsAndIsolatedInfo generates the latest checkpoint by pools and isolated cpusets calculation results. -// 1. construct entries for isolated containers (probably be dedicated_cores not numa_binding ) +// 1. construct entries for isolated containers (probably be dedicated_cores not numa_affinity ) // 2. construct entries for all pools -// 3. construct entries for shared_cores, reclaimed_cores, numa_binding dedicated_cores containers +// 3. 
construct entries for shared_cores, reclaimed_cores, numa_affinity dedicated_cores containers func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine.CPUSet, isolatedCPUSet map[string]map[string]machine.CPUSet, curEntries state.PodEntries, - machineState state.NUMANodeMap, sharedBindingNUMAs sets.Int, persistCheckpoint bool, + machineState state.NUMANodeMap, sharedAffinityNUMAs sets.Int, persistCheckpoint bool, ) error { newPodEntries := make(state.PodEntries) unionDedicatedIsolatedCPUSet := machine.NewCPUSet() - // calculate NUMAs without actual numa_binding reclaimed pods - nonReclaimActualBindingNUMAs := p.state.GetMachineState().GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMABinding)) - // 1. construct entries for isolated containers (probably be dedicated_cores not numa_binding ) + // calculate NUMAs without actual numa_affinity reclaimed pods + nonReclaimActualAffinityNUMAs := p.state.GetMachineState().GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMAAffinity)) + // 1. 
construct entries for isolated containers (probably be dedicated_cores not numa_affinity ) for podUID, containerEntries := range isolatedCPUSet { for containerName, isolatedCPUs := range containerEntries { allocationInfo := curEntries[podUID][containerName] if allocationInfo == nil { general.Errorf("isolated pod: %s, container: %s without entry in current checkpoint", podUID, containerName) continue - } else if !allocationInfo.CheckDedicated() || allocationInfo.CheckNUMABinding() { - general.Errorf("isolated pod: %s, container: %s isn't dedicated_cores without NUMA binding", podUID, containerName) + } else if !allocationInfo.CheckDedicated() || allocationInfo.CheckNUMAAffinity() { + general.Errorf("isolated pod: %s, container: %s isn't dedicated_cores without NUMA affinity", podUID, containerName) continue } @@ -1110,20 +1148,20 @@ func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine } } - // revise reclaim pool size to avoid reclaimed_cores and numa_binding containers - // in NUMAs without cpuset actual binding - err := p.reviseReclaimPool(newPodEntries, nonReclaimActualBindingNUMAs, unionDedicatedIsolatedCPUSet) + // revise reclaim pool size to avoid reclaimed_cores and numa_affinity containers + // in NUMAs without cpuset actual affinity + err := p.reviseReclaimPool(newPodEntries, nonReclaimActualAffinityNUMAs, unionDedicatedIsolatedCPUSet) if err != nil { return err } - sharedBindingNUMACPUs := p.machineInfo.CPUDetails.CPUsInNUMANodes(sharedBindingNUMAs.UnsortedList()...) - // rampUpCPUs include reclaim pool in NUMAs without NUMA_binding cpus + sharedAffinityNUMACPUs := p.machineInfo.CPUDetails.CPUsInNUMANodes(sharedAffinityNUMAs.UnsortedList()...) + // rampUpCPUs include reclaim pool in NUMAs without NUMA_affinity cpus rampUpCPUs := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, nil, state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckDedicatedNUMABindingNUMAExclusive)). 
Difference(unionDedicatedIsolatedCPUSet). - Difference(sharedBindingNUMACPUs) + Difference(sharedAffinityNUMACPUs) forbiddenPoolsCPUs, err := state.GetUnitedPoolsCPUs(state.ForbiddenPools, newPodEntries) if err != nil { return fmt.Errorf("get forbidden united pools‘ cpus failed with error: %v", err) @@ -1136,7 +1174,7 @@ func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine rampUpCPUs.String(), err) } - // 3. construct entries for shared_cores, reclaimed_cores, numa_binding dedicated_cores containers + // 3. construct entries for shared_cores, reclaimed_cores, numa_affinity dedicated_cores containers for podUID, containerEntries := range curEntries { if containerEntries.IsPoolEntry() { continue @@ -1166,13 +1204,13 @@ func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine case apiconsts.PodAnnotationQoSLevelDedicatedCores: newPodEntries[podUID][containerName].OwnerPoolName = allocationInfo.GetPoolName() - // for numa_binding containers, we just clone checkpoint already exist - if allocationInfo.CheckDedicatedNUMABinding() { + // for numa_affinity containers, we just clone checkpoint already exist + if allocationInfo.CheckDedicatedNUMAAffinity() { continue containerLoop } - // dedicated_cores without numa_binding is not isolated, we will try to isolate it in next adjustment. - general.Warningf("pod: %s/%s, container: %s is dedicated_cores without numa_binding but not isolated, "+ + // dedicated_cores without numa_affinity is not isolated, we will try to isolate it in next adjustment. 
+ general.Warningf("pod: %s/%s, container: %s is dedicated_cores without numa_affinity but not isolated, "+ "we put it into fallback pool: %s temporary", allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, rampUpCPUs.String()) @@ -1198,22 +1236,22 @@ func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine case apiconsts.PodAnnotationQoSLevelSharedCores: var ownerPoolName string - if allocationInfo.CheckSharedNUMABinding() { + if allocationInfo.CheckSharedNUMAAffinity() { ownerPoolName = allocationInfo.GetOwnerPoolName() if ownerPoolName == commonstate.EmptyOwnerPoolName { var err error - // why do we integrate GetOwnerPoolName + GetSpecifiedNUMABindingPoolName into GetPoolName for SharedNUMABinding containers? - // it's because we reply on GetSpecifiedPoolName (in GetPoolName) when calling CheckNUMABindingAntiAffinity, + // why do we integrate GetOwnerPoolName + GetSpecifiedNUMAPoolName into GetPoolName for SharedNUMAAffinity containers? + // it's because we rely on GetSpecifiedPoolName (in GetPoolName) when calling CheckNUMAAffinityAntiAffinity, // At that time, NUMA hint for the candidate container isn't confirmed, so we can't implement NUMA hint aware logic in GetSpecifiedPoolName. 
- ownerPoolName, err = allocationInfo.GetSpecifiedNUMABindingPoolName() + ownerPoolName, err = allocationInfo.GetSpecifiedNUMAPoolName() if err != nil { - return fmt.Errorf("pod: %s/%s, container: %s is shared_cores with numa_binding, "+ - "GetSpecifiedNUMABindingPoolName failed with error: %v", + return fmt.Errorf("pod: %s/%s, container: %s is shared_cores with numa_affinity, "+ + "GetSpecifiedNUMAPoolName failed with error: %v", allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, err) } - } // else already in a numa_binding share pool or isolated + } // else already in a numa_affinity share pool or isolated } else { ownerPoolName = allocationInfo.GetPoolName() } @@ -1238,6 +1276,9 @@ func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, ownerPoolName, allocationInfo.AllocationResult.String(), poolEntry.AllocationResult.String()) + // Notice: we assume that SharedNUMAAffinity and SharedNUMABinding pods cannot run in the same pool, + // so we need to distinguish SharedNUMAAffinity and SharedNUMABinding pool names here to be able to + // differentiate SharedNUMAAffinity and SharedNUMABinding containers in GetFilteredPoolsCPUSetMap. 
if allocationInfo.CheckSharedNUMABinding() { poolEntry.QoSLevel = apiconsts.PodAnnotationQoSLevelSharedCores // set SharedNUMABinding declarations to pool entry containing SharedNUMABinding containers, @@ -1245,6 +1286,13 @@ func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine poolEntry.Annotations = general.MergeMap(poolEntry.Annotations, map[string]string{ apiconsts.PodAnnotationMemoryEnhancementNumaBinding: apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable, }) + } else if allocationInfo.CheckNonBindingNUMAAffinity() { + poolEntry.QoSLevel = apiconsts.PodAnnotationQoSLevelSharedCores + // set SharedNUMAAffinity declarations to pool entry containing SharedNUMAAffinity containers, + // in order to differentiate them from non-affinity share cores pools during GetFilteredPoolsCPUSetMap. + poolEntry.Annotations = general.MergeMap(poolEntry.Annotations, map[string]string{ + apiconsts.PodAnnotationCPUEnhancementNumaAffinity: apiconsts.PodAnnotationCPUEnhancementNumaAffinityEnable, + }) } newPodEntries[podUID][containerName].OwnerPoolName = ownerPoolName @@ -1259,7 +1307,7 @@ func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine return err } - err = p.updateReclaimAllocationResultByPoolEntry(newPodEntries[podUID][containerName], poolEntry, nonReclaimActualBindingNUMAs) + err = p.updateReclaimAllocationResultByPoolEntry(newPodEntries[podUID][containerName], poolEntry, nonReclaimActualAffinityNUMAs) if err != nil { return err } @@ -1287,7 +1335,7 @@ func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine return nil } -func (p *DynamicPolicy) generateNUMABindingPoolsCPUSetInPlace(poolsCPUSet map[string]machine.CPUSet, +func (p *DynamicPolicy) generateNUMAPoolsCPUSetInPlace(poolsCPUSet map[string]machine.CPUSet, poolsQuantityMap map[string]map[int]int, availableCPUs machine.CPUSet, ) (machine.CPUSet, error) { numaToPoolQuantityMap := make(map[int]map[string]int) @@ -1297,7 
+1345,7 @@ func (p *DynamicPolicy) generateNUMABindingPoolsCPUSetInPlace(poolsCPUSet map[st for poolName, numaToQuantity := range poolsQuantityMap { for numaID, quantity := range numaToQuantity { if numaID == commonstate.FakedNUMAID { - // only deal with numa_binding pools + // only deal with numa_affinity pools continue } @@ -1323,7 +1371,7 @@ func (p *DynamicPolicy) generateNUMABindingPoolsCPUSetInPlace(poolsCPUSet map[st if numaPoolsTotalQuantity <= availableSize && enableReclaim && !p.state.GetAllowSharedCoresOverlapReclaimedCores() { leftCPUs, tErr = p.takeCPUsForPoolsInPlace(numaPoolsToQuantityMap, poolsCPUSet, numaAvailableCPUs) if tErr != nil { - return originalAvailableCPUSet, fmt.Errorf("allocate cpus for numa_binding pools in NUMA: %d failed with error: %v", + return originalAvailableCPUSet, fmt.Errorf("allocate cpus for numa_affinity pools in NUMA: %d failed with error: %v", numaID, tErr) } } else { @@ -1332,7 +1380,7 @@ func (p *DynamicPolicy) generateNUMABindingPoolsCPUSetInPlace(poolsCPUSet map[st leftCPUs, tErr = p.generateProportionalPoolsCPUSetInPlace(numaPoolsToQuantityMap, poolsCPUSet, numaAvailableCPUs) if tErr != nil { - return originalAvailableCPUSet, fmt.Errorf("generateProportionalPoolsCPUSetInPlace for numa_binding pools in NUMA: %d failed with error: %v", + return originalAvailableCPUSet, fmt.Errorf("generateProportionalPoolsCPUSetInPlace for numa_affinity pools in NUMA: %d failed with error: %v", numaID, tErr) } } @@ -1352,9 +1400,9 @@ func (p *DynamicPolicy) generatePoolsAndIsolation(poolsQuantityMap map[string]ma reclaimOverlapShareRatio map[string]float64) (poolsCPUSet map[string]machine.CPUSet, isolatedCPUSet map[string]map[string]machine.CPUSet, err error, ) { - poolsBindingNUMAs := sets.NewInt() + poolsAffinityNUMAs := sets.NewInt() poolsToSkip := make([]string, 0, len(poolsQuantityMap)) - nonBindingPoolsQuantityMap := make(map[string]int) + nonAffinityPoolsQuantityMap := make(map[string]int) for poolName, numaToQuantity := range 
poolsQuantityMap { if len(numaToQuantity) > 1 { err = fmt.Errorf("pool: %s cross NUMAs: %+v", poolName, numaToQuantity) @@ -1365,9 +1413,9 @@ func (p *DynamicPolicy) generatePoolsAndIsolation(poolsQuantityMap map[string]ma poolsToSkip = append(poolsToSkip, poolName) } else { if numaID != commonstate.FakedNUMAID { - poolsBindingNUMAs.Insert(numaID) + poolsAffinityNUMAs.Insert(numaID) } else { - nonBindingPoolsQuantityMap[poolName] = quantity + nonAffinityPoolsQuantityMap[poolName] = quantity } } } @@ -1397,78 +1445,78 @@ func (p *DynamicPolicy) generatePoolsAndIsolation(poolsQuantityMap map[string]ma poolsCPUSet = make(map[string]machine.CPUSet) var nbpErr error - availableCPUs, nbpErr = p.generateNUMABindingPoolsCPUSetInPlace(poolsCPUSet, poolsQuantityMap, availableCPUs) + availableCPUs, nbpErr = p.generateNUMAPoolsCPUSetInPlace(poolsCPUSet, poolsQuantityMap, availableCPUs) if nbpErr != nil { - err = fmt.Errorf("generateNUMABindingPoolsCPUSetInPlace failed with error: %v", nbpErr) + err = fmt.Errorf("generateNUMAPoolsCPUSetInPlace failed with error: %v", nbpErr) return } - nonBindingAvailableCPUs := machine.NewCPUSet() + nonAffinityAvailableCPUs := machine.NewCPUSet() for _, numaID := range p.machineInfo.CPUDetails.NUMANodes().ToSliceNoSortInt() { - if poolsBindingNUMAs.Has(numaID) { + if poolsAffinityNUMAs.Has(numaID) { continue } - nonBindingAvailableCPUs = nonBindingAvailableCPUs.Union(p.machineInfo.CPUDetails.CPUsInNUMANodes(numaID).Intersection(availableCPUs)) + nonAffinityAvailableCPUs = nonAffinityAvailableCPUs.Union(p.machineInfo.CPUDetails.CPUsInNUMANodes(numaID).Intersection(availableCPUs)) } - availableCPUs = availableCPUs.Difference(nonBindingAvailableCPUs) + availableCPUs = availableCPUs.Difference(nonAffinityAvailableCPUs) - nonBindingAvailableSize := nonBindingAvailableCPUs.Size() - nonBindingPoolsTotalQuantity := general.SumUpMapValues(nonBindingPoolsQuantityMap) + nonAffinityAvailableSize := nonAffinityAvailableCPUs.Size() + 
nonAffinityPoolsTotalQuantity := general.SumUpMapValues(nonAffinityPoolsQuantityMap) isolatedCPUSet = make(map[string]map[string]machine.CPUSet) isolatedTotalQuantity := general.SumUpMultipleMapValues(isolatedQuantityMap) - general.Infof("isolatedTotalQuantity: %d, nonBindingPoolsTotalQuantity: %d, nonBindingAvailableSize: %d", - isolatedTotalQuantity, nonBindingPoolsTotalQuantity, nonBindingAvailableSize) + general.Infof("isolatedTotalQuantity: %d, nonAffinityPoolsTotalQuantity: %d, nonAffinityAvailableSize: %d", + isolatedTotalQuantity, nonAffinityPoolsTotalQuantity, nonAffinityAvailableSize) var tErr error - if nonBindingPoolsTotalQuantity+isolatedTotalQuantity <= nonBindingAvailableSize { + if nonAffinityPoolsTotalQuantity+isolatedTotalQuantity <= nonAffinityAvailableSize { general.Infof("all pools and isolated containers could be allocated") - isolatedCPUSet, nonBindingAvailableCPUs, tErr = p.takeCPUsForContainers(isolatedQuantityMap, nonBindingAvailableCPUs) + isolatedCPUSet, nonAffinityAvailableCPUs, tErr = p.takeCPUsForContainers(isolatedQuantityMap, nonAffinityAvailableCPUs) if tErr != nil { err = fmt.Errorf("allocate isolated cpus for dedicated_cores failed with error: %v", tErr) return } if !p.state.GetAllowSharedCoresOverlapReclaimedCores() { - nonBindingAvailableCPUs, tErr = p.takeCPUsForPoolsInPlace(nonBindingPoolsQuantityMap, poolsCPUSet, nonBindingAvailableCPUs) + nonAffinityAvailableCPUs, tErr = p.takeCPUsForPoolsInPlace(nonAffinityPoolsQuantityMap, poolsCPUSet, nonAffinityAvailableCPUs) if tErr != nil { err = fmt.Errorf("allocate cpus for pools failed with error: %v", tErr) return } } else { - general.Infof("allowSharedCoresOverlapReclaimedCores is true, take all nonBindingAvailableCPUs for pools") - nonBindingAvailableCPUs, tErr = p.generateProportionalPoolsCPUSetInPlace(nonBindingPoolsQuantityMap, poolsCPUSet, nonBindingAvailableCPUs) + general.Infof("allowSharedCoresOverlapReclaimedCores is true, take all nonAffinityAvailableCPUs for pools") + 
nonAffinityAvailableCPUs, tErr = p.generateProportionalPoolsCPUSetInPlace(nonAffinityPoolsQuantityMap, poolsCPUSet, nonAffinityAvailableCPUs) if tErr != nil { err = fmt.Errorf("generateProportionalPoolsCPUSetInPlace pools failed with error: %v", tErr) return } } - } else if nonBindingPoolsTotalQuantity <= nonBindingAvailableSize { + } else if nonAffinityPoolsTotalQuantity <= nonAffinityAvailableSize { general.Infof("all pools could be allocated, all isolated containers would be put to pools") if !p.state.GetAllowSharedCoresOverlapReclaimedCores() { - nonBindingAvailableCPUs, tErr = p.takeCPUsForPoolsInPlace(nonBindingPoolsQuantityMap, poolsCPUSet, nonBindingAvailableCPUs) + nonAffinityAvailableCPUs, tErr = p.takeCPUsForPoolsInPlace(nonAffinityPoolsQuantityMap, poolsCPUSet, nonAffinityAvailableCPUs) if tErr != nil { err = fmt.Errorf("allocate cpus for pools failed with error: %v", tErr) return } } else { - general.Infof("allowSharedCoresOverlapReclaimedCores is true, take all nonBindingAvailableCPUs for pools") - nonBindingAvailableCPUs, tErr = p.generateProportionalPoolsCPUSetInPlace(nonBindingPoolsQuantityMap, poolsCPUSet, nonBindingAvailableCPUs) + general.Infof("allowSharedCoresOverlapReclaimedCores is true, take all nonAffinityAvailableCPUs for pools") + nonAffinityAvailableCPUs, tErr = p.generateProportionalPoolsCPUSetInPlace(nonAffinityPoolsQuantityMap, poolsCPUSet, nonAffinityAvailableCPUs) if tErr != nil { err = fmt.Errorf("generateProportionalPoolsCPUSetInPlace pools failed with error: %v", tErr) return } } - } else if nonBindingPoolsTotalQuantity > 0 { + } else if nonAffinityPoolsTotalQuantity > 0 { general.Infof("can't allocate for all pools") - nonBindingAvailableCPUs, tErr = p.generateProportionalPoolsCPUSetInPlace(nonBindingPoolsQuantityMap, poolsCPUSet, nonBindingAvailableCPUs) + nonAffinityAvailableCPUs, tErr = p.generateProportionalPoolsCPUSetInPlace(nonAffinityPoolsQuantityMap, poolsCPUSet, nonAffinityAvailableCPUs) if tErr != nil { err = 
fmt.Errorf("generateProportionalPoolsCPUSetInPlace pools failed with error: %v", tErr) @@ -1476,7 +1524,7 @@ func (p *DynamicPolicy) generatePoolsAndIsolation(poolsQuantityMap map[string]ma } } - availableCPUs = availableCPUs.Union(nonBindingAvailableCPUs) + availableCPUs = availableCPUs.Union(nonAffinityAvailableCPUs) // deal with reserve pool if poolsCPUSet[commonstate.PoolNameReserve].IsEmpty() { @@ -1496,7 +1544,7 @@ func (p *DynamicPolicy) generatePoolsAndIsolation(poolsQuantityMap map[string]ma enableReclaim := p.dynamicConfig.GetDynamicConfiguration().EnableReclaim if !enableReclaim && poolsCPUSet[commonstate.PoolNameReclaim].Size() > p.reservedReclaimedCPUsSize { poolsCPUSet[commonstate.PoolNameReclaim] = p.apportionReclaimedPool( - poolsCPUSet, poolsCPUSet[commonstate.PoolNameReclaim].Clone(), nonBindingPoolsQuantityMap) + poolsCPUSet, poolsCPUSet[commonstate.PoolNameReclaim].Clone(), nonAffinityPoolsQuantityMap) general.Infof("apportionReclaimedPool finished, current %s pool: %s", commonstate.PoolNameReclaim, poolsCPUSet[commonstate.PoolNameReclaim].String()) } @@ -1625,16 +1673,16 @@ func getProportionalPoolsQuantityMap(originalPoolsQuantityMap map[string]int, av return proportionalPoolsQuantityMap, totalProportionalPoolsQuantity } -// apportionReclaimedPool tries to allocate reclaimed cores to none-binding && none-reclaimed pools. +// apportionReclaimedPool tries to allocate reclaimed cores to none-affinity && none-reclaimed pools. 
// if we disable reclaim on current node, this could be used a down-grade strategy // to disable reclaimed workloads in emergency -func (p *DynamicPolicy) apportionReclaimedPool(poolsCPUSet map[string]machine.CPUSet, reclaimedCPUs machine.CPUSet, nonBindingPoolsQuantityMap map[string]int) machine.CPUSet { +func (p *DynamicPolicy) apportionReclaimedPool(poolsCPUSet map[string]machine.CPUSet, reclaimedCPUs machine.CPUSet, nonAffinityPoolsQuantityMap map[string]int) machine.CPUSet { totalSize := 0 for poolName, poolCPUs := range poolsCPUSet { if state.ResidentPools.Has(poolName) { continue - } else if _, found := nonBindingPoolsQuantityMap[poolName]; !found { - // numa-binding && none-reclaimed pools already handled in generateNUMABindingPoolsCPUSetInPlace + } else if _, found := nonAffinityPoolsQuantityMap[poolName]; !found { + // numa-affinity && none-reclaimed pools already handled in generateNUMAAffinityPoolsCPUSetInPlace continue } totalSize += poolCPUs.Size() @@ -1648,8 +1696,8 @@ func (p *DynamicPolicy) apportionReclaimedPool(poolsCPUSet map[string]machine.CP for poolName, poolCPUs := range poolsCPUSet { if state.ResidentPools.Has(poolName) { continue - } else if _, found := nonBindingPoolsQuantityMap[poolName]; !found { - // numa-binding && none-reclaimed pools already handled in generateNUMABindingPoolsCPUSetInPlace + } else if _, found := nonAffinityPoolsQuantityMap[poolName]; !found { + // numa-affinity && none-reclaimed pools already handled in generateNUMAAffinityPoolsCPUSetInPlace continue } @@ -1964,12 +2012,12 @@ func (p *DynamicPolicy) getSystemPoolCPUSetAndNumaAwareAssignments(podEntries st // if pool set is empty, try to get default cpuset if poolCPUSet.IsEmpty() { - // if the pod is numa binding, get the default cpuset from machine state - if allocationInfo.CheckNUMABinding() { + // if the pod is numa affinity, get the default cpuset from machine state + if allocationInfo.CheckNUMAAffinity() { poolCPUSet = 
p.state.GetMachineState().GetAvailableCPUSet(p.reservedCPUs) } - // if the default cpuset is empty or no numa binding, use all cpuset as default cpuset + // if the default cpuset is empty or no numa affinity, use all cpuset as default cpuset if poolCPUSet.IsEmpty() { poolCPUSet = p.machineInfo.CPUDetails.CPUs() } @@ -2009,14 +2057,14 @@ func (p *DynamicPolicy) getAllocationPoolEntry(allocationInfo *state.AllocationI } func (p *DynamicPolicy) updateReclaimAllocationResultByPoolEntry(allocationInfo *state.AllocationInfo, - poolEntry *state.AllocationInfo, nonReclaimActualBindingNUMAs machine.CPUSet, + poolEntry *state.AllocationInfo, nonReclaimActualAffinityNUMAs machine.CPUSet, ) error { - numaID, err := allocationInfo.GetSpecifiedNUMABindingNUMAID() + numaID, err := allocationInfo.GetSpecifiedNUMAID() if err != nil { return err } - getActualNUMABindingResult := func(topologyAwareAssignments map[int]machine.CPUSet) (machine.CPUSet, map[int]machine.CPUSet, error) { + getActualNUMAAffinityResult := func(topologyAwareAssignments map[int]machine.CPUSet) (machine.CPUSet, map[int]machine.CPUSet, error) { var ( actualTopologyAwareAssignments map[int]machine.CPUSet actualAllocationResult machine.CPUSet @@ -2024,7 +2072,7 @@ func (p *DynamicPolicy) updateReclaimAllocationResultByPoolEntry(allocationInfo if numaID != commonstate.FakedNUMAID { cpuSet, ok := topologyAwareAssignments[numaID] if !ok { - return machine.CPUSet{}, nil, fmt.Errorf("pod: %s/%s container: %s is reclaimed_cores with numa_binding specified numa: %d not found in topologyAwareAssignments: %v", + return machine.CPUSet{}, nil, fmt.Errorf("pod: %s/%s container: %s is reclaimed_cores with numa_affinity specified numa: %d not found in topologyAwareAssignments: %v", allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, numaID, topologyAwareAssignments) } actualAllocationResult = cpuSet.Clone() @@ -2036,7 +2084,7 @@ func (p *DynamicPolicy) 
updateReclaimAllocationResultByPoolEntry(allocationInfo numaSet := machine.NewCPUSet() newTopologyAwareAssignments := make(map[int]machine.CPUSet) for numaNode, cpuSet := range topologyAwareAssignments { - if !nonReclaimActualBindingNUMAs.Contains(numaNode) { + if !nonReclaimActualAffinityNUMAs.Contains(numaNode) { continue } @@ -2052,14 +2100,14 @@ func (p *DynamicPolicy) updateReclaimAllocationResultByPoolEntry(allocationInfo return actualAllocationResult, actualTopologyAwareAssignments, nil } - actualAllocationResult, actualTopologyAwareAssignments, err := getActualNUMABindingResult(machine.DeepcopyCPUAssignment(poolEntry.TopologyAwareAssignments)) + actualAllocationResult, actualTopologyAwareAssignments, err := getActualNUMAAffinityResult(machine.DeepcopyCPUAssignment(poolEntry.TopologyAwareAssignments)) if err != nil { - return fmt.Errorf("get actual NUMA binding result: %v", err) + return fmt.Errorf("get actual NUMA affinity result: %v", err) } - actualOriginalAllocationResult, actualOriginalTopologyAwareAssignments, err := getActualNUMABindingResult(machine.DeepcopyCPUAssignment(poolEntry.OriginalTopologyAwareAssignments)) + actualOriginalAllocationResult, actualOriginalTopologyAwareAssignments, err := getActualNUMAAffinityResult(machine.DeepcopyCPUAssignment(poolEntry.OriginalTopologyAwareAssignments)) if err != nil { - return fmt.Errorf("get original actual NUMA binding result: %v", err) + return fmt.Errorf("get original actual NUMA affinity result: %v", err) } general.Infof("put pod: %s/%s container: %s to pool: %s, set its allocation result from %s to %s", diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_async_handler.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_async_handler.go index eec3541133..7ae2c0d729 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_async_handler.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_async_handler.go @@ -46,7 +46,7 @@ import ( ) const ( - 
metricsNamePodTotalRequestLargerThanBindingCPUSet = "pod_total_request_larger_than_cpu_set" + metricsNamePodTotalRequestLargerThanAllocatedCPUSet = "pod_total_request_larger_than_cpu_set" ) // checkCPUSet emit errors if the memory allocation falls into unexpected results @@ -305,14 +305,14 @@ func (p *DynamicPolicy) emitExceededMetrics( continue } - // check if the pod exceeds the binding cpuset by more than 1 core + // check if the pod exceeds the affinity cpuset by more than 1 core outOfTolerance := int64(cs.cpuset.Size()+1) < (cs.totalMilliCPURequest / 1000) general.Errorf("pod: %s/%s, ownerPoolName: %s, qosLevel: %s, cpuset: %s, size %d, exceeds total cpu request: %.3f, exceeded ratio: %.3f, outOfTolerance: %v", pod.Namespace, pod.Name, mainContainerEntry.OwnerPoolName, mainContainerEntry.QoSLevel, cpuset, cs.cpuset.Size(), float64(cs.totalMilliCPURequest)/1000, exceededRatio, outOfTolerance) - _ = p.emitter.StoreFloat64(metricsNamePodTotalRequestLargerThanBindingCPUSet, exceededRatio, metrics.MetricTypeNameRaw, []metrics.MetricTag{ + _ = p.emitter.StoreFloat64(metricsNamePodTotalRequestLargerThanAllocatedCPUSet, exceededRatio, metrics.MetricTypeNameRaw, []metrics.MetricTag{ {Key: "podNamespace", Val: pod.Namespace}, {Key: "podName", Val: pod.Name}, {Key: "qosLevel", Val: mainContainerEntry.QoSLevel}, diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go index 3c92c89626..b729826b5e 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go @@ -22,10 +22,10 @@ import ( "sort" "time" + apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" v1 "k8s.io/api/core/v1" pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" - apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" 
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" @@ -45,13 +45,13 @@ func (p *DynamicPolicy) sharedCoresHintHandler(ctx context.Context, return nil, fmt.Errorf("got nil request") } - if qosutil.AnnotationsIndicateNUMABinding(req.Annotations) { - return p.sharedCoresWithNUMABindingHintHandler(ctx, req) + if qosutil.AnnotationsIndicateNUMABinding(req.Annotations) || qosutil.AnnotationsIndicateNUMAAffinity(req.Annotations) { + return p.sharedCoresWithNUMAAffinityHintHandler(ctx, req) } - // TODO: support sidecar follow main container for non-binding share cores in future + // TODO: support sidecar follow main container for non-affinity share cores in future if req.ContainerType == pluginapi.ContainerType_MAIN { - ok, err := p.checkNonBindingShareCoresCpuResource(req) + ok, err := p.checkNonCPUAffinityShareCoresCpuResource(req) if err != nil { general.Errorf("failed to check share cores cpu resource for pod: %s/%s, container: %s", req.PodNamespace, req.PodName, req.ContainerName) @@ -59,11 +59,12 @@ func (p *DynamicPolicy) sharedCoresHintHandler(ctx context.Context, } if !ok { - _ = p.emitter.StoreInt64(util.MetricNameShareCoresNoEnoughResourceFailed, 1, metrics.MetricTypeNameCount, metrics.ConvertMapToTags(map[string]string{ + _ = p.emitter.StoreInt64(util.MetricNameNoEnoughNUMAResourceFailed, 1, metrics.MetricTypeNameCount, metrics.ConvertMapToTags(map[string]string{ "resource": v1.ResourceCPU.String(), "podNamespace": req.PodNamespace, "podName": req.PodName, "containerName": req.ContainerName, + "qosLevel": req.Annotations[apiconsts.PodAnnotationQoSLevelKey], })...) 
return nil, cpuutil.ErrNoAvailableCPUHints } @@ -86,9 +87,9 @@ func (p *DynamicPolicy) reclaimedCoresHintHandler(ctx context.Context, return nil, fmt.Errorf("not support inplace update resize for reclaimed cores") } - if qosutil.AnnotationsIndicateNUMABinding(req.Annotations) && + if (qosutil.AnnotationsIndicateNUMABinding(req.GetAnnotations()) || qosutil.AnnotationsIndicateNUMAAffinity(req.GetAnnotations())) && p.enableReclaimNUMABinding { - return p.reclaimedCoresWithNUMABindingHintHandler(ctx, req) + return p.reclaimedCoresWithNUMAAffinityHintHandler(ctx, req) } return util.PackResourceHintsResponse(req, string(v1.ResourceCPU), @@ -108,15 +109,14 @@ func (p *DynamicPolicy) dedicatedCoresHintHandler(ctx context.Context, return nil, fmt.Errorf("not support inplace update resize for dedicated cores") } - switch req.Annotations[apiconsts.PodAnnotationMemoryEnhancementNumaBinding] { - case apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable: - return p.dedicatedCoresWithNUMABindingHintHandler(ctx, req) - default: - return p.dedicatedCoresWithoutNUMABindingHintHandler(ctx, req) + if qosutil.AnnotationsIndicateNUMAAffinity(req.Annotations) || + qosutil.AnnotationsIndicateNUMABinding(req.Annotations) { + return p.dedicatedCoresWithNUMAAffinityHintHandler(ctx, req) } + return p.dedicatedCoresWithoutNUMAAffinityHintHandler(ctx, req) } -func (p *DynamicPolicy) dedicatedCoresWithNUMABindingHintHandler(_ context.Context, +func (p *DynamicPolicy) dedicatedCoresWithNUMAAffinityHintHandler(_ context.Context, req *pluginapi.ResourceRequest, ) (*pluginapi.ResourceHintsResponse, error) { // currently, we set cpuset of sidecar to the cpuset of its main container, @@ -184,11 +184,11 @@ func (p *DynamicPolicy) dedicatedCoresWithNUMABindingHintHandler(_ context.Conte return util.PackResourceHintsResponse(req, string(v1.ResourceCPU), hints) } -func (p *DynamicPolicy) dedicatedCoresWithoutNUMABindingHintHandler(_ context.Context, +func (p *DynamicPolicy) 
dedicatedCoresWithoutNUMAAffinityHintHandler(_ context.Context, _ *pluginapi.ResourceRequest, ) (*pluginapi.ResourceHintsResponse, error) { - // todo: support dedicated_cores without NUMA binding - return nil, fmt.Errorf("not support dedicated_cores without NUMA binding") + // todo: support dedicated_cores without NUMA affinity + return nil, fmt.Errorf("not support dedicated_cores without NUMA affinity") } // calculateHints is a helper function to calculate the topology hints @@ -215,6 +215,7 @@ func (p *DynamicPolicy) calculateHints( numaBinding := qosutil.AnnotationsIndicateNUMABinding(req.Annotations) numaExclusive := qosutil.AnnotationsIndicateNUMAExclusive(req.Annotations) + numaAffinity := qosutil.AnnotationsIndicateNUMAAffinity(req.Annotations) // because it's hard to control memory allocation accurately, // we only support numa_binding but not exclusive container with request smaller than 1 NUMA @@ -229,7 +230,7 @@ func (p *DynamicPolicy) calculateHints( numaToAvailableCPUCount := make(map[int]int, len(numaNodes)) - availableNUMAs := p.filterNUMANodesByNonBinding(request, podEntries, machineState, req) + availableNUMAs := p.filterNUMANodesByCPUAffinity(request, podEntries, machineState, req) for _, nodeID := range numaNodes { if machineState[nodeID] == nil { general.Warningf("NUMA: %d has nil state", nodeID) @@ -241,9 +242,10 @@ func (p *DynamicPolicy) calculateHints( numaToAvailableCPUCount[nodeID] = 0 general.Warningf("numa_exclusive container skip NUMA: %d allocated: %d", nodeID, machineState[nodeID].AllocatedCPUSet.Size()) - } else if numaBinding && !availableNUMAs.Contains(nodeID) { + } else if (numaAffinity || numaBinding) && !availableNUMAs.Contains(nodeID) { + // if numaAffinity or numaBinding is true, we should not filter out the numa node numaToAvailableCPUCount[nodeID] = 0 - general.Warningf("numa_binding container skip NUMA: %d, allocated: %d", + general.Warningf("numa_affinity container skip NUMA: %d, allocated: %d", nodeID, 
machineState[nodeID].AllocatedCPUSet.Size()) } else { numaToAvailableCPUCount[nodeID] = machineState[nodeID].GetAvailableCPUSet(p.reservedCPUs).Size() @@ -322,7 +324,7 @@ func (p *DynamicPolicy) calculateHints( Hints: availableNumaHints, } - err = p.dedicatedCoresNUMABindingHintOptimizer.OptimizeHints( + err = p.dedicatedCoresNUMAAffinityHintOptimizer.OptimizeHints( hintoptimizer.Request{ ResourceRequest: req, CPURequest: request, @@ -336,7 +338,7 @@ func (p *DynamicPolicy) calculateHints( }, nil } -func (p *DynamicPolicy) reclaimedCoresWithNUMABindingHintHandler(_ context.Context, +func (p *DynamicPolicy) reclaimedCoresWithNUMAAffinityHintHandler(_ context.Context, req *pluginapi.ResourceRequest, ) (*pluginapi.ResourceHintsResponse, error) { // currently, we set cpuset of sidecar to the cpuset of its main container, @@ -375,9 +377,9 @@ func (p *DynamicPolicy) reclaimedCoresWithNUMABindingHintHandler(_ context.Conte if hints == nil { var calculateErr error - hints, calculateErr = p.calculateHintsForNUMABindingReclaimedCores(request, podEntries, machineState, numaHeadroomState) + hints, calculateErr = p.calculateHintsForNUMAAffinityReclaimedCores(request, podEntries, machineState, numaHeadroomState) if calculateErr != nil { - return nil, fmt.Errorf("calculateHintsForNUMABindingReclaimedCores failed with error: %v", calculateErr) + return nil, fmt.Errorf("calculateHintsForNUMAAffinityReclaimedCores failed with error: %v", calculateErr) } } @@ -387,25 +389,25 @@ func (p *DynamicPolicy) reclaimedCoresWithNUMABindingHintHandler(_ context.Conte return util.PackResourceHintsResponse(req, string(v1.ResourceCPU), hints) } -func (p *DynamicPolicy) calculateHintsForNUMABindingReclaimedCores(reqFloat float64, podEntries state.PodEntries, +func (p *DynamicPolicy) calculateHintsForNUMAAffinityReclaimedCores(reqFloat float64, podEntries state.PodEntries, machineState state.NUMANodeMap, numaHeadroomState map[int]float64, ) (map[string]*pluginapi.ListOfTopologyHints, error) { // 
Determine the set of NUMA nodes currently hosting non-RNB pods - nonActualBindingNUMAs := machineState.GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMABinding)) + nonActualAffinityNUMAs := machineState.GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMAAffinity)) // Calculate the total requested resources for non-RNB reclaimed pods - nonActualBindingReclaimedRequestedQuantity := state.GetRequestedQuantityFromPodEntries(podEntries, - state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedNonActualNUMABinding), + nonActualAffinityReclaimedRequestedQuantity := state.GetRequestedQuantityFromPodEntries(podEntries, + state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedNonActualNUMAAffinity), p.getContainerRequestedCores) // Compute the total available headroom for non-RNB NUMA nodes - nonActualBindingReclaimedNUMAHeadroom := state.GetReclaimedNUMAHeadroom(numaHeadroomState, nonActualBindingNUMAs) + nonActualAffinityReclaimedNUMAHeadroom := state.GetReclaimedNUMAHeadroom(numaHeadroomState, nonActualAffinityNUMAs) - // Identify candidate NUMA nodes for RNB (Reclaimed NUMA Binding) cores + // Identify candidate NUMA nodes for RNB (Reclaimed NUMA Affinity) cores // This includes both RNB NUMA nodes and NUMA nodes that can shrink from the non-RNB set - candidateNUMANodes := p.filterNUMANodesByNonBindingReclaimedRequestedQuantity(nonActualBindingReclaimedRequestedQuantity, - nonActualBindingReclaimedNUMAHeadroom, nonActualBindingNUMAs, machineState, numaHeadroomState) + candidateNUMANodes := p.filterNUMANodesByNonAffinityReclaimedRequestedQuantity(nonActualAffinityReclaimedRequestedQuantity, + nonActualAffinityReclaimedNUMAHeadroom, nonActualAffinityNUMAs, machineState, numaHeadroomState) candidateLeft := p.calculateNUMANodesLeft(candidateNUMANodes, machineState, numaHeadroomState, reqFloat) @@ -414,14 +416,14 @@ func (p 
*DynamicPolicy) calculateHintsForNUMABindingReclaimedCores(reqFloat floa p.populateBestEffortHintsByAvailableNUMANodes(hints, candidateLeft) // If no valid hints are generated and this is not a single-NUMA scenario, return an error - if len(hints.Hints) == 0 && !(p.metaServer.NumNUMANodes == 1 && nonActualBindingNUMAs.Size() > 0) { + if len(hints.Hints) == 0 && !(p.metaServer.NumNUMANodes == 1 && nonActualAffinityNUMAs.Size() > 0) { return nil, cpuutil.ErrNoAvailableCPUHints } general.InfoS("calculate numa hints for reclaimed cores success", - "nonActualBindingNUMAs", nonActualBindingNUMAs.String(), - "nonActualBindingReclaimedRequestedQuantity", nonActualBindingReclaimedRequestedQuantity, - "nonActualBindingReclaimedNUMAHeadroom", nonActualBindingReclaimedNUMAHeadroom, + "nonActualAffinityNUMAs", nonActualAffinityNUMAs.String(), + "nonActualAffinityReclaimedRequestedQuantity", nonActualAffinityReclaimedRequestedQuantity, + "nonActualAffinityReclaimedNUMAHeadroom", nonActualAffinityReclaimedNUMAHeadroom, "numaHeadroomState", numaHeadroomState, "candidateNUMANodes", candidateNUMANodes, "candidateLeft", candidateLeft) @@ -431,7 +433,7 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingReclaimedCores(reqFloat floa }, nil } -func (p *DynamicPolicy) sharedCoresWithNUMABindingHintHandler(_ context.Context, +func (p *DynamicPolicy) sharedCoresWithNUMAAffinityHintHandler(_ context.Context, req *pluginapi.ResourceRequest, ) (*pluginapi.ResourceHintsResponse, error) { // currently, we set cpuset of sidecar to the cpuset of its main container, @@ -475,7 +477,7 @@ func (p *DynamicPolicy) sharedCoresWithNUMABindingHintHandler(_ context.Context, if numaSet.Size() != 1 { general.Errorf("pod: %s/%s, container: %s is snb, but its numa set size is %d", req.PodNamespace, req.PodName, req.ContainerName, numaSet.Size()) - return nil, fmt.Errorf("snb port not support cross numa") + return nil, fmt.Errorf("snb pod not support cross numa") } nodeID := numaSet.ToSliceInt()[0] 
availableCPUQuantity := machineState[nodeID].GetAvailableCPUQuantity(p.reservedCPUs) @@ -491,11 +493,11 @@ func (p *DynamicPolicy) sharedCoresWithNUMABindingHintHandler(_ context.Context, general.Infof("pod: %s/%s, container: %s request inplace update resize and no enough resource in current NUMA, try to migrate it to new NUMA", req.PodNamespace, req.PodName, req.ContainerName) var calculateErr error - hints, calculateErr = p.calculateHintsForNUMABindingSharedCores(request, podEntries, machineState, req) + hints, calculateErr = p.calculateHintsForNUMAAffinitySharedCores(request, podEntries, machineState, req) if calculateErr != nil { general.Errorf("pod: %s/%s, container: %s request inplace update resize and no enough resource in current NUMA, failed to migrate it to new NUMA", req.PodNamespace, req.PodName, req.ContainerName) - return nil, fmt.Errorf("calculateHintsForNUMABindingSharedCores failed in inplace update resize mode with error: %v", calculateErr) + return nil, fmt.Errorf("calculateHintsForNUMAAffinitySharedCores failed in inplace update resize mode with error: %v", calculateErr) } } else { general.Errorf("pod: %s/%s, container: %s request inplace update resize, but no enough resource for it in current NUMA", @@ -510,9 +512,9 @@ func (p *DynamicPolicy) sharedCoresWithNUMABindingHintHandler(_ context.Context, } } else { var calculateErr error - hints, calculateErr = p.calculateHintsForNUMABindingSharedCores(request, podEntries, machineState, req) + hints, calculateErr = p.calculateHintsForNUMAAffinitySharedCores(request, podEntries, machineState, req) if calculateErr != nil { - return nil, fmt.Errorf("calculateHintsForNUMABindingSharedCores failed with error: %v", calculateErr) + return nil, fmt.Errorf("calculateHintsForNUMAAffinitySharedCores failed with error: %v", calculateErr) } } @@ -534,7 +536,7 @@ func (p *DynamicPolicy) clearContainerAndRegenerateMachineState(podEntries state return machineState, nil } -func (p *DynamicPolicy) 
filterNUMANodesByNonBinding( +func (p *DynamicPolicy) filterNUMANodesByCPUAffinity( request float64, podEntries state.PodEntries, machineState state.NUMANodeMap, req *pluginapi.ResourceRequest, @@ -543,49 +545,60 @@ func (p *DynamicPolicy) filterNUMANodesByNonBinding( return machine.NewCPUSet() } - nonBindingNUMAsCPUQuantity := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, nil, - state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedOrDedicatedNUMABinding)).Size() - nonBindingNUMAs := machineState.GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedOrDedicatedNUMABinding)) - nonBindingSharedRequestedQuantity := state.GetNonBindingSharedRequestedQuantityFromPodEntries(podEntries, nil, p.getContainerRequestedCores) - - return p.filterNUMANodesByNonBindingSharedRequestedQuantity( - request, nonBindingSharedRequestedQuantity, nonBindingNUMAsCPUQuantity, nonBindingNUMAs, machineState, - machineState.GetFilteredNUMASetWithAnnotations(state.WrapAllocationMetaFilterWithAnnotations(commonstate.CheckNUMABindingAntiAffinity), req.Annotations).ToSliceInt()) -} - -func (p *DynamicPolicy) filterNUMANodesByNonBindingSharedRequestedQuantity( - request float64, - nonBindingSharedRequestedQuantity, nonBindingNUMAsCPUQuantity int, - nonBindingNUMAs machine.CPUSet, - machineState state.NUMANodeMap, numaNodes []int, -) machine.CPUSet { - filteredNUMANodes := make([]int, 0, len(numaNodes)) + // 1. filter cpu quantity on non-affinity numa nodes + nonCPUAffinityNUMAsCPUQuantity := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, nil, + state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedOrDedicatedNUMAAffinity)).Size() + // 2. 
filter cpu quantity requested by non-cpu-affinity pod + nonCPUAffinitySharedRequestedQuantity := state.GetNonCPUAffinitySharedRequestedQuantityFromPodEntries(podEntries, nil, p.getContainerRequestedCores) - for _, nodeID := range numaNodes { - allocatableCPUQuantity := machineState[nodeID].GetAvailableCPUQuantity(p.reservedCPUs) - if nonBindingNUMAs.Contains(nodeID) { - // take this non-binding NUMA for candidate shared_cores with numa_binding, - // won't cause non-binding shared_cores in short supply - if cpuutil.CPUIsSufficient(float64(nonBindingSharedRequestedQuantity), float64(nonBindingNUMAsCPUQuantity)-allocatableCPUQuantity) { + filteredNUMANodes := make([]int, 0, len(machineState)) + // 3.1. if pod with memory-binding enhancement, filter binding numa for pod with memory-binding enhancement + if qosutil.AnnotationsIndicateNUMABinding(req.Annotations) { + bindingNUMAs := machineState.GetMatchedNUMASetWithAnnotations(state.WrapAllocationMetaFilterWithAnnotations(commonstate.CheckNUMABindingWithAffinity), req.Annotations).ToSliceInt() + for _, nodeID := range bindingNUMAs { + allocatableCPUQuantity := machineState[nodeID].GetAvailableCPUQuantity(p.reservedCPUs) + if cpuutil.CPUIsSufficient(request, allocatableCPUQuantity) { filteredNUMANodes = append(filteredNUMANodes, nodeID) - } else { - general.Infof("filter out NUMA: %d since taking it will cause non-binding shared_cores in short supply;"+ - " nonBindingNUMAsCPUQuantity: %d, targetNUMAAllocatableCPUQuantity: %f, nonBindingSharedRequestedQuantity: %d, request: %f", - nodeID, nonBindingNUMAsCPUQuantity, allocatableCPUQuantity, nonBindingSharedRequestedQuantity, request) } - } else if cpuutil.CPUIsSufficient(request, allocatableCPUQuantity) { + } + } else if qosutil.AnnotationsIndicateNUMAAffinity(req.Annotations) { + // 3.2. 
if pod with cpu affinity enhancement, filter numa nodes for cpu affinity pod + cpuAffinityNUMAs := machineState.GetMatchedNUMASetWithAnnotations(state.WrapAllocationMetaFilterWithAnnotations(commonstate.CheckNonBindingCPUAffinityNUMA), req.GetAnnotations()).ToSliceInt() + for _, nodeID := range cpuAffinityNUMAs { + allocatableCPUQuantity := machineState[nodeID].GetAvailableCPUQuantity(p.reservedCPUs) + qosLevel := req.GetAnnotations()[apiconsts.PodAnnotationQoSLevelKey] + // if qosLevel is shared_cores, check if the numa node has enough cpu quantity + // shared_cores with cpu affinity pod can only use one numa node + if qosLevel == apiconsts.PodAnnotationQoSLevelSharedCores && !cpuutil.CPUIsSufficient(request, allocatableCPUQuantity) { + continue + } filteredNUMANodes = append(filteredNUMANodes, nodeID) } } + // 4. check if non affinity numa nodes can be allocated to the pod + nonCPUAffinityNUMAs := machineState.GetFilteredNUMASet(state.WrapAllocationMetaFilter(commonstate.CheckNonCPUAffinityNUMA)).ToSliceInt() + for _, nodeID := range nonCPUAffinityNUMAs { + allocatableCPUQuantity := machineState[nodeID].GetAvailableCPUQuantity(p.reservedCPUs) + // take this non-binding NUMA for candidate shared_cores with numa_binding, + // won't cause non-binding shared_cores in short supply + if cpuutil.CPUIsSufficient(float64(nonCPUAffinitySharedRequestedQuantity), float64(nonCPUAffinityNUMAsCPUQuantity)-allocatableCPUQuantity) { + filteredNUMANodes = append(filteredNUMANodes, nodeID) + } else { + general.Infof("filter out NUMA: %d since taking it will cause non-affinity shared_cores in short supply;"+ + " nonAffinityNUMAsCPUQuantity: %d, targetNUMAAllocatableCPUQuantity: %f, nonAffinitySharedRequestedQuantity: %d, request: %f", + nodeID, nonCPUAffinityNUMAsCPUQuantity, allocatableCPUQuantity, nonCPUAffinitySharedRequestedQuantity, request) + } + } + return machine.NewCPUSet(filteredNUMANodes...) 
} -func (p *DynamicPolicy) calculateHintsForNUMABindingSharedCores(request float64, podEntries state.PodEntries, +func (p *DynamicPolicy) calculateHintsForNUMAAffinitySharedCores(request float64, podEntries state.PodEntries, machineState state.NUMANodeMap, req *pluginapi.ResourceRequest, ) (map[string]*pluginapi.ListOfTopologyHints, error) { - numaNodes := p.filterNUMANodesByNonBinding(request, podEntries, machineState, req).ToSliceInt() + numaNodes := p.filterNUMANodesByCPUAffinity(request, podEntries, machineState, req).ToSliceInt() hints := &pluginapi.ListOfTopologyHints{} @@ -594,10 +607,10 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingSharedCores(request float64, return nil, fmt.Errorf("GetNUMANodesCountToFitCPUReq failed with error: %v", err) } - // if a numa_binding shared_cores has request larger than 1 NUMA, - // its performance may degrade to be like non-binding shared_cores + // if a numa_affinity shared_cores has request larger than 1 NUMA, + // its performance may degrade to be like non-affinity shared_cores if minNUMAsCountNeeded > 1 { - return nil, fmt.Errorf("numa_binding shared_cores container has request larger than 1 NUMA") + return nil, fmt.Errorf("numa_affinity shared_cores container has request larger than 1 NUMA") } if p.enableSNBHighNumaPreference { @@ -610,8 +623,8 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingSharedCores(request float64, // populate hints by available numa nodes cpuutil.PopulateHintsByAvailableNUMANodes(numaNodes, hints, true) - // optimize hints by shared_cores numa_binding hint optimizer - err = p.sharedCoresNUMABindingHintOptimizer.OptimizeHints( + // optimize hints by shared_cores numa_affinity hint optimizer + err = p.sharedCoresNUMAAffinityHintOptimizer.OptimizeHints( hintoptimizer.Request{ ResourceRequest: req, CPURequest: request, @@ -631,9 +644,9 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingSharedCores(request float64, // populate hints by already existed numa binding result if 
p.dynamicConfig.GetDynamicConfiguration().PreferUseExistNUMAHintResult { - err = p.populateHintsByAlreadyExistedNUMABindingResult(req, hints) + err = p.populateHintsByAlreadyExistedNUMAAffinityResult(req, hints) if err != nil { - general.Warningf("populateHintsByAlreadyExistedNUMABindingResult failed with error: %v", err) + general.Warningf("populateHintsByAlreadyExistedNUMAAffinityResult failed with error: %v", err) return nil, err } } @@ -643,8 +656,8 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingSharedCores(request float64, }, nil } -func (p *DynamicPolicy) populateHintsByAlreadyExistedNUMABindingResult(req *pluginapi.ResourceRequest, hints *pluginapi.ListOfTopologyHints) error { - result, err := p.getSharedCoresNUMABindingResultFromAnnotation(req) +func (p *DynamicPolicy) populateHintsByAlreadyExistedNUMAAffinityResult(req *pluginapi.ResourceRequest, hints *pluginapi.ListOfTopologyHints) error { + result, err := p.getSharedCoresNUMAAffinityResultFromAnnotation(req) if err != nil { return err } @@ -668,10 +681,10 @@ func (p *DynamicPolicy) populateHintsByAlreadyExistedNUMABindingResult(req *plug } if index == -1 { - general.Warningf("failed to find already existed numa binding result %s from hints %v for pod: %s/%s, container: %s", + general.Warningf("failed to find already existed numa affinity result %s from hints %v for pod: %s/%s, container: %s", result, hints.Hints, req.PodNamespace, req.PodName, req.ContainerName) } else { - general.Infof("found already existed numa binding result %s from hints %v for pod: %s/%s, container: %s", + general.Infof("found already existed numa affinity result %s from hints %v for pod: %s/%s, container: %s", result, hints.Hints, req.PodNamespace, req.PodName, req.ContainerName) for i, hint := range hints.Hints { if i == index { @@ -685,7 +698,7 @@ func (p *DynamicPolicy) populateHintsByAlreadyExistedNUMABindingResult(req *plug return nil } -func (p *DynamicPolicy) getSharedCoresNUMABindingResultFromAnnotation(req 
*pluginapi.ResourceRequest) (machine.CPUSet, error) { +func (p *DynamicPolicy) getSharedCoresNUMAAffinityResultFromAnnotation(req *pluginapi.ResourceRequest) (machine.CPUSet, error) { result, ok := req.Annotations[p.sharedCoresNUMABindingResultAnnotationKey] if !ok { return machine.CPUSet{}, nil @@ -699,9 +712,9 @@ func (p *DynamicPolicy) getSharedCoresNUMABindingResultFromAnnotation(req *plugi return numaSet, nil } -func (p *DynamicPolicy) filterNUMANodesByNonBindingReclaimedRequestedQuantity(nonBindingReclaimedRequestedQuantity, - nonBindingNUMAsCPUQuantity float64, - nonBindingNUMAs machine.CPUSet, +func (p *DynamicPolicy) filterNUMANodesByNonAffinityReclaimedRequestedQuantity(nonAffinityReclaimedRequestedQuantity, + nonAffinityNUMAsCPUQuantity float64, + nonAffinityNUMAs machine.CPUSet, machineState state.NUMANodeMap, numaHeadroomState map[int]float64, ) []int { @@ -712,23 +725,23 @@ func (p *DynamicPolicy) filterNUMANodesByNonBindingReclaimedRequestedQuantity(no } } - // Sort candidate NUMA nodes based on the other qos numa binding pods and their headroom + // Sort candidate NUMA nodes based on the other qos numa affinity pods and their headroom p.sortCandidateNUMANodesForReclaimed(candidateNUMANodes, machineState, numaHeadroomState) - nonBindingNUMAs = nonBindingNUMAs.Clone() + nonAffinityNUMAs = nonAffinityNUMAs.Clone() filteredNUMANodes := make([]int, 0, len(candidateNUMANodes)) for _, nodeID := range candidateNUMANodes { - if nonBindingNUMAs.Contains(nodeID) { + if nonAffinityNUMAs.Contains(nodeID) { allocatableCPUQuantity := numaHeadroomState[nodeID] - // take this non-binding NUMA for candidate reclaimed_cores with numa_binding, - // won't cause non-actual numa binding reclaimed_cores in short supply - if cpuutil.CPUIsSufficient(nonBindingReclaimedRequestedQuantity, nonBindingNUMAsCPUQuantity-allocatableCPUQuantity) || nonBindingNUMAs.Size() > 1 { + // take this non-affinity NUMA for candidate reclaimed_cores with numa_affinity, + // won't cause 
non-actual numa affinity reclaimed_cores in short supply + if cpuutil.CPUIsSufficient(nonAffinityReclaimedRequestedQuantity, nonAffinityNUMAsCPUQuantity-allocatableCPUQuantity) || nonAffinityNUMAs.Size() > 1 { filteredNUMANodes = append(filteredNUMANodes, nodeID) - nonBindingNUMAs = nonBindingNUMAs.Difference(machine.NewCPUSet(nodeID)) + nonAffinityNUMAs = nonAffinityNUMAs.Difference(machine.NewCPUSet(nodeID)) } else { general.Infof("filter out NUMA: %d since taking it will cause normal reclaimed_cores in short supply;"+ - " nonBindingNUMAsCPUQuantity: %.3f, targetNUMAAllocatableCPUQuantity: %.3f, nonBindingReclaimedRequestedQuantity: %.3f", - nodeID, nonBindingNUMAsCPUQuantity, allocatableCPUQuantity, nonBindingReclaimedRequestedQuantity) + " nonAffinityNUMAsCPUQuantity: %.3f, targetNUMAAllocatableCPUQuantity: %.3f, nonAffinityReclaimedRequestedQuantity: %.3f", + nodeID, nonAffinityNUMAsCPUQuantity, allocatableCPUQuantity, nonAffinityReclaimedRequestedQuantity) } } else { filteredNUMANodes = append(filteredNUMANodes, nodeID) @@ -744,14 +757,15 @@ func (p *DynamicPolicy) sortCandidateNUMANodesForReclaimed(numaNodes []int, numaHeadroomState map[int]float64, ) { // sort candidate NUMAs by the following rules: - // 1. NUMAs with numa binding shared or dedicated pods binding to it will be placed ahead of NUMAs without numa binding shared or dedicated pods binding to it. + // 1. NUMAs with numa affinity shared or dedicated pods affinity to it will be placed ahead of NUMAs without numa affinity shared or dedicated pods affinity to it. // 2. NUMAs with higher headroom will be placed ahead of NUMAs with lower headroom. 
- nonSharedOrDedicatedNUMABindingNUMAs := machineState.GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedOrDedicatedNUMABinding)) + // todo: cpu affinity pod not bind numa, do we need to filter cpu affinity numas + nonSharedOrDedicatedNUMAAffinityNUMAs := machineState.GetFilteredNUMASet(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedOrDedicatedNUMAAffinity)) sort.SliceStable(numaNodes, func(i, j int) bool { - hasNUMABindingPodI := !nonSharedOrDedicatedNUMABindingNUMAs.Contains(numaNodes[i]) - hasNUMABindingPodJ := !nonSharedOrDedicatedNUMABindingNUMAs.Contains(numaNodes[j]) - if hasNUMABindingPodI != hasNUMABindingPodJ { - return hasNUMABindingPodI && !hasNUMABindingPodJ + hasNUMAAffinityPodI := !nonSharedOrDedicatedNUMAAffinityNUMAs.Contains(numaNodes[i]) + hasNUMAAffinityPodJ := !nonSharedOrDedicatedNUMAAffinityNUMAs.Contains(numaNodes[j]) + if hasNUMAAffinityPodI != hasNUMAAffinityPodJ { + return hasNUMAAffinityPodI && !hasNUMAAffinityPodJ } else { return numaHeadroomState[numaNodes[i]] > numaHeadroomState[numaNodes[j]] } @@ -765,7 +779,7 @@ func (p *DynamicPolicy) calculateNUMANodesLeft(numaNodes []int, numaNodesCPULeft := make(map[int]float64, len(numaNodes)) for _, nodeID := range numaNodes { allocatedQuantity := state.GetRequestedQuantityFromPodEntries(machineState[nodeID].PodEntries, - state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMABinding), + state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMAAffinity), p.getContainerRequestedCores) availableCPUQuantity := numaHeadroomState[nodeID] - allocatedQuantity numaNodesCPULeft[nodeID] = availableCPUQuantity - reqFloat diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers_test.go index 4cca697709..45986be4bb 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers_test.go 
+++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers_test.go @@ -615,12 +615,12 @@ func TestCalculateHintsForNUMABindingSharedCores1(t *testing.T) { CPUTopology: cpuTopology, }, sharedCoresNUMABindingResultAnnotationKey: "katalyst-test/nume-bind-result", - sharedCoresNUMABindingHintOptimizer: &hintoptimizer.DummyHintOptimizer{}, + sharedCoresNUMAAffinityHintOptimizer: &hintoptimizer.DummyHintOptimizer{}, dynamicConfig: dynamic.NewDynamicAgentConfiguration(), } p.dynamicConfig.GetDynamicConfiguration().PreferUseExistNUMAHintResult = tt.preferUseExistNUMAHintResult - result, err := p.calculateHintsForNUMABindingSharedCores(tt.request, podEntries, machineState, tt.req) + result, err := p.calculateHintsForNUMAAffinitySharedCores(tt.request, podEntries, machineState, tt.req) if tt.expectedError { assert.Error(t, err) @@ -719,7 +719,7 @@ func TestPopulateHintsByAlreadyExistedNUMABindingResult(t *testing.T) { sharedCoresNUMABindingResultAnnotationKey: "numa_binding", } - err := p.populateHintsByAlreadyExistedNUMABindingResult(tt.req, tt.hints) + err := p.populateHintsByAlreadyExistedNUMAAffinityResult(tt.req, tt.hints) if tt.expectedError { assert.Error(t, err) } else { diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go index 8046535b40..78fae6387a 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go @@ -3585,10 +3585,10 @@ func TestGetTopologyHints(t *testing.T) { } } - dynamicPolicy.sharedCoresNUMABindingHintOptimizer, err = canonical.NewCanonicalHintOptimizer(dynamicPolicy.generateHintOptimizerFactoryOptions()) + dynamicPolicy.sharedCoresNUMAAffinityHintOptimizer, err = canonical.NewCanonicalHintOptimizer(dynamicPolicy.generateHintOptimizerFactoryOptions()) as.NoError(err) - dynamicPolicy.dedicatedCoresNUMABindingHintOptimizer = &hintoptimizer.DummyHintOptimizer{} + 
dynamicPolicy.dedicatedCoresNUMAAffinityHintOptimizer = &hintoptimizer.DummyHintOptimizer{} resp, err := dynamicPolicy.GetTopologyHints(context.Background(), tc.req) as.Nil(err) diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/reactor/numa_allocation_reactor.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/reactor/numa_allocation_reactor.go new file mode 100644 index 0000000000..7cdf7122d0 --- /dev/null +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/reactor/numa_allocation_reactor.go @@ -0,0 +1,106 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package reactor + +import ( + "context" + "fmt" + "strconv" + "strings" + + "github.com/samber/lo" + v1 "k8s.io/api/core/v1" + + apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util/reactor" +) + +type numaPodAllocationWrapper struct { + *state.AllocationInfo +} + +// UpdateAllocation updates numa allocation result for pod +func (p numaPodAllocationWrapper) UpdateAllocation(pod *v1.Pod) error { + annotations := pod.GetAnnotations() + if annotations == nil { + annotations = make(map[string]string) + } + + numaAllocationResult, err := p.getNUMAAllocationResult() + if err != nil { + return fmt.Errorf("failed to get numa allocation result: %v", err) + } + + annotations[apiconsts.PodAnnotationNUMABindResultKey] = numaAllocationResult + pod.SetAnnotations(annotations) + + return nil +} + +func (p numaPodAllocationWrapper) getNUMAAllocationResult() (string, error) { + numaList := p.AllocationInfo.GetAllocationResultNUMASet().ToSliceInt() + if len(numaList) == 0 { + return "", fmt.Errorf("numa id is empty") + } + intSlice := make([]string, len(numaList)) + for i, numaID := range numaList { + intSlice[i] = strconv.Itoa(numaID) + } + return strings.Join(intSlice, ","), nil +} + +// NeedUpdateAllocation checks if numa allocation result needs to be updated +func (p numaPodAllocationWrapper) NeedUpdateAllocation(pod *v1.Pod) bool { + if p.CheckSideCar() { + return false + } + + if _, ok := pod.Annotations[apiconsts.PodAnnotationNUMABindResultKey]; !ok { + return true + } + + return false +} + +type numaPodAllocationReactor struct { + reactor.AllocationReactor +} + +// NewNUMAPodAllocationReactor creates a new numa pod allocation reactor +func NewNUMAPodAllocationReactor(r reactor.AllocationReactor) reactor.AllocationReactor { + return 
&numaPodAllocationReactor{ + AllocationReactor: r, + } +} + +// UpdateAllocation updates numa allocation result for pod +func (r *numaPodAllocationReactor) UpdateAllocation(ctx context.Context, allocation commonstate.Allocation) error { + if lo.IsNil(allocation) { + return fmt.Errorf("allocation is nil") + } + + allocationInfo, ok := allocation.(*state.AllocationInfo) + if !ok { + return fmt.Errorf("allocation info is not of type memory.AllocationInfo") + } + + return r.AllocationReactor.UpdateAllocation(ctx, numaPodAllocationWrapper{ + AllocationInfo: allocationInfo, + }) +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/reactor/numa_allocation_reactor_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/reactor/numa_allocation_reactor_test.go new file mode 100644 index 0000000000..66b479576a --- /dev/null +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/reactor/numa_allocation_reactor_test.go @@ -0,0 +1,251 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package reactor + +import ( + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +func TestNumaPodAllocationWrapper_getNUMAAllocationResult(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + allocationInfo *state.AllocationInfo + expectedResult string + expectedError bool + }{ + { + name: "single numa node allocation", + allocationInfo: &state.AllocationInfo{ + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1), // NUMA node 0 with CPUs 0, 1 + }, + }, + expectedResult: "0", + expectedError: false, + }, + { + name: "multiple numa nodes allocation", + allocationInfo: &state.AllocationInfo{ + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1), // NUMA node 0 with CPUs 0, 1 + 1: machine.NewCPUSet(2, 3), // NUMA node 1 with CPUs 2, 3 + }, + }, + expectedResult: "0,1", // Should be sorted + expectedError: false, + }, + { + name: "empty topology assignments", + allocationInfo: &state.AllocationInfo{ + TopologyAwareAssignments: map[int]machine.CPUSet{}, + }, + expectedResult: "", + expectedError: true, + }, + { + name: "nil topology assignments", + allocationInfo: &state.AllocationInfo{ + TopologyAwareAssignments: nil, + }, + expectedResult: "", + expectedError: true, + }, + { + name: "negative numa node id", + allocationInfo: &state.AllocationInfo{ + TopologyAwareAssignments: map[int]machine.CPUSet{ + -1: machine.NewCPUSet(0, 1), // Negative NUMA ID should be ignored + }, + }, + expectedResult: "", + expectedError: true, + }, + { + name: "mixed valid and invalid numa nodes", + allocationInfo: &state.AllocationInfo{ + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1), // Valid NUMA node 
0 + -1: machine.NewCPUSet(2, 3), // Invalid NUMA node -1 (should be ignored) + 1: machine.NewCPUSet(4, 5), // Valid NUMA node 1 + }, + }, + expectedResult: "0,1", // Should only include valid NUMA nodes + expectedError: false, + }, + { + name: "topology with empty cpusets", + allocationInfo: &state.AllocationInfo{ + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), // Empty CPUSet should be ignored + 1: machine.NewCPUSet(2, 3), // Valid NUMA node 1 + }, + }, + expectedResult: "1", // Should only include NUMA nodes with non-empty CPUSets + expectedError: false, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + wrapper := numaPodAllocationWrapper{ + AllocationInfo: tt.allocationInfo, + } + + // Call the private method using reflection + result, err := wrapper.getNUMAAllocationResult() + + if tt.expectedError { + assert.Error(t, err, "expected error for %s", tt.name) + } else { + assert.NoError(t, err, "expected no error for %s", tt.name) + } + assert.Equal(t, tt.expectedResult, result, "expected result mismatch for %s", tt.name) + }) + } +} + +func TestNumaPodAllocationWrapper_UpdateAllocation(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + allocationInfo *state.AllocationInfo + initialPod *v1.Pod + expectedAnnotation string + expectedError bool + }{ + { + name: "update pod with single numa allocation", + allocationInfo: &state.AllocationInfo{ + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1), + }, + }, + initialPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + Annotations: map[string]string{ + "existing-key": "existing-value", + }, + }, + }, + expectedAnnotation: "0", + expectedError: false, + }, + { + name: "update pod with multiple numa allocation", + allocationInfo: &state.AllocationInfo{ + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1), + 1: 
machine.NewCPUSet(2, 3), + }, + }, + initialPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + Annotations: map[string]string{}, + }, + }, + expectedAnnotation: "0,1", + expectedError: false, + }, + { + name: "update pod with empty topology (should fail)", + allocationInfo: &state.AllocationInfo{ + TopologyAwareAssignments: map[int]machine.CPUSet{}, + }, + initialPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + Annotations: map[string]string{}, + }, + }, + expectedAnnotation: "", + expectedError: true, + }, + { + name: "update pod with nil annotations", + allocationInfo: &state.AllocationInfo{ + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1), + }, + }, + initialPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + Annotations: nil, // Nil annotations should be initialized + }, + }, + expectedAnnotation: "0", + expectedError: false, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + wrapper := numaPodAllocationWrapper{ + AllocationInfo: tt.allocationInfo, + } + + // Create a copy of the initial pod to avoid test interference + podCopy := tt.initialPod.DeepCopy() + + err := wrapper.UpdateAllocation(podCopy) + + if tt.expectedError { + assert.Error(t, err, "expected error for %s", tt.name) + } else { + assert.NoError(t, err, "expected no error for %s", tt.name) + + // Verify the annotation was set correctly + assert.Equal(t, tt.expectedAnnotation, + podCopy.Annotations[apiconsts.PodAnnotationNUMABindResultKey], + "expected annotation mismatch for %s", tt.name) + + // Verify existing annotations are preserved + if tt.initialPod.Annotations != nil { + for key, value := range tt.initialPod.Annotations { + if key != apiconsts.PodAnnotationNUMABindResultKey { + assert.Equal(t, value, podCopy.Annotations[key], + "expected existing annotation 
%s to be preserved", key) + } + } + } + } + }) + } +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go index 73b7e8dc86..5061591e55 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go @@ -368,7 +368,7 @@ func (ns *NUMANodeState) GetAvailableCPUQuantity(reservedCPUs machine.CPUSet) fl // if there is pod aggregated resource key in main container annotations, use pod aggregated resource instead. mainContainerEntry := containerEntries.GetMainContainerEntry() - if mainContainerEntry == nil || !mainContainerEntry.CheckSharedNUMABinding() { + if mainContainerEntry == nil || !mainContainerEntry.CheckSharedNUMAAffinity() { continue } @@ -380,7 +380,7 @@ func (ns *NUMANodeState) GetAvailableCPUQuantity(reservedCPUs machine.CPUSet) fl // calc pod aggregated resource request by container entries. for _, allocationInfo := range containerEntries { - if allocationInfo == nil || !allocationInfo.CheckSharedNUMABinding() { + if allocationInfo == nil || !allocationInfo.CheckSharedNUMAAffinity() { continue } @@ -588,6 +588,33 @@ func (nm NUMANodeMap) GetFilteredNUMASetWithAnnotations( return res } +// GetMatchedNUMASet return numa set matched by the predicate. +func (nm NUMANodeMap) GetMatchedNUMASet( + includeNUMAPredicate func(ai *AllocationInfo) bool, +) machine.CPUSet { + res := machine.NewCPUSet() + for numaID, numaNodeState := range nm { + if numaNodeState.ExistMatchedAllocationInfo(includeNUMAPredicate) { + res.Add(numaID) + } + } + return res +} + +// GetMatchedNUMASetWithAnnotations return numa set matched by the predicate accepting AllocationInfo in the target NUMA and input annotations of candidate. 
+func (nm NUMANodeMap) GetMatchedNUMASetWithAnnotations( + includeNUMAPredicate func(ai *AllocationInfo, annotations map[string]string) bool, + annotations map[string]string, +) machine.CPUSet { + res := machine.NewCPUSet() + for numaID, numaNodeState := range nm { + if numaNodeState.ExistMatchedAllocationInfoWithAnnotations(includeNUMAPredicate, annotations) { + res.Add(numaID) + } + } + return res +} + func (nm NUMANodeMap) Clone() NUMANodeMap { if nm == nil { return nil @@ -627,7 +654,7 @@ type reader interface { type writer interface { SetMachineState(numaNodeMap NUMANodeMap, persist bool) SetNUMAHeadroom(numaHeadroom map[int]float64, persist bool) - SetPodEntries(podEntries PodEntries, writeThrough bool) + SetPodEntries(podEntries PodEntries, persist bool) SetAllocationInfo(podUID string, containerName string, allocationInfo *AllocationInfo, persist bool) SetAllowSharedCoresOverlapReclaimedCores(allowSharedCoresOverlapReclaimedCores, persist bool) diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state_test.go index ba93926ef5..803ce8bbbb 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state_test.go @@ -2838,7 +2838,7 @@ func TestAllocationInfo_GetSpecifiedNUMABindingPoolName(t *testing.T) { InitTimestamp: tt.fields.InitTimestamp, RequestQuantity: tt.fields.RequestQuantity, } - got, err := ai.GetSpecifiedNUMABindingPoolName() + got, err := ai.GetSpecifiedNUMAPoolName() if (err != nil) != tt.wantErr { t.Errorf("AllocationInfo.GetSpecifiedNUMABindingPoolName() error = %v, wantErr %v", err, tt.wantErr) return diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go index 745f07c80a..8ccd0a32e5 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go @@ -108,7 +108,7 @@ func 
GetIsolatedQuantityMapFromPodEntries(podEntries PodEntries, ignoreAllocatio containerLoop: for containerName, allocationInfo := range entries { // only filter dedicated_cores without numa_binding - if allocationInfo == nil || allocationInfo.CheckDedicatedNUMABinding() || !allocationInfo.CheckDedicated() { + if allocationInfo == nil || allocationInfo.CheckDedicatedNUMAAffinity() || !allocationInfo.CheckDedicated() { continue } @@ -180,8 +180,8 @@ func GetSharedQuantityMapFromPodEntries(podEntries PodEntries, ignoreAllocationI return poolsQuantityMap, nil } -// GetNonBindingSharedRequestedQuantityFromPodEntries returns total quantity shared_cores without numa_binding requested -func GetNonBindingSharedRequestedQuantityFromPodEntries(podEntries PodEntries, newNonBindingSharedRequestedQuantity map[string]float64, getContainerRequestedCores GetContainerRequestedCoresFunc) int { +// GetNonCPUAffinitySharedRequestedQuantityFromPodEntries returns total quantity shared_cores without numa_binding and cpu_affinity requested +func GetNonCPUAffinitySharedRequestedQuantityFromPodEntries(podEntries PodEntries, newNonBindingSharedRequestedQuantity map[string]float64, getContainerRequestedCores GetContainerRequestedCoresFunc) int { var reqFloat64 float64 = 0 for podUid, entries := range podEntries { @@ -197,7 +197,7 @@ func GetNonBindingSharedRequestedQuantityFromPodEntries(podEntries PodEntries, n } for _, allocationInfo := range entries { - if allocationInfo == nil || !allocationInfo.CheckShared() || allocationInfo.CheckNUMABinding() { + if allocationInfo == nil || !allocationInfo.CheckShared() || allocationInfo.CheckNUMABinding() || allocationInfo.CheckNUMAAffinity() { continue } @@ -318,7 +318,7 @@ func updateMachineStatePreOccPodEntries(currentMachineState, originMachineState } func GetCPUIncrRatio(allocationInfo *AllocationInfo) float64 { - if allocationInfo.CheckSharedNUMABinding() { + if allocationInfo.CheckSharedNUMAAffinity() { // multiply incrRatio for numa_binding 
shared_cores to allow it burst return cpuconsts.CPUIncrRatioSharedCoresNUMABinding } @@ -326,7 +326,7 @@ func GetCPUIncrRatio(allocationInfo *AllocationInfo) float64 { return cpuconsts.CPUIncrRatioDefault } -func GetSharedBindingNUMAsFromQuantityMap(poolsQuantityMap map[string]map[int]int) sets.Int { +func GetSharedAffinityNUMAsFromQuantityMap(poolsQuantityMap map[string]map[int]int) sets.Int { res := sets.NewInt() for _, quantityMap := range poolsQuantityMap { @@ -360,13 +360,13 @@ func CountAllocationInfosToPoolsQuantityMap(allocationInfos []*AllocationInfo, var targetNUMAID int var poolName string - if allocationInfo.CheckSharedNUMABinding() { + if allocationInfo.CheckSharedNUMAAffinity() { var numaSet machine.CPUSet poolName = allocationInfo.GetOwnerPoolName() if poolName == commonstate.EmptyOwnerPoolName { var pErr error - poolName, pErr = allocationInfo.GetSpecifiedNUMABindingPoolName() + poolName, pErr = allocationInfo.GetSpecifiedNUMAPoolName() if pErr != nil { return fmt.Errorf("GetSpecifiedNUMABindingPoolName for %s/%s/%s failed with error: %v", allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, pErr) @@ -446,13 +446,13 @@ func CountAllocationInfosToPoolsQuantityMap(allocationInfos []*AllocationInfo, return nil } -func GetSharedNUMABindingTargetNuma(allocationInfo *AllocationInfo) (int, error) { +func GetSharedNUMAAffinityTargetNuma(allocationInfo *AllocationInfo) (int, error) { var numaSet machine.CPUSet poolName := allocationInfo.GetOwnerPoolName() if poolName == commonstate.EmptyOwnerPoolName { var pErr error - poolName, pErr = allocationInfo.GetSpecifiedNUMABindingPoolName() + poolName, pErr = allocationInfo.GetSpecifiedNUMAPoolName() if pErr != nil { return commonstate.FakedNUMAID, fmt.Errorf("GetSpecifiedNUMABindingPoolName for %s/%s/%s failed with error: %v", allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, pErr) diff --git 
a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/validator/validator_cpu_advisor.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/validator/validator_cpu_advisor.go index 30915e0f24..d1b028d369 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/validator/validator_cpu_advisor.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/validator/validator_cpu_advisor.go @@ -53,9 +53,9 @@ func (c *CPUAdvisorValidator) ValidateRequest(req *advisorapi.GetAdviceRequest) entries := c.state.GetPodEntries() // validate shared_cores with numa_binding entries - sharedNUMABindingAllocationInfos := entries.GetFilteredPodEntries(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedNUMABinding)) + sharedNUMAAffinityAllocationInfos := entries.GetFilteredPodEntries(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedNUMAAffinity)) - for podUID, containerEntries := range sharedNUMABindingAllocationInfos { + for podUID, containerEntries := range sharedNUMAAffinityAllocationInfos { for containerName, containerInfo := range containerEntries { if req.Entries[podUID] == nil || req.Entries[podUID].Entries[containerName] == nil { return fmt.Errorf("missing request entry for shared_cores with numa_binding pod: %s container: %s", podUID, containerName) @@ -64,7 +64,8 @@ func (c *CPUAdvisorValidator) ValidateRequest(req *advisorapi.GetAdviceRequest) // This container may have been changed from shared_cores without numa_binding to shared_cores with numa_binding. // Verify if we have included this information in the request. // If we have, sys-advisor must have observed it. 
- if requestInfo.Metadata.Annotations[consts.PodAnnotationMemoryEnhancementNumaBinding] != consts.PodAnnotationMemoryEnhancementNumaBindingEnable { + if requestInfo.Metadata.Annotations[consts.PodAnnotationMemoryEnhancementNumaBinding] != consts.PodAnnotationMemoryEnhancementNumaBindingEnable || + requestInfo.Metadata.Annotations[consts.PodAnnotationCPUEnhancementNumaAffinity] != consts.PodAnnotationCPUEnhancementNumaAffinityEnable { return fmt.Errorf( "shared_cores with numa_binding pod: %s container: %s has invalid owner pool name: %s in request, expected %s", podUID, containerName, requestInfo.AllocationInfo.OwnerPoolName, containerInfo.OwnerPoolName) @@ -78,7 +79,8 @@ func (c *CPUAdvisorValidator) ValidateRequest(req *advisorapi.GetAdviceRequest) } for containerName, requestInfo := range containerEntries.Entries { if requestInfo.Metadata.QosLevel == consts.PodAnnotationQoSLevelSharedCores && - requestInfo.Metadata.Annotations[consts.PodAnnotationMemoryEnhancementNumaBinding] == consts.PodAnnotationMemoryEnhancementNumaBindingEnable { + (requestInfo.Metadata.Annotations[consts.PodAnnotationMemoryEnhancementNumaBinding] == consts.PodAnnotationMemoryEnhancementNumaBindingEnable || + requestInfo.Metadata.Annotations[consts.PodAnnotationCPUEnhancementNumaAffinity] == consts.PodAnnotationCPUEnhancementNumaAffinityEnable) { if entries[podUID][containerName] == nil { return fmt.Errorf("missing state entry for shared_cores with numa_binding pod: %s container: %s", podUID, containerName) } @@ -124,7 +126,7 @@ func (c *CPUAdvisorValidator) validateEntries(resp *advisorapi.ListAndWatchRespo return fmt.Errorf("missing CalculationInfo for pod: %s container: %s", podUID, containerName) } - if !allocationInfo.CheckDedicatedNUMABinding() { + if !allocationInfo.CheckDedicatedNUMAAffinity() { numaCalculationQuantities, err := calculationInfo.GetNUMAQuantities() if err != nil { return fmt.Errorf("GetNUMAQuantities failed with error: %v, pod: %s container: %s", @@ -169,9 +171,9 @@ 
func (c *CPUAdvisorValidator) validateEntries(resp *advisorapi.ListAndWatchRespo } // validate shared_cores with numa_binding entries - sharedNUMABindingAllocationInfos := entries.GetFilteredPodEntries(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedNUMABinding)) + sharedNUMAAffinityAllocationInfos := entries.GetFilteredPodEntries(state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckSharedNUMAAffinity)) - for podUID, containerEntries := range sharedNUMABindingAllocationInfos { + for podUID, containerEntries := range sharedNUMAAffinityAllocationInfos { for containerName := range containerEntries { calculationInfo, ok := resp.GetCalculationInfo(podUID, containerName) diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go index 6db20be45e..6df2168ef2 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go @@ -1250,7 +1250,7 @@ func (p *DynamicPolicy) hasLastLevelEnhancementKey(lastLevelEnhancementKey strin return false } -func (p *DynamicPolicy) checkNonBindingShareCoresMemoryResource(req *pluginapi.ResourceRequest) (bool, error) { +func (p *DynamicPolicy) checkNonBindingMemoryResource(req *pluginapi.ResourceRequest) (bool, error) { reqInt, _, err := util.GetPodAggregatedRequestResource(req) if err != nil { return false, fmt.Errorf("GetQuantityFromResourceReq failed with error: %v", err) @@ -1279,14 +1279,14 @@ func (p *DynamicPolicy) checkNonBindingShareCoresMemoryResource(req *pluginapi.R numaAllocatableWithoutNUMABindingPods += resourceState[numaID].Allocatable } - general.Infof("[checkNonBindingShareCoresMemoryResource] node memory allocated: %d, allocatable: %d", shareCoresAllocated, numaAllocatableWithoutNUMABindingPods) + general.Infof("[checkNonBindingMemoryResource] node memory allocated: %d, allocatable: %d", shareCoresAllocated, numaAllocatableWithoutNUMABindingPods) if 
shareCoresAllocated > numaAllocatableWithoutNUMABindingPods { - general.Warningf("[checkNonBindingShareCoresMemoryResource] no enough memory resource for non-binding share cores pod: %s/%s, container: %s (allocated: %d, allocatable: %d)", + general.Warningf("[checkNonBindingMemoryResource] no enough memory resource for non-binding pod: %s/%s, container: %s (allocated: %d, allocatable: %d)", req.PodNamespace, req.PodName, req.ContainerName, shareCoresAllocated, numaAllocatableWithoutNUMABindingPods) return false, nil } - general.InfoS("checkNonBindingShareCoresMemoryResource memory successfully", + general.InfoS("checkNonBindingMemoryResource memory successfully", "podNamespace", req.PodNamespace, "podName", req.PodName, "containerName", req.ContainerName, diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers.go index 70a76e5876..c35c81563e 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers.go @@ -215,7 +215,7 @@ func (p *DynamicPolicy) numaBindingAllocationHandler(ctx context.Context, return nil, fmt.Errorf("numa binding without numa exclusive allocation result numa node size is %d, "+ "not equal to 1", len(req.Hint.Nodes)) } - allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0]) + allocationInfo.SetSpecifiedNUMAID(req.Hint.Nodes[0]) } p.state.SetAllocationInfo(v1.ResourceMemory, req.PodUid, req.ContainerName, allocationInfo, persistCheckpoint) @@ -321,7 +321,7 @@ func (p *DynamicPolicy) reclaimedCoresBestEffortNUMABindingAllocationHandler(ctx if req.Hint != nil && len(req.Hint.Nodes) == 1 && (reclaimActualBindingNUMAs.Contains(int(req.Hint.Nodes[0])) || !nonReclaimActualBindingNUMAs.Equals(machine.NewCPUSet(int(req.Hint.Nodes[0])))) { - allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0]) + 
allocationInfo.SetSpecifiedNUMAID(req.Hint.Nodes[0]) numaAllocationResult = machine.NewCPUSet(int(req.Hint.Nodes[0])) } else { numaAllocationResult = nonReclaimActualBindingNUMAs @@ -375,11 +375,10 @@ func (p *DynamicPolicy) reclaimedCoresBestEffortNUMABindingAllocationHandler(ctx return resp, nil } -func (p *DynamicPolicy) dedicatedCoresWithoutNUMABindingAllocationHandler(_ context.Context, - _ *pluginapi.ResourceRequest, persistCheckpoint bool, +func (p *DynamicPolicy) dedicatedCoresWithoutNUMABindingAllocationHandler(ctx context.Context, + req *pluginapi.ResourceRequest, persistCheckpoint bool, ) (*pluginapi.ResourceAllocationResponse, error) { - // todo: support dedicated_cores without NUMA binding - return nil, fmt.Errorf("not support dedicated_cores without NUMA binding") + return p.allocateNUMAsWithoutNUMABindingPods(ctx, req, apiconsts.PodAnnotationQoSLevelDedicatedCores, persistCheckpoint) } // numaBindingAllocationSidecarHandler allocates for sidecar @@ -823,7 +822,7 @@ func (p *DynamicPolicy) adjustAllocationEntriesForDedicatedCores(numaSetChangedC if !allocationInfo.CheckNUMABinding() { // not to adjust NUMA binding containers - // update container to target numa set for non-binding share cores + // update container to target numa set for non-binding dedicated cores p.updateNUMASetChangedContainers(numaSetChangedContainers, allocationInfo, numaWithoutNUMABindingPods) } } diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go index ca42255029..b1fa779730 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go @@ -52,21 +52,7 @@ func (p *DynamicPolicy) sharedCoresHintHandler(ctx context.Context, // TODO: support sidecar follow main container for non-binding share cores in future if req.ContainerType == pluginapi.ContainerType_MAIN { if 
p.enableNonBindingShareCoresMemoryResourceCheck { - ok, err := p.checkNonBindingShareCoresMemoryResource(req) - if err != nil { - general.Errorf("failed to check share cores resource: %q", err) - return nil, fmt.Errorf("failed to check share cores resource: %q", err) - } - - if !ok { - _ = p.emitter.StoreInt64(util.MetricNameShareCoresNoEnoughResourceFailed, 1, metrics.MetricTypeNameCount, metrics.ConvertMapToTags(map[string]string{ - "resource": v1.ResourceMemory.String(), - "podNamespace": req.PodNamespace, - "podName": req.PodName, - "containerName": req.ContainerName, - })...) - return nil, errNoAvailableMemoryHints - } + return p.nonNUMABindingHintHandler(ctx, req) } } @@ -128,10 +114,36 @@ func (p *DynamicPolicy) dedicatedCoresHintHandler(ctx context.Context, case apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable: return p.numaBindingHintHandler(ctx, req) default: - return p.dedicatedCoresWithoutNUMABindingHintHandler(ctx, req) + return p.nonNUMABindingHintHandler(ctx, req) } } +func (p *DynamicPolicy) nonNUMABindingHintHandler(_ context.Context, req *pluginapi.ResourceRequest) (*pluginapi.ResourceHintsResponse, error) { + if req.ContainerType == pluginapi.ContainerType_MAIN { + ok, err := p.checkNonBindingMemoryResource(req) + if err != nil { + general.Errorf("failed to check share cores resource: %q", err) + return nil, fmt.Errorf("failed to check share cores resource: %q", err) + } + + if !ok { + _ = p.emitter.StoreInt64(util.MetricNameNoEnoughNUMAResourceFailed, 1, metrics.MetricTypeNameCount, metrics.ConvertMapToTags(map[string]string{ + "resource": v1.ResourceMemory.String(), + "podNamespace": req.PodNamespace, + "podName": req.PodName, + "containerName": req.ContainerName, + "qosClass": req.Annotations[apiconsts.PodAnnotationQoSLevelKey], + })...) 
+ return nil, errNoAvailableMemoryHints + } + } + + return util.PackResourceHintsResponse(req, string(v1.ResourceMemory), + map[string]*pluginapi.ListOfTopologyHints{ + string(v1.ResourceMemory): nil, // indicates that there is no numa preference + }) +} + func (p *DynamicPolicy) numaBindingHintHandler(_ context.Context, req *pluginapi.ResourceRequest, ) (*pluginapi.ResourceHintsResponse, error) { @@ -328,11 +340,10 @@ func (p *DynamicPolicy) clearContainerAndRegenerateMachineState(req *pluginapi.R return resourcesMachineState, nil } -func (p *DynamicPolicy) dedicatedCoresWithoutNUMABindingHintHandler(_ context.Context, - _ *pluginapi.ResourceRequest, +func (p *DynamicPolicy) dedicatedCoresWithoutNUMABindingHintHandler(ctx context.Context, + req *pluginapi.ResourceRequest, ) (*pluginapi.ResourceHintsResponse, error) { - // todo: support dedicated_cores without NUMA binding - return nil, fmt.Errorf("not support dedicated_cores without NUMA binding") + return p.nonNUMABindingHintHandler(ctx, req) } // calculateHints is a helper function to calculate the topology hints @@ -531,7 +542,7 @@ func (p *DynamicPolicy) calculateNUMANodesLeft(numaNodes []int, numaNodesCPULeft := make(map[int]int64, len(numaNodes)) for _, nodeID := range numaNodes { allocatedQuantity := state.GetRequestedQuantityFromPodEntries(machineState[nodeID].PodEntries, - state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMABinding)) + state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMAAffinity)) availableCPUQuantity := numaHeadroomState[nodeID] - allocatedQuantity numaNodesCPULeft[nodeID] = availableCPUQuantity - req } diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor/numa_allocation_reactor.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor/numa_allocation_reactor.go index b43b79b8cc..cb19847847 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor/numa_allocation_reactor.go +++ 
b/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor/numa_allocation_reactor.go @@ -67,7 +67,7 @@ func (p numaPodAllocationWrapper) getNUMABindResult() (string, error) { } return strings.Join(intSlice, ","), nil } else { - numaID, err := p.AllocationInfo.GetSpecifiedNUMABindingNUMAID() + numaID, err := p.AllocationInfo.GetSpecifiedNUMAID() if err != nil { return "", err } diff --git a/pkg/agent/qrm-plugins/util/consts.go b/pkg/agent/qrm-plugins/util/consts.go index a2ddf583fc..55ac72bec1 100644 --- a/pkg/agent/qrm-plugins/util/consts.go +++ b/pkg/agent/qrm-plugins/util/consts.go @@ -59,7 +59,7 @@ const ( MetricNameMemoryNumaBalanceResult = "memory_numa_balance_result" // metrics for some cases - MetricNameShareCoresNoEnoughResourceFailed = "share_cores_no_enough_resource" + MetricNameNoEnoughNUMAResourceFailed = "no_enough_numa_resource" // metrics for numa allocation MetricNameMetricBasedNUMAAllocationEnabled = "metric_based_numa_allocation_enabled" diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor.go index 1f788b0f30..717ab65b14 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor.go @@ -94,11 +94,11 @@ type cpuResourceAdvisor struct { advisorUpdated bool - regionMap map[string]region.QoSRegion // map[regionName]region - reservedForReclaim map[int]int // map[numaID]reservedForReclaim - numaAvailable map[int]int // map[numaID]availableResource - numRegionsPerNuma map[int]int // map[numaID]regionQuantity - nonBindingNumas machine.CPUSet // numas without numa binding pods + regionMap map[string]region.QoSRegion // map[regionName]region + reservedForReclaim map[int]int // map[numaID]reservedForReclaim + numaAvailable map[int]int // map[numaID]availableResource + numRegionsPerNuma map[int]int // map[numaID]regionQuantity + nonCPUAffinityNUMAs machine.CPUSet // numas without numa binding or cpu affinity 
pod allowSharedCoresOverlapReclaimedCores bool @@ -125,11 +125,11 @@ func NewCPUResourceAdvisor(conf *config.Configuration, extraConf interface{}, me advisorUpdated: false, - regionMap: make(map[string]region.QoSRegion), - reservedForReclaim: make(map[int]int), - numaAvailable: make(map[int]int), - numRegionsPerNuma: make(map[int]int), - nonBindingNumas: machine.NewCPUSet(), + regionMap: make(map[string]region.QoSRegion), + reservedForReclaim: make(map[int]int), + numaAvailable: make(map[int]int), + numRegionsPerNuma: make(map[int]int), + nonCPUAffinityNUMAs: machine.NewCPUSet(), isolator: isolation.NewLoadIsolator(conf, extraConf, emitter, metaCache, metaServer), @@ -309,7 +309,7 @@ func (cra *cpuResourceAdvisor) setIsolatedContainers(enableIsolated bool) bool { func (cra *cpuResourceAdvisor) checkIsolationSafety() bool { shareAndIsolationPoolSize := 0 dedicatedNonExclusivePoolSize := 0 - nonBindingNumas := cra.metaServer.CPUDetails.NUMANodes() + nonCPUAffinityNUMAs := cra.metaServer.CPUDetails.NUMANodes() for _, r := range cra.regionMap { if r.Type() == configapi.QoSRegionTypeShare { controlKnob, err := r.GetProvision() @@ -328,10 +328,10 @@ func (cra *cpuResourceAdvisor) checkIsolationSafety() bool { }) } else if r.Type() == configapi.QoSRegionTypeDedicated { if r.IsNumaExclusive() { - nonBindingNumas = nonBindingNumas.Difference(r.GetBindingNumas()) - } else if r.IsNumaBinding() { - // dedicated numa-binding non-exclusive region, calculate the pool size based on binding numas - dedicatedNonExclusivePoolSize += int(math.Ceil(r.GetPodsRequest() / float64(r.GetBindingNumas().Size()))) + nonCPUAffinityNUMAs = nonCPUAffinityNUMAs.Difference(r.GetCPUAffinityNUMAs()) + } else if r.IsNUMAAffinity() { + // dedicated numa-binding non-exclusive region, calculate the pool size based on cpu affinity numas + dedicatedNonExclusivePoolSize += int(math.Ceil(r.GetPodsRequest() / float64(r.GetCPUAffinityNUMAs().Size()))) } else { // dedicated non-numa-binding non-exclusive 
region, calculate the pool size based on pods request dedicatedNonExclusivePoolSize += int(math.Ceil(r.GetPodsRequest())) @@ -339,7 +339,7 @@ func (cra *cpuResourceAdvisor) checkIsolationSafety() bool { } } - nonExclusiveSize := cra.metaServer.NUMAToCPUs.CPUSizeInNUMAs(cra.nonBindingNumas.ToSliceNoSortInt()...) + nonExclusiveSize := cra.metaServer.NUMAToCPUs.CPUSizeInNUMAs(cra.nonCPUAffinityNUMAs.ToSliceNoSortInt()...) klog.Infof("[qosaware-cpu] shareAndIsolationPoolSize %v, nonExclusiveSize %v,dedicatedNonExclusivePoolSize %v", shareAndIsolationPoolSize, nonExclusiveSize, dedicatedNonExclusivePoolSize) if shareAndIsolationPoolSize+dedicatedNonExclusivePoolSize > nonExclusiveSize { @@ -423,7 +423,7 @@ func (cra *cpuResourceAdvisor) assignToRegions(ci *types.ContainerInfo) ([]regio func (cra *cpuResourceAdvisor) assignShareContainerToRegions(ci *types.ContainerInfo) ([]region.QoSRegion, error) { numaID := commonstate.FakedNUMAID - if cra.conf.GenericSysAdvisorConfiguration.EnableShareCoresNumaBinding && ci.IsNumaBinding() { + if cra.conf.GenericSysAdvisorConfiguration.EnableShareCoresNumaBinding && ci.IsNUMAAffinity() { if ci.OwnerPoolName == "" { return nil, fmt.Errorf("empty owner pool name, %v/%v", ci.PodUID, ci.ContainerName) } @@ -538,21 +538,21 @@ func (cra *cpuResourceAdvisor) gcRegionMap() { } // updateAdvisorEssentials updates following essentials after assigning containers to regions: -// 1. non-binding numas, i.e. numas without numa binding containers -// 2. binding numas of non numa binding regions +// 1. non-cpu-affinity numas, i.e. numas without cpu affinity containers +// 2. cpu-affinity numas of non cpu-affinity regions // 3. 
region quantity of each numa func (cra *cpuResourceAdvisor) updateAdvisorEssentials() { - cra.nonBindingNumas = cra.metaServer.CPUDetails.NUMANodes() + cra.nonCPUAffinityNUMAs = cra.metaServer.CPUDetails.NUMANodes() cra.allowSharedCoresOverlapReclaimedCores = cra.conf.GetDynamicConfiguration().AllowSharedCoresOverlapReclaimedCores - // update non-binding numas + // update non-cpu-affinity numas for _, r := range cra.regionMap { - if !r.IsNumaBinding() { + if !r.IsNUMAAffinity() { continue } // ignore isolation region if r.Type() == configapi.QoSRegionTypeDedicated || r.Type() == configapi.QoSRegionTypeShare { - cra.nonBindingNumas = cra.nonBindingNumas.Difference(r.GetBindingNumas()) + cra.nonCPUAffinityNUMAs = cra.nonCPUAffinityNUMAs.Difference(r.GetCPUAffinityNUMAs()) } } @@ -562,13 +562,13 @@ func (cra *cpuResourceAdvisor) updateAdvisorEssentials() { } for _, r := range cra.regionMap { - // set binding numas for non numa binding regions - if !r.IsNumaBinding() && r.Type() == configapi.QoSRegionTypeShare { - r.SetBindingNumas(cra.nonBindingNumas) + // set cpu affinity numas for non numa binding regions + if !r.IsNUMAAffinity() && r.Type() == configapi.QoSRegionTypeShare { + r.SetCPUAffinityNUMAs(cra.nonCPUAffinityNUMAs) } // accumulate region quantity for each numa - for _, numaID := range r.GetBindingNumas().ToSliceInt() { + for _, numaID := range r.GetCPUAffinityNUMAs().ToSliceInt() { cra.numRegionsPerNuma[numaID] += 1 } } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go index e726037b52..b5721a9395 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go @@ -137,7 +137,7 @@ func (cra *cpuResourceAdvisor) initializeProvisionAssembler() error { if !ok { return fmt.Errorf("unsupported provision assembler %v", assemblerName) } - cra.provisionAssembler = 
initializer(cra.conf, cra.extraConf, &cra.regionMap, &cra.reservedForReclaim, &cra.numaAvailable, &cra.nonBindingNumas, &cra.allowSharedCoresOverlapReclaimedCores, cra.metaCache, cra.metaServer, cra.emitter) + cra.provisionAssembler = initializer(cra.conf, cra.extraConf, &cra.regionMap, &cra.reservedForReclaim, &cra.numaAvailable, &cra.nonCPUAffinityNUMAs, &cra.allowSharedCoresOverlapReclaimedCores, cra.metaCache, cra.metaServer, cra.emitter) return nil } @@ -150,7 +150,7 @@ func (cra *cpuResourceAdvisor) initializeHeadroomAssembler() error { if !ok { return fmt.Errorf("unsupported headroom assembler %v", assemblerName) } - cra.headroomAssembler = initializer(cra.conf, cra.extraConf, &cra.regionMap, &cra.reservedForReclaim, &cra.numaAvailable, &cra.nonBindingNumas, cra.metaCache, cra.metaServer, cra.emitter) + cra.headroomAssembler = initializer(cra.conf, cra.extraConf, &cra.regionMap, &cra.reservedForReclaim, &cra.numaAvailable, &cra.nonCPUAffinityNUMAs, cra.metaCache, cra.metaServer, cra.emitter) return nil } @@ -243,7 +243,7 @@ func (cra *cpuResourceAdvisor) getRegionMaxRequirement(r region.QoSRegion) float res = general.MaxFloat64(1, res) case configapi.QoSRegionTypeDedicated: if r.IsNumaExclusive() { - for _, numaID := range r.GetBindingNumas().ToSliceInt() { + for _, numaID := range r.GetCPUAffinityNUMAs().ToSliceInt() { res += float64(cra.numaAvailable[numaID] - cra.reservedForReclaim[numaID]) } } else { @@ -256,7 +256,7 @@ func (cra *cpuResourceAdvisor) getRegionMaxRequirement(r region.QoSRegion) float res = general.MaxFloat64(1, res) } default: - for _, numaID := range r.GetBindingNumas().ToSliceInt() { + for _, numaID := range r.GetCPUAffinityNUMAs().ToSliceInt() { res += float64(cra.numaAvailable[numaID] - cra.reservedForReclaim[numaID]) } } @@ -291,7 +291,7 @@ func (cra *cpuResourceAdvisor) getRegionMinRequirement(r region.QoSRegion) float func (cra *cpuResourceAdvisor) getRegionReservedForReclaim(r region.QoSRegion) float64 { res := 0.0 - for _, numaID 
:= range r.GetBindingNumas().ToSliceInt() { + for _, numaID := range r.GetCPUAffinityNUMAs().ToSliceInt() { divider := cra.numRegionsPerNuma[numaID] if divider < 1 { divider = 1 @@ -303,7 +303,7 @@ func (cra *cpuResourceAdvisor) getRegionReservedForReclaim(r region.QoSRegion) f func (cra *cpuResourceAdvisor) getRegionReservedForAllocate(r region.QoSRegion) float64 { res := 0.0 - for _, numaID := range r.GetBindingNumas().ToSliceInt() { + for _, numaID := range r.GetCPUAffinityNUMAs().ToSliceInt() { divider := cra.numRegionsPerNuma[numaID] if divider < 1 { divider = 1 @@ -320,7 +320,7 @@ func (cra *cpuResourceAdvisor) updateRegionEntries() { RegionName: r.Name(), RegionType: r.Type(), OwnerPoolName: r.OwnerPoolName(), - BindingNumas: r.GetBindingNumas(), + BindingNumas: r.GetCPUAffinityNUMAs(), Pods: r.GetPods(), } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler.go index 1daefe4d6f..28b0ab6117 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler.go @@ -38,7 +38,7 @@ type HeadroomAssembler interface { } type InitFunc func(conf *config.Configuration, extraConf interface{}, regionMap *map[string]region.QoSRegion, - reservedForReclaim *map[int]int, numaAvailable *map[int]int, nonBindingNumas *machine.CPUSet, + reservedForReclaim *map[int]int, numaAvailable *map[int]int, nonCPUAffinityNUMAs *machine.CPUSet, metaReader metacache.MetaReader, metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter) HeadroomAssembler var initializers sync.Map diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common.go index 0fdb05ee3b..3bfca6a5b9 
100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common.go @@ -36,12 +36,12 @@ import ( ) type HeadroomAssemblerCommon struct { - conf *config.Configuration - regionMap *map[string]region.QoSRegion - reservedForReclaim *map[int]int - numaAvailable *map[int]int - nonBindingNumas *machine.CPUSet - overloadState map[string]bool + conf *config.Configuration + regionMap *map[string]region.QoSRegion + reservedForReclaim *map[int]int + numaAvailable *map[int]int + nonCPUAffinityNUMAs *machine.CPUSet + overloadState map[string]bool metaReader metacache.MetaReader metaServer *metaserver.MetaServer @@ -49,16 +49,16 @@ type HeadroomAssemblerCommon struct { } func NewHeadroomAssemblerCommon(conf *config.Configuration, _ interface{}, regionMap *map[string]region.QoSRegion, - reservedForReclaim *map[int]int, numaAvailable *map[int]int, nonBindingNumas *machine.CPUSet, metaReader metacache.MetaReader, + reservedForReclaim *map[int]int, numaAvailable *map[int]int, nonCPUAffinityNUMAs *machine.CPUSet, metaReader metacache.MetaReader, metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter, ) HeadroomAssembler { return &HeadroomAssemblerCommon{ - conf: conf, - regionMap: regionMap, - reservedForReclaim: reservedForReclaim, - numaAvailable: numaAvailable, - nonBindingNumas: nonBindingNumas, - overloadState: make(map[string]bool), + conf: conf, + regionMap: regionMap, + reservedForReclaim: reservedForReclaim, + numaAvailable: numaAvailable, + nonCPUAffinityNUMAs: nonCPUAffinityNUMAs, + overloadState: make(map[string]bool), metaReader: metaReader, metaServer: metaServer, @@ -87,19 +87,19 @@ func (ha *HeadroomAssemblerCommon) getHeadroomDefault() (resource.Quantity, map[ return resource.Quantity{}, nil, fmt.Errorf("get headroom failed: reclaim pool not found") } - bindingNUMAs, nonBindingNUMAs, err := 
ha.getReclaimNUMABindingTopo(reclaimPoolInfo) + cpuAffinityNUMAs, nonCPUAffinityNUMAs, err := ha.getReclaimNUMAAffinityTopo(reclaimPoolInfo) if err != nil { - general.Errorf("getReclaimNUMABindingTop failed: %v", err) + general.Errorf("getReclaimNUMAAffinityTopo failed: %v", err) return resource.Quantity{}, nil, err } - general.Infof("RNB NUMA topo: %v, %v", bindingNUMAs, nonBindingNUMAs) + general.Infof("RNB NUMA topo: %v, %v", cpuAffinityNUMAs, nonCPUAffinityNUMAs) numaHeadroom := make(map[int]resource.Quantity, ha.metaServer.NumNUMANodes) totalHeadroom := resource.Quantity{} // get headroom per NUMA - for _, numaID := range bindingNUMAs { + for _, numaID := range cpuAffinityNUMAs { cpuSet, ok := reclaimPoolInfo.TopologyAwareAssignments[numaID] if !ok { return resource.Quantity{}, nil, fmt.Errorf("reclaim pool NOT found TopologyAwareAssignments with numaID: %v", numaID) @@ -117,9 +117,9 @@ func (ha *HeadroomAssemblerCommon) getHeadroomDefault() (resource.Quantity, map[ } // get global reclaim headroom - if len(nonBindingNUMAs) > 0 { + if len(nonCPUAffinityNUMAs) > 0 { cpuSets := machine.NewCPUSet() - for _, numaID := range nonBindingNUMAs { + for _, numaID := range nonCPUAffinityNUMAs { cpuSet, ok := reclaimPoolInfo.TopologyAwareAssignments[numaID] if !ok { return resource.Quantity{}, nil, fmt.Errorf("reclaim pool NOT found TopologyAwareAssignments with numaID: %v", numaID) @@ -133,8 +133,8 @@ func (ha *HeadroomAssemblerCommon) getHeadroomDefault() (resource.Quantity, map[ return resource.Quantity{}, nil, fmt.Errorf("get reclaim Metrics failed: %v", err) } - headroomPerNUMA := reclaimMetrics.ReclaimedCoresSupply / float64(len(nonBindingNUMAs)) - for _, numaID := range nonBindingNUMAs { + headroomPerNUMA := reclaimMetrics.ReclaimedCoresSupply / float64(len(nonCPUAffinityNUMAs)) + for _, numaID := range nonCPUAffinityNUMAs { q := *resource.NewQuantity(int64(headroomPerNUMA), resource.DecimalSI) numaHeadroom[numaID] = q totalHeadroom.Add(q) @@ -163,12 +163,12 @@ 
func (ha *HeadroomAssemblerCommon) getHeadroomByUtil() (resource.Quantity, map[i return resource.Quantity{}, nil, fmt.Errorf("get headroom by util failed: reclaim pool not found") } - bindingNUMAs, nonBindingNUMAs, err := ha.getReclaimNUMABindingTopo(reclaimPoolInfo) + cpuAffinityNUMAs, nonCPUAffinityNUMAs, err := ha.getReclaimNUMAAffinityTopo(reclaimPoolInfo) if err != nil { - general.Errorf("getReclaimNUMABindingTop failed: %v", err) + general.Errorf("getReclaimNUMAAffinityTopo failed: %v", err) return resource.Quantity{}, nil, err } - general.Infof("RNB NUMA topo: %v, %v", bindingNUMAs, nonBindingNUMAs) + general.Infof("RNB NUMA topo: %v, %v", cpuAffinityNUMAs, nonCPUAffinityNUMAs) numaHeadroom := make(map[int]resource.Quantity, ha.metaServer.NumNUMANodes) totalHeadroom := resource.Quantity{} @@ -180,7 +180,7 @@ func (ha *HeadroomAssemblerCommon) getHeadroomByUtil() (resource.Quantity, map[i } // get headroom per NUMA - for _, numaID := range bindingNUMAs { + for _, numaID := range cpuAffinityNUMAs { cpuSet, ok := reclaimPoolInfo.TopologyAwareAssignments[numaID] if !ok { return resource.Quantity{}, nil, fmt.Errorf("reclaim pool NOT found TopologyAwareAssignments with numaID: %v", numaID) @@ -205,10 +205,10 @@ func (ha *HeadroomAssemblerCommon) getHeadroomByUtil() (resource.Quantity, map[i } // get global reclaim headroom - if len(nonBindingNUMAs) > 0 { + if len(nonCPUAffinityNUMAs) > 0 { cpusets := machine.NewCPUSet() lastReclaimedCPUPerNumaForCalculate := make(map[int]float64) - for _, numaID := range nonBindingNUMAs { + for _, numaID := range nonCPUAffinityNUMAs { cpuSet, ok := reclaimPoolInfo.TopologyAwareAssignments[numaID] if !ok { return resource.Quantity{}, nil, fmt.Errorf("reclaim pool NOT found TopologyAwareAssignments with numaID: %v", numaID) @@ -223,7 +223,7 @@ func (ha *HeadroomAssemblerCommon) getHeadroomByUtil() (resource.Quantity, map[i return resource.Quantity{}, nil, fmt.Errorf("get reclaim Metrics failed: %v", err) } - totalCPUSize := 
ha.metaServer.NUMAToCPUs.CPUSizeInNUMAs(nonBindingNUMAs...) + totalCPUSize := ha.metaServer.NUMAToCPUs.CPUSizeInNUMAs(nonCPUAffinityNUMAs...) if totalCPUSize == 0 { return resource.Quantity{}, nil, fmt.Errorf("totalCPUSize is 0") } @@ -234,7 +234,7 @@ func (ha *HeadroomAssemblerCommon) getHeadroomByUtil() (resource.Quantity, map[i return resource.Quantity{}, nil, fmt.Errorf("get util-based headroom failed: %v", err) } - for _, numaID := range nonBindingNUMAs { + for _, numaID := range nonCPUAffinityNUMAs { numaCPUSize := ha.metaServer.NUMAToCPUs.CPUSizeInNUMAs(numaID) headroomForNUMA := float64(headroom.Value()) * float64(numaCPUSize) / float64(totalCPUSize) q := *resource.NewQuantity(int64(headroomForNUMA), resource.DecimalSI) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_test.go index f25e40c1e0..1d557aafb9 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_test.go @@ -981,7 +981,7 @@ func TestHeadroomAssemblerCommon_GetHeadroom(t *testing.T) { for name, regionInfo := range tt.fields.entries { r := region.NewQoSRegionBase(name, regionInfo.OwnerPoolName, regionInfo.RegionType, conf, nil, false, false, metaCache, metaServer, metrics.DummyMetrics{}) - r.SetBindingNumas(regionInfo.BindingNumas) + r.SetCPUAffinityNUMAs(regionInfo.BindingNumas) r.SetEssentials(types.ResourceEssentials{ EnableReclaim: tt.fields.reclaimedResourceConfiguration.EnableReclaim, }) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_util.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_util.go index e2ccf56532..c0f0abc7e3 100644 --- 
a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_util.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_util.go @@ -79,7 +79,7 @@ func (ha *HeadroomAssemblerCommon) getLastReclaimedCPUPerNUMA() (map[int]float64 return util.GetReclaimedCPUPerNUMA(cnr.Status.TopologyZone), nil } -func (ha *HeadroomAssemblerCommon) getReclaimNUMABindingTopo(reclaimPool *types.PoolInfo) (bindingNUMAs, nonBindingNumas []int, err error) { +func (ha *HeadroomAssemblerCommon) getReclaimNUMAAffinityTopo(reclaimPool *types.PoolInfo) (cpuAffinityNUMAs, nonCPUAffinityNUMAs []int, err error) { if ha.metaServer == nil { err = fmt.Errorf("invalid metaserver") return @@ -136,6 +136,7 @@ func (ha *HeadroomAssemblerCommon) getReclaimNUMABindingTopo(reclaimPool *types. switch qos { case consts.PodAnnotationQoSLevelReclaimedCores, consts.PodAnnotationQoSLevelSharedCores: + // todo: cpu numa affinity result numaRet, ok := pod.Annotations[consts.PodAnnotationNUMABindResultKey] if !ok || numaRet == FakedNUMAID { continue @@ -143,7 +144,7 @@ func (ha *HeadroomAssemblerCommon) getReclaimNUMABindingTopo(reclaimPool *types. numaID, err := strconv.Atoi(numaRet) if err != nil { - klog.Errorf("invalid numa binding result: %s, %s, %v", pod.Name, numaRet, err) + klog.Errorf("invalid numa allocation result: %s, %s, %v", pod.Name, numaRet, err) continue } @@ -172,9 +173,9 @@ func (ha *HeadroomAssemblerCommon) getReclaimNUMABindingTopo(reclaimPool *types. 
for numaID, bound := range numaMap { if bound { - bindingNUMAs = append(bindingNUMAs, numaID) + cpuAffinityNUMAs = append(cpuAffinityNUMAs, numaID) } else { - nonBindingNumas = append(nonBindingNumas, numaID) + nonCPUAffinityNUMAs = append(nonCPUAffinityNUMAs, numaID) } } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler.go index e7cf0b832b..410a0512df 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler.go @@ -36,7 +36,7 @@ type ProvisionAssembler interface { } type InitFunc func(conf *config.Configuration, extraConf interface{}, regionMap *map[string]region.QoSRegion, - reservedForReclaim *map[int]int, numaAvailable *map[int]int, nonBindingNumas *machine.CPUSet, allowSharedCoresOverlapReclaimedCores *bool, + reservedForReclaim *map[int]int, numaAvailable *map[int]int, nonCPUAffinityNUMAs *machine.CPUSet, allowSharedCoresOverlapReclaimedCores *bool, reader metacache.MetaReader, metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter) ProvisionAssembler var initializers sync.Map diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common.go index 7bbfb608ea..f3d62203bb 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common.go @@ -40,7 +40,7 @@ type ProvisionAssemblerCommon struct { regionMap *map[string]region.QoSRegion reservedForReclaim *map[int]int numaAvailable *map[int]int - nonBindingNumas *machine.CPUSet + nonCPUAffinityNUMAs *machine.CPUSet 
allowSharedCoresOverlapReclaimedCores *bool metaReader metacache.MetaReader @@ -49,7 +49,7 @@ type ProvisionAssemblerCommon struct { } func NewProvisionAssemblerCommon(conf *config.Configuration, _ interface{}, regionMap *map[string]region.QoSRegion, - reservedForReclaim *map[int]int, numaAvailable *map[int]int, nonBindingNumas *machine.CPUSet, allowSharedCoresOverlapReclaimedCores *bool, + reservedForReclaim *map[int]int, numaAvailable *map[int]int, nonCPUAffinityNUMAs *machine.CPUSet, allowSharedCoresOverlapReclaimedCores *bool, metaReader metacache.MetaReader, metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter, ) ProvisionAssembler { return &ProvisionAssemblerCommon{ @@ -57,7 +57,7 @@ func NewProvisionAssemblerCommon(conf *config.Configuration, _ interface{}, regi regionMap: regionMap, reservedForReclaim: reservedForReclaim, numaAvailable: numaAvailable, - nonBindingNumas: nonBindingNumas, + nonCPUAffinityNUMAs: nonCPUAffinityNUMAs, allowSharedCoresOverlapReclaimedCores: allowSharedCoresOverlapReclaimedCores, metaReader: metaReader, @@ -72,9 +72,9 @@ func (pa *ProvisionAssemblerCommon) assembleDedicatedNUMAExclusiveRegion(r regio return err } - regionNuma := r.GetBindingNumas().ToSliceInt()[0] // always one binding numa for this type of region - reservedForReclaim := getNUMAsResource(*pa.reservedForReclaim, r.GetBindingNumas()) - available := getNUMAsResource(*pa.numaAvailable, r.GetBindingNumas()) + regionNuma := r.GetCPUAffinityNUMAs().ToSliceInt()[0] // always one cpu affinity numa for this type of region + reservedForReclaim := getNUMAsResource(*pa.reservedForReclaim, r.GetCPUAffinityNUMAs()) + available := getNUMAsResource(*pa.numaAvailable, r.GetCPUAffinityNUMAs()) var reclaimedCoresSize int reclaimedCoresLimit := float64(-1) @@ -182,7 +182,7 @@ func (pa *ProvisionAssemblerCommon) assembleNUMABindingNUMAExclusive(regionHelpe for numaID := range *pa.numaAvailable { dedicatedNUMAExclusiveRegions := regionHelper.GetRegions(numaID, 
configapi.QoSRegionTypeDedicated) for _, r := range dedicatedNUMAExclusiveRegions { - if !r.IsNumaBinding() || !r.IsNumaExclusive() { + if !r.IsNUMAAffinity() || !r.IsNumaExclusive() { continue } @@ -227,7 +227,7 @@ func (pa *ProvisionAssemblerCommon) assembleWithoutNUMAExclusivePool( var numaSet machine.CPUSet if numaID == commonstate.FakedNUMAID { - numaSet = *pa.nonBindingNumas + numaSet = *pa.nonCPUAffinityNUMAs } else { numaSet = machine.NewCPUSet(numaID) } @@ -599,12 +599,12 @@ func extractDedicatedRegionInfo(regions []region.QoSRegion) (regionInfo, error) regionName := r.Name() dedicatedRequirements[regionName] = general.Max(1, int(controlKnob[configapi.ControlKnobNonReclaimedCPURequirement].Value)) - if r.IsNumaBinding() { - numaBindingSize := r.GetBindingNumas().Size() - if numaBindingSize == 0 { - return regionInfo{}, fmt.Errorf("numa binding size is zero, region name: %s", r.Name()) + if r.IsNUMAAffinity() { + cpuAffinityNUMASize := r.GetCPUAffinityNUMAs().Size() + if cpuAffinityNUMASize == 0 { + return regionInfo{}, fmt.Errorf("cpu affinity numa size is zero, region name: %s", r.Name()) } - dedicatedRequests[regionName] = int(math.Ceil(r.GetPodsRequest() / float64(numaBindingSize))) + dedicatedRequests[regionName] = int(math.Ceil(r.GetPodsRequest() / float64(cpuAffinityNUMASize))) } else { dedicatedRequests[regionName] = int(math.Ceil(r.GetPodsRequest())) } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go index eeca28b187..59f16b4699 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go @@ -45,7 +45,7 @@ type FakeRegion struct { ownerPoolName string regionType configapi.QoSRegionType bindingNumas machine.CPUSet - 
isNumaBinding bool + isNUMAAffinity bool isNumaExclusive bool podSets types.PodSet controlKnob types.ControlKnob @@ -87,7 +87,7 @@ func (fake *FakeRegion) IsEmpty() bool { return false } func (fake *FakeRegion) Clear() {} -func (fake *FakeRegion) GetBindingNumas() machine.CPUSet { +func (fake *FakeRegion) GetCPUAffinityNUMAs() machine.CPUSet { return fake.bindingNumas } @@ -103,7 +103,7 @@ func (fake *FakeRegion) GetPodsRequest() float64 { return 0 } -func (fake *FakeRegion) SetBindingNumas(bindingNumas machine.CPUSet) { +func (fake *FakeRegion) SetCPUAffinityNUMAs(bindingNumas machine.CPUSet) { fake.bindingNumas = bindingNumas } @@ -111,12 +111,12 @@ func (fake *FakeRegion) SetEssentials(essentials types.ResourceEssentials) { fake.essentials = essentials } -func (fake *FakeRegion) SetIsNumaBinding(isNumaBinding bool) { - fake.isNumaBinding = isNumaBinding +func (fake *FakeRegion) SetIsNumaBinding(isNUMAAffinity bool) { + fake.isNUMAAffinity = isNUMAAffinity } -func (fake *FakeRegion) IsNumaBinding() bool { - return fake.isNumaBinding +func (fake *FakeRegion) IsNUMAAffinity() bool { + return fake.isNUMAAffinity } func (fake *FakeRegion) IsNumaExclusive() bool { return fake.isNumaExclusive } func (fake *FakeRegion) SetThrottled(throttled bool) { fake.throttled = throttled } @@ -1031,15 +1031,15 @@ func TestAssembleProvision(t *testing.T) { require.True(t, ok, "pool config doesn't exist") require.NoError(t, metaCache.SetPoolInfo(poolInfo.PoolName, &poolInfo), "failed to set pool info %s", poolInfo.PoolName) region := NewFakeRegion(poolConfig.poolName, poolConfig.poolType, poolConfig.poolName) - region.SetBindingNumas(poolConfig.numa) + region.SetCPUAffinityNUMAs(poolConfig.numa) region.SetIsNumaBinding(poolConfig.isNumaBinding) region.SetProvision(poolConfig.provision) region.TryUpdateProvision() - require.Equal(t, poolConfig.isNumaBinding, region.IsNumaBinding(), "invalid numa binding state") + require.Equal(t, poolConfig.isNumaBinding, region.IsNUMAAffinity(), 
"invalid numa binding state") regionMap[region.name] = region - if region.IsNumaBinding() { - nonBindingNumas = nonBindingNumas.Difference(region.GetBindingNumas()) + if region.IsNUMAAffinity() { + nonBindingNumas = nonBindingNumas.Difference(region.GetCPUAffinityNUMAs()) } } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/helper.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/helper.go index de23aacbf3..fb6156ae9e 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/helper.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/helper.go @@ -188,8 +188,8 @@ func (rm *RegionMapHelper) GetRegions(numaID int, regionType configapi.QoSRegion func (rm *RegionMapHelper) preProcessRegions(regions map[string]region.QoSRegion) { for _, r := range regions { - if r.IsNumaBinding() { - for _, numaID := range r.GetBindingNumas().ToSliceInt() { + if r.IsNUMAAffinity() { + for _, numaID := range r.GetCPUAffinityNUMAs().ToSliceInt() { numaRecords, ok := rm.regions[numaID] if !ok { numaRecords = map[configapi.QoSRegionType][]region.QoSRegion{} diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy.go index 4f1dbbc65e..d9a405c5d2 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy.go @@ -34,8 +34,8 @@ import ( type HeadroomPolicy interface { // SetPodSet overwrites policy's pod/container record SetPodSet(types.PodSet) - // SetBindingNumas overwrites the numa ids this policy interested in - SetBindingNumas(machine.CPUSet) + // SetCPUAffinityNUMAs overwrites the numa ids this policy interested in + SetCPUAffinityNUMAs(machine.CPUSet) // SetEssentials updates essential values for policy update 
SetEssentials(essentials types.ResourceEssentials) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_base.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_base.go index d7558cd637..68a9117721 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_base.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_base.go @@ -28,11 +28,11 @@ import ( type PolicyBase struct { types.ResourceEssentials - regionName string - regionType configapi.QoSRegionType - ownerPoolName string - podSet types.PodSet - bindingNumas machine.CPUSet + regionName string + regionType configapi.QoSRegionType + ownerPoolName string + podSet types.PodSet + cpuAffinityNUMAs machine.CPUSet metaReader metacache.MetaReader metaServer *metaserver.MetaServer @@ -59,8 +59,8 @@ func (p *PolicyBase) SetPodSet(podSet types.PodSet) { p.podSet = podSet.Clone() } -func (p *PolicyBase) SetBindingNumas(numas machine.CPUSet) { - p.bindingNumas = numas +func (p *PolicyBase) SetCPUAffinityNUMAs(numas machine.CPUSet) { + p.cpuAffinityNUMAs = numas } func (p *PolicyBase) SetEssentials(essentials types.ResourceEssentials) { diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_canonical.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_canonical.go index 25d33dbfd1..82ff4e4737 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_canonical.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_canonical.go @@ -68,10 +68,10 @@ func (p *PolicyCanonical) Update() error { } // FIXME: metric server doesn't support to report cpu usage in numa granularity, - // so we split cpu usage evenly across the binding numas of container. - if p.bindingNumas.Size() > 0 { + // so we split cpu usage evenly across the cpu affinity numas of container. 
+ if p.cpuAffinityNUMAs.Size() > 0 { cpuSize := 0 - for _, numaID := range p.bindingNumas.ToSliceInt() { + for _, numaID := range p.cpuAffinityNUMAs.ToSliceInt() { cpuSize += ci.TopologyAwareAssignments[numaID].Size() } cpuAssignmentCPUs := machine.CountCPUAssignmentCPUs(ci.TopologyAwareAssignments) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_canonical_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_canonical_test.go index 1efdb3f2af..f636eeaa6b 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_canonical_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_canonical_test.go @@ -80,7 +80,7 @@ func newTestPolicyCanonical(t *testing.T, checkpointDir string, stateFileDir str p := NewPolicyCanonical(regionInfo.RegionName, regionInfo.RegionType, regionInfo.OwnerPoolName, conf, nil, metaCacheCanonical, metaServerCanonical, metrics.DummyMetrics{}) metaCacheCanonical.SetRegionInfo(regionInfo.RegionName, ®ionInfo) - p.SetBindingNumas(regionInfo.BindingNumas) + p.SetCPUAffinityNUMAs(regionInfo.BindingNumas) p.SetPodSet(podSet) return p diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_none.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_none.go index 616559d730..1f06217a40 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_none.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_none.go @@ -36,7 +36,7 @@ func NewPolicyNone(_ string, _ configapi.QoSRegionType, _ string, } func (p *PolicyNone) SetPodSet(types.PodSet) {} -func (p *PolicyNone) SetBindingNumas(machine.CPUSet) {} +func (p *PolicyNone) SetCPUAffinityNUMAs(machine.CPUSet) {} func (p *PolicyNone) SetEssentials(types.ResourceEssentials) {} func (p *PolicyNone) Update() error { return nil } func (p 
*PolicyNone) GetHeadroom() (float64, error) { return types.InvalidHeadroom, nil } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_numa_dedicated.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_numa_dedicated.go index cb3ee98a1b..91dc358ab1 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_numa_dedicated.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_numa_dedicated.go @@ -97,10 +97,10 @@ func (p *PolicyNUMADedicated) Update() error { } // FIXME: metric server doesn't support to report cpu usage in numa granularity, - // so we split cpu usage evenly across the binding numas of container. - if p.bindingNumas.Size() > 0 { + // so we split cpu usage evenly across the cpu affinity numas of container. + if p.cpuAffinityNUMAs.Size() > 0 { cpuSize := 0 - for _, numaID := range p.bindingNumas.ToSliceInt() { + for _, numaID := range p.cpuAffinityNUMAs.ToSliceInt() { cpuSize += ci.TopologyAwareAssignments[numaID].Size() } cpuAssignmentCPUs := machine.CountCPUAssignmentCPUs(ci.TopologyAwareAssignments) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_numa_dedicated_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_numa_dedicated_test.go index 18ce48fe7a..45e639fe92 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_numa_dedicated_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/headroompolicy/policy_numa_dedicated_test.go @@ -80,7 +80,7 @@ func newTestPolicyNumaExclusive(t *testing.T, checkpointDir string, stateFileDir p := NewPolicyNUMADedicated(regionInfo.RegionName, regionInfo.RegionType, regionInfo.OwnerPoolName, conf, nil, metaCacheNumaExclusive, metaServerNumaExclusive, metrics.DummyMetrics{}) metaCacheNumaExclusive.SetRegionInfo(regionInfo.RegionName, ®ionInfo) - 
p.SetBindingNumas(regionInfo.BindingNumas) + p.SetCPUAffinityNUMAs(regionInfo.BindingNumas) p.SetPodSet(podSet) return p diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy.go index 70a9040742..3de7dcc955 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy.go @@ -34,8 +34,10 @@ type ProvisionPolicy interface { SetEssentials(resourceEssentials types.ResourceEssentials, controlEssentials types.ControlEssentials) // SetPodSet overwrites policy's pod/container record SetPodSet(types.PodSet) - // SetBindingNumas overwrites the numa ids this policy interested in - SetBindingNumas(numas machine.CPUSet, isNUMABinding bool) + // SetCPUAffinityNUMAs overwrites the numa ids this policy is interested in. + // Notice: SetCPUAffinityNUMAs is semantically identical to the old SetBindingNumas; + // it is renamed only for consistency with the new naming. 
+ SetCPUAffinityNUMAs(numas machine.CPUSet, isNUMAAffinity bool) // Update triggers an episode of algorithm update Update() error diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_base.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_base.go index d66952867f..d270edf813 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_base.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_base.go @@ -35,8 +35,8 @@ type PolicyBase struct { regionType configapi.QoSRegionType ownerPoolName string podSet types.PodSet - bindingNumas machine.CPUSet - isNUMABinding bool + cpuAffinityNUMAs machine.CPUSet + isNUMAAffinity bool controlKnobAdjusted types.ControlKnob metaReader metacache.MetaReader @@ -67,9 +67,9 @@ func (p *PolicyBase) SetPodSet(podSet types.PodSet) { p.podSet = podSet.Clone() } -func (p *PolicyBase) SetBindingNumas(numas machine.CPUSet, isNUMABinding bool) { - p.bindingNumas = numas - p.isNUMABinding = isNUMABinding +func (p *PolicyBase) SetCPUAffinityNUMAs(numas machine.CPUSet, isNUMAAffinity bool) { + p.cpuAffinityNUMAs = numas + p.isNUMAAffinity = isNUMAAffinity } func (p *PolicyBase) GetControlKnobAdjusted() (types.ControlKnob, error) { @@ -95,5 +95,5 @@ func (p *PolicyBase) GetControlKnobAdjusted() (types.ControlKnob, error) { } func (p *PolicyBase) GetMetaInfo() string { - return fmt.Sprintf("[regionName: %s, regionType: %s, ownerPoolName: %s, NUMAs: %v]", p.regionName, p.regionType, p.ownerPoolName, p.bindingNumas.String()) + return fmt.Sprintf("[regionName: %s, regionType: %s, ownerPoolName: %s, NUMAs: %v]", p.regionName, p.regionType, p.ownerPoolName, p.cpuAffinityNUMAs.String()) } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_canonical.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_canonical.go index fb1df64f2c..bb29879ad8 
100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_canonical.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_canonical.go @@ -114,10 +114,10 @@ func (p *PolicyCanonical) estimateCPUUsage() (float64, error) { } var containerEstimation float64 - if ci.IsDedicatedNumaBinding() && !enableReclaim { + if ci.IsDedicatedNUMAAffinity() && !enableReclaim { if ci.ContainerType == v1alpha1.ContainerType_MAIN { - bindingNumas := machine.GetCPUAssignmentNUMAs(ci.TopologyAwareAssignments) - containerEstimation = float64(p.metaServer.NUMAToCPUs.CPUSizeInNUMAs(bindingNumas.ToSliceNoSortInt()...)) + cpuAffinityNUMAs := machine.GetCPUAssignmentNUMAs(ci.TopologyAwareAssignments) + containerEstimation = float64(p.metaServer.NUMAToCPUs.CPUSizeInNUMAs(cpuAffinityNUMAs.ToSliceNoSortInt()...)) klog.Infof("[qosaware-cpu-canonical] container %s/%s occupied cpu %v", ci.PodName, ci.ContainerName, containerEstimation) } else { containerEstimation = 0 @@ -131,10 +131,10 @@ func (p *PolicyCanonical) estimateCPUUsage() (float64, error) { } // FIXME: metric server doesn't support to report cpu usage in numa granularity, - // so we split cpu usage evenly across the binding numas of container. - if p.bindingNumas.Size() > 0 { + // so we split cpu usage evenly across the cpu affinity numas of container. 
+ if p.cpuAffinityNUMAs.Size() > 0 { cpuSize := 0 - for _, numaID := range p.bindingNumas.ToSliceInt() { + for _, numaID := range p.cpuAffinityNUMAs.ToSliceInt() { cpuSize += ci.TopologyAwareAssignments[numaID].Size() } cpuAssignmentCPUs := machine.CountCPUAssignmentCPUs(ci.TopologyAwareAssignments) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_canonical_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_canonical_test.go index 14606e7d87..40f7403abd 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_canonical_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_canonical_test.go @@ -132,7 +132,7 @@ func newTestPolicyCanonical(t *testing.T, checkpointDir string, stateFileDir str err = metaCacheTmp.SetRegionInfo(regionInfo.RegionName, ®ionInfo) assert.NoError(t, err) - p.SetBindingNumas(regionInfo.BindingNumas, false) + p.SetCPUAffinityNUMAs(regionInfo.BindingNumas, false) p.SetPodSet(podSet) return p diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_dynamic_quota.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_dynamic_quota.go index 8dec884cd1..b8d2e2eb6f 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_dynamic_quota.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_dynamic_quota.go @@ -50,7 +50,7 @@ func NewPolicyDynamicQuota(regionName string, regionType configapi.QoSRegionType } func (p *PolicyDynamicQuota) isCPUQuotaAsControlKnob() bool { - if !p.isNUMABinding { + if !p.isNUMAAffinity { return false } @@ -60,20 +60,20 @@ func (p *PolicyDynamicQuota) isCPUQuotaAsControlKnob() bool { func (p *PolicyDynamicQuota) updateForCPUQuota() error { indicator := p.Indicators[string(workloadv1alpha1.ServiceSystemIndicatorNameCPUUsageRatio)] - 
reclaimPath := common.GetReclaimRelativeRootCgroupPath(p.conf.ReclaimRelativeRootCgroupPath, p.bindingNumas.ToSliceInt()[0]) + reclaimPath := common.GetReclaimRelativeRootCgroupPath(p.conf.ReclaimRelativeRootCgroupPath, p.cpuAffinityNUMAs.ToSliceInt()[0]) data, err := p.metaServer.GetCgroupMetric(reclaimPath, pkgconsts.MetricCPUUsageCgroup) if err != nil { return err } reclaimCoresCPUUsage := data.Value - totalNUMACPUSize := p.metaServer.NUMAToCPUs.CPUSizeInNUMAs(p.bindingNumas.ToSliceNoSortInt()...) + totalNUMACPUSize := p.metaServer.NUMAToCPUs.CPUSizeInNUMAs(p.cpuAffinityNUMAs.ToSliceNoSortInt()...) if totalNUMACPUSize == 0 { return fmt.Errorf("invalid cpu count per numa: %d, %d", p.metaServer.NumNUMANodes, p.metaServer.NumCPUs) } quota := general.MaxFloat64(float64(totalNUMACPUSize)*(indicator.Target-indicator.Current)+reclaimCoresCPUUsage, p.ReservedForReclaim) - general.InfoS("metrics", "cpuUsage", reclaimCoresCPUUsage, "totalNUMACPUSize", totalNUMACPUSize, "target", indicator.Target, "current", indicator.Current, "quota", quota, "numas", p.bindingNumas.String()) + general.InfoS("metrics", "cpuUsage", reclaimCoresCPUUsage, "totalNUMACPUSize", totalNUMACPUSize, "target", indicator.Target, "current", indicator.Current, "quota", quota, "numas", p.cpuAffinityNUMAs.String()) p.controlKnobAdjusted = types.ControlKnob{ configapi.ControlKnobReclaimedCoresCPUQuota: types.ControlKnobItem{ diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_none.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_none.go index 73c0eb0210..4f44c6df61 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_none.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_none.go @@ -37,7 +37,7 @@ func NewPolicyNone(_ string, _ configapi.QoSRegionType, _ string, func (p *PolicyNone) SetEssentials(types.ResourceEssentials, types.ControlEssentials) {} func 
(p *PolicyNone) SetPodSet(types.PodSet) {} -func (p *PolicyNone) SetBindingNumas(machine.CPUSet, bool) {} +func (p *PolicyNone) SetCPUAffinityNUMAs(machine.CPUSet, bool) {} func (p *PolicyNone) Update() error { return nil } func (p *PolicyNone) GetControlKnobAdjusted() (types.ControlKnob, error) { return types.InvalidControlKnob, nil diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_rama_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_rama_test.go index 5f2ffec640..be67e19084 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_rama_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/provisionpolicy/policy_rama_test.go @@ -134,7 +134,7 @@ func newTestPolicyRama(t *testing.T, checkpointDir string, stateFileDir string, err = metaCacheTmp.SetRegionInfo(regionInfo.RegionName, ®ionInfo) assert.NoError(t, err) - p.SetBindingNumas(regionInfo.BindingNumas, false) + p.SetCPUAffinityNUMAs(regionInfo.BindingNumas, false) p.SetPodSet(podSet) return p diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region.go index 2ed48138d4..f6def34cbc 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region.go @@ -39,21 +39,23 @@ type QoSRegion interface { // Clear clears all topology and container info in region Clear() - // GetBindingNumas returns numa ids assigned to this region - GetBindingNumas() machine.CPUSet + // GetCPUAffinityNUMAs returns numa ids assigned to this region + GetCPUAffinityNUMAs() machine.CPUSet // GetPods return the latest pod set of this region GetPods() types.PodSet // GetPodsRequest returns the total CPU requests of this region GetPodsRequest() float64 - // SetBindingNumas overwrites numa ids assigned to this region - 
SetBindingNumas(machine.CPUSet) + // SetCPUAffinityNUMAs overwrites numa ids assigned to this region + SetCPUAffinityNUMAs(machine.CPUSet) // SetEssentials updates essential region values for policy update SetEssentials(essentials types.ResourceEssentials) - IsNumaBinding() bool IsNumaExclusive() bool + // IsNUMAAffinity returns true if this region is numa affinity + IsNUMAAffinity() bool + // SetThrottled overwrites region's throttled status SetThrottled(throttled bool) // AddContainer stores a container keyed by pod uid and container name to region @@ -100,7 +102,7 @@ func GetRegionBasicMetricTags(r QoSRegion) []metrics.MetricTag { {Key: "region_type", Val: string(r.Type())}, {Key: "owner_pool_name", Val: r.OwnerPoolName()}, {Key: "pool_type", Val: commonstate.GetPoolType(r.OwnerPoolName())}, - {Key: "binding_numas", Val: r.GetBindingNumas().String()}, + {Key: "cpu_affinity_numas", Val: r.GetCPUAffinityNUMAs().String()}, {Key: "provision_policy_prior", Val: string(provisionPolicyPrior)}, {Key: "provision_policy_in_use", Val: string(provisionPolicyInUse)}, {Key: "headroom_policy_prior", Val: string(headroomPolicyPrior)}, diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_base.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_base.go index 961f34c4d7..93fbe24d5f 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_base.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_base.go @@ -177,8 +177,8 @@ type QoSRegionBase struct { types.ResourceEssentials types.ControlEssentials - // bindingNumas records numas assigned to this region - bindingNumas machine.CPUSet + // cpuAffinityNUMAs records numas assigned to this region + cpuAffinityNUMAs machine.CPUSet // podSet records current pod and containers in region keyed by pod uid and container name podSet types.PodSet // containerTopologyAwareAssignment changes dynamically by adding container @@ -223,13 +223,13 @@ type QoSRegionBase 
struct { // idle: true if containers in the region is not running as usual, maybe there is no incoming business traffic idle atomic.Bool - isNumaBinding bool + isNUMAAffinity bool isNumaExclusive bool } // NewQoSRegionBase returns a base qos region instance with common region methods func NewQoSRegionBase(name string, ownerPoolName string, regionType v1alpha1.QoSRegionType, - conf *config.Configuration, extraConf interface{}, isNumaBinding bool, isNumaExclusive bool, + conf *config.Configuration, extraConf interface{}, isNumaAffinity bool, isNumaExclusive bool, metaReader metacache.MetaReader, metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter, ) *QoSRegionBase { r := &QoSRegionBase{ @@ -238,7 +238,7 @@ func NewQoSRegionBase(name string, ownerPoolName string, regionType v1alpha1.QoS ownerPoolName: ownerPoolName, regionType: regionType, - bindingNumas: machine.NewCPUSet(), + cpuAffinityNUMAs: machine.NewCPUSet(), podSet: make(types.PodSet), containerTopologyAwareAssignment: make(types.TopologyAwareAssignment), @@ -269,7 +269,7 @@ func NewQoSRegionBase(name string, ownerPoolName string, regionType v1alpha1.QoS throttled: *atomic.NewBool(false), idle: *atomic.NewBool(false), - isNumaBinding: isNumaBinding, + isNUMAAffinity: isNumaAffinity, isNumaExclusive: isNumaExclusive, } @@ -333,14 +333,14 @@ func (r *QoSRegionBase) GetMetaInfo() string { } func (r *QoSRegionBase) getMetaInfo() string { - return fmt.Sprintf("[regionName: %s, regionType: %s, ownerPoolName: %s, NUMAs: %v]", r.name, r.regionType, r.ownerPoolName, r.bindingNumas.String()) + return fmt.Sprintf("[regionName: %s, regionType: %s, ownerPoolName: %s, NUMAs: %v]", r.name, r.regionType, r.ownerPoolName, r.cpuAffinityNUMAs.String()) } -func (r *QoSRegionBase) GetBindingNumas() machine.CPUSet { +func (r *QoSRegionBase) GetCPUAffinityNUMAs() machine.CPUSet { r.Lock() defer r.Unlock() - return r.bindingNumas.Clone() + return r.cpuAffinityNUMAs.Clone() } func (r *QoSRegionBase) GetPods() types.PodSet 
{ @@ -372,11 +372,11 @@ func (r *QoSRegionBase) getPodsRequest() float64 { return requests } -func (r *QoSRegionBase) SetBindingNumas(numas machine.CPUSet) { +func (r *QoSRegionBase) SetCPUAffinityNUMAs(numas machine.CPUSet) { r.Lock() defer r.Unlock() - r.bindingNumas = numas + r.cpuAffinityNUMAs = numas } func (r *QoSRegionBase) SetEssentials(essentials types.ResourceEssentials) { @@ -390,8 +390,8 @@ func (r *QoSRegionBase) SetThrottled(throttled bool) { r.throttled.Store(throttled) } -func (r *QoSRegionBase) IsNumaBinding() bool { - return r.isNumaBinding +func (r *QoSRegionBase) IsNUMAAffinity() bool { + return r.isNUMAAffinity } func (r *QoSRegionBase) IsNumaExclusive() bool { @@ -430,7 +430,7 @@ func (r *QoSRegionBase) TryUpdateHeadroom() { // set essentials for policy internal.policy.SetPodSet(r.podSet) - internal.policy.SetBindingNumas(r.bindingNumas) + internal.policy.SetCPUAffinityNUMAs(r.cpuAffinityNUMAs) internal.policy.SetEssentials(r.ResourceEssentials) // run an episode of policy and calculator update @@ -498,7 +498,7 @@ func (r *QoSRegionBase) GetHeadroom() (float64, error) { metrics.ConvertMapToTags(map[string]string{ metricTagKeyRegionType: string(r.regionType), metricTagKeyRegionName: r.name, metricTagKeyPolicyName: string(internal.name), - metricTagKeyRegionNUMAs: r.bindingNumas.String(), + metricTagKeyRegionNUMAs: r.cpuAffinityNUMAs.String(), })...) r.headroomPolicyNameInUse = internal.name return headroom, nil @@ -565,7 +565,7 @@ func (r *QoSRegionBase) GetControlEssentials() types.ControlEssentials { } // getRegionNameFromMetaCache returns region name owned by container from metacache, -// to restore region info after restart. If numaID is specified, binding numas of the +// to restore region info after restart. If numaID is specified, cpu affinity numas of the // region will be checked, otherwise only one region should be owned by container. 
func getRegionNameFromMetaCache(ci *types.ContainerInfo, numaID int, metaReader metacache.MetaReader) string { if ci.QoSLevel == consts.PodAnnotationQoSLevelSharedCores { @@ -575,12 +575,12 @@ func getRegionNameFromMetaCache(ci *types.ContainerInfo, numaID int, metaReader regionInfo, ok := metaReader.GetRegionInfo(regionName) if ok { // the region-name is valid if it suits it follows constrains below - // - current container is numa-binding and the region is for numa-binding and - // the region's binding numa is the same as the container's numaID + // - current container is numa-affinity and the region is for numa-affinity and + // the region's cpu affinity numa is the same as the container's numaID // - current container is isolated and the region is for isolation-type // - current container isn't isolated and the region is for share-type - if ci.IsNumaBinding() { + if ci.IsNUMAAffinity() { regionNUMAs := regionInfo.BindingNumas.ToSliceInt() if len(regionNUMAs) != 1 || regionNUMAs[0] != numaID { return "" @@ -594,7 +594,7 @@ func getRegionNameFromMetaCache(ci *types.ContainerInfo, numaID int, metaReader } } } - } else if ci.IsDedicatedNumaBinding() { + } else if ci.IsDedicatedNUMAAffinity() { for regionName := range ci.RegionNames { regionInfo, ok := metaReader.GetRegionInfo(regionName) if ok && regionInfo.RegionType == v1alpha1.QoSRegionTypeDedicated { @@ -624,7 +624,7 @@ func (r *QoSRegionBase) initProvisionPolicy(conf *config.Configuration, extraCon for _, policyName := range configuredProvisionPolicy { if initializer, ok := initializers[policyName]; ok { policy := initializer(r.name, r.regionType, r.ownerPoolName, conf, extraConf, metaReader, metaServer, emitter) - policy.SetBindingNumas(r.bindingNumas, false) + policy.SetCPUAffinityNUMAs(r.cpuAffinityNUMAs, false) r.provisionPolicies = append(r.provisionPolicies, &internalProvisionPolicy{ name: policyName, policy: policy, @@ -746,7 +746,7 @@ func (r *QoSRegionBase) 
regulateProvisionControlKnob(originControlKnob map[types {Key: metricTagKeyControlKnobName, Val: string(knob)}, {Key: metricTagKeyControlKnobAction, Val: string(value.Action)}, }...) - klog.InfoS("[qosaware-cpu] get regulated control knob", "region", r.name, "bindingNumas", r.bindingNumas.String(), + klog.InfoS("[qosaware-cpu] get regulated control knob", "region", r.name, "cpuAffinityNUMAs", r.cpuAffinityNUMAs.String(), "policy", policy, "knob", knob, "action", value.Action, "value", value.Value) } } @@ -791,8 +791,8 @@ func (r *QoSRegionBase) getIndicators() (types.Indicator, error) { _ = r.emitter.StoreFloat64(metricRegionIndicatorTargetRaw, target, metrics.MetricTypeNameRaw, metrics.ConvertMapToTags(map[string]string{ - "indicator_name": string(indicatorName), - "binding_numas": r.bindingNumas.String(), + "indicator_name": string(indicatorName), + "cpu_affinity_numas": r.cpuAffinityNUMAs.String(), })...) } if r.conf.PolicyRama.EnableBorwein && r.provisionPolicyNameInUse == types.CPUProvisionPolicyRama { @@ -912,7 +912,7 @@ func (r *QoSRegionBase) IsIdle() bool { // available for Intel func (r *QoSRegionBase) getMemoryAccessWriteLatency() (float64, error) { latency := 0.0 - for _, numaID := range r.bindingNumas.ToSliceInt() { + for _, numaID := range r.cpuAffinityNUMAs.ToSliceInt() { data, err := r.metaReader.GetNumaMetric(numaID, pkgconsts.MetricMemLatencyWriteNuma) if err != nil { return 0, err @@ -926,7 +926,7 @@ func (r *QoSRegionBase) getMemoryAccessWriteLatency() (float64, error) { // available for Intel func (r *QoSRegionBase) getMemoryAccessReadLatency() (float64, error) { latency := 0.0 - for _, numaID := range r.bindingNumas.ToSliceInt() { + for _, numaID := range r.cpuAffinityNUMAs.ToSliceInt() { data, err := r.metaReader.GetNumaMetric(numaID, pkgconsts.MetricMemLatencyReadNuma) if err != nil { return 0, err @@ -940,7 +940,7 @@ func (r *QoSRegionBase) getMemoryAccessReadLatency() (float64, error) { // available for AMD func (r *QoSRegionBase) 
getMemoryL3MissLatency() (float64, error) { latency := 0.0 - for _, numaID := range r.bindingNumas.ToSliceInt() { + for _, numaID := range r.cpuAffinityNUMAs.ToSliceInt() { data, err := r.metaReader.GetNumaMetric(numaID, pkgconsts.MetricMemAMDL3MissLatencyNuma) if err != nil { return 0, err @@ -953,8 +953,8 @@ func (r *QoSRegionBase) getMemoryL3MissLatency() (float64, error) { func (r *QoSRegionBase) getEffectiveReclaimResource() (quota float64, cpusetSize int, err error) { numaID := commonstate.FakedNUMAID - if r.isNumaBinding { - numaID = r.bindingNumas.ToSliceInt()[0] + if r.isNUMAAffinity { + numaID = r.cpuAffinityNUMAs.ToSliceInt()[0] } quotaCtrlKnobEnabled, err := metacache.IsQuotaCtrlKnobEnabled(r.metaReader) @@ -973,7 +973,7 @@ func (r *QoSRegionBase) getEffectiveReclaimResource() (quota float64, cpusetSize quota = float64(cpuStats.CpuQuota) / float64(cpuStats.CpuPeriod) } - for _, numaID := range r.bindingNumas.ToSliceInt() { + for _, numaID := range r.cpuAffinityNUMAs.ToSliceInt() { if reclaimedInfo, ok := r.metaReader.GetPoolInfo(commonstate.PoolNameReclaim); ok { cpusetSize += reclaimedInfo.TopologyAwareAssignments[numaID].Size() } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_dedicated.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_dedicated.go index 62cef677ae..3b2ff72172 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_dedicated.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_dedicated.go @@ -61,18 +61,18 @@ func NewQoSRegionDedicated(ci *types.ContainerInfo, conf *config.Configuration, regionName = string(configapi.QoSRegionTypeDedicated) + types.RegionNameSeparator + string(uuid.NewUUID()) } - isNumaBinding := numaID != commonstate.FakedNUMAID + isNumaAffinity := numaID != commonstate.FakedNUMAID r := &QoSRegionDedicated{ QoSRegionBase: NewQoSRegionBase(regionName, ci.OwnerPoolName, configapi.QoSRegionTypeDedicated, conf, extraConf, - 
isNumaBinding, ci.IsDedicatedNumaExclusive(), metaReader, metaServer, emitter), + isNumaAffinity, ci.IsDedicatedNumaExclusive(), metaReader, metaServer, emitter), } - if isNumaBinding { - r.bindingNumas = machine.NewCPUSet(numaID) + if isNumaAffinity { + r.cpuAffinityNUMAs = machine.NewCPUSet(numaID) } else { - r.bindingNumas = machine.NewCPUSet() + r.cpuAffinityNUMAs = machine.NewCPUSet() for numaID := range ci.TopologyAwareAssignments { - r.bindingNumas.Add(numaID) + r.cpuAffinityNUMAs.Add(numaID) } } @@ -147,7 +147,8 @@ func (r *QoSRegionDedicated) updateProvisionPolicy() { // set essentials for policy and regulator internal.policy.SetPodSet(r.podSet) - internal.policy.SetBindingNumas(r.bindingNumas, r.isNumaBinding) + internal.policy.SetCPUAffinityNUMAs(r.cpuAffinityNUMAs, r.isNUMAAffinity) + internal.policy.SetEssentials(r.ResourceEssentials, r.ControlEssentials) // run an episode of policy update @@ -207,7 +208,7 @@ func (r *QoSRegionDedicated) getEffectiveControlKnobs() types.ControlKnob { if r.isNumaExclusive { reclaimedCPUSize := 0 if reclaimedInfo, ok := r.metaReader.GetPoolInfo(commonstate.PoolNameReclaim); ok { - for _, numaID := range r.bindingNumas.ToSliceInt() { + for _, numaID := range r.cpuAffinityNUMAs.ToSliceInt() { reclaimedCPUSize += reclaimedInfo.TopologyAwareAssignments[numaID].Size() } } @@ -221,7 +222,7 @@ func (r *QoSRegionDedicated) getEffectiveControlKnobs() types.ControlKnob { } if !apiequality.Semantic.DeepEqual(regionInfo.Pods, r.podSet) || - !r.bindingNumas.Equals(regionInfo.BindingNumas) { + !r.cpuAffinityNUMAs.Equals(regionInfo.BindingNumas) { return true } @@ -276,7 +277,7 @@ func (r *QoSRegionDedicated) getPodCPICurrent() (float64, error) { func (r *QoSRegionDedicated) getCPUUsageRatio() (float64, error) { usage := 0.0 nr := 0 - for _, numaID := range r.bindingNumas.ToSliceInt() { + for _, numaID := range r.cpuAffinityNUMAs.ToSliceInt() { data, err := r.metaReader.GetNumaMetric(numaID, consts.MetricCPUUsageNuma) if err != nil { 
return 0, err diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_isolation.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_isolation.go index a221666598..27aae3deb9 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_isolation.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_isolation.go @@ -50,16 +50,16 @@ func NewQoSRegionIsolation(ci *types.ContainerInfo, customRegionName string, con } } - isNumaBinding := numaID != commonstate.FakedNUMAID + isNUMAAffinity := numaID != commonstate.FakedNUMAID ownerPoolName := isolationRegionDefaultOwnerPoolName - if isNumaBinding { + if isNUMAAffinity { ownerPoolName = isolationRegionNUMAOwnerPoolName } r := &QoSRegionIsolation{ - QoSRegionBase: NewQoSRegionBase(regionName, ownerPoolName, configapi.QoSRegionTypeIsolation, conf, extraConf, isNumaBinding, false, metaReader, metaServer, emitter), + QoSRegionBase: NewQoSRegionBase(regionName, ownerPoolName, configapi.QoSRegionTypeIsolation, conf, extraConf, isNUMAAffinity, false, metaReader, metaServer, emitter), } - if isNumaBinding { - r.bindingNumas = machine.NewCPUSet(numaID) + if isNUMAAffinity { + r.cpuAffinityNUMAs = machine.NewCPUSet(numaID) } return r } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_share.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_share.go index f0a5206746..bb78db97b0 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_share.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_share.go @@ -57,14 +57,14 @@ func NewQoSRegionShare(ci *types.ContainerInfo, conf *config.Configuration, extr // When receive a new pod with new share pool from QRM, advisor should create a new share region with OwnerPoolName (OriginOwnerPoolName == OwnerPoolName). // Case 2. 
create a share pool with OriginOwnerPoolName: // When put isolation pods back to share pool, advisor should create a new share region with OriginOwnerPoolName (OriginOwnerPoolName != OwnerPoolName). - isNumaBinding := numaID != commonstate.FakedNUMAID + isNUMAAffinity := numaID != commonstate.FakedNUMAID r := &QoSRegionShare{ - QoSRegionBase: NewQoSRegionBase(regionName, ci.OriginOwnerPoolName, configapi.QoSRegionTypeShare, conf, extraConf, isNumaBinding, false, metaReader, metaServer, emitter), + QoSRegionBase: NewQoSRegionBase(regionName, ci.OriginOwnerPoolName, configapi.QoSRegionTypeShare, conf, extraConf, isNUMAAffinity, false, metaReader, metaServer, emitter), configTranslator: general.NewCommonSuffixTranslator(commonstate.NUMAPoolInfix), } - if isNumaBinding { - r.bindingNumas = machine.NewCPUSet(numaID) + if isNUMAAffinity { + r.cpuAffinityNUMAs = machine.NewCPUSet(numaID) } r.indicatorCurrentGetters = map[string]types.IndicatorCurrentGetter{ string(v1alpha1.ServiceSystemIndicatorNameCPUSchedWait): r.getPoolCPUSchedWait, @@ -113,7 +113,7 @@ func (r *QoSRegionShare) updateProvisionPolicy() { // set essentials for policy and regulator internal.policy.SetPodSet(r.podSet) - internal.policy.SetBindingNumas(r.bindingNumas, r.isNumaBinding) + internal.policy.SetCPUAffinityNUMAs(r.cpuAffinityNUMAs, r.isNUMAAffinity) internal.policy.SetEssentials(r.ResourceEssentials, r.ControlEssentials) // run an episode of policy update @@ -129,7 +129,7 @@ func (r *QoSRegionShare) getEffectiveControlKnobs() types.ControlKnob { quota, _, err := r.getEffectiveReclaimResource() if err != nil { klog.Errorf("[qosaware-cpu] failed to get effective reclaim resource, ignore it: %v", err) - } else if r.isNumaBinding { + } else if r.isNUMAAffinity { if quota > 0 { return types.ControlKnob{ configapi.ControlKnobReclaimedCoresCPUQuota: { diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_test.go 
b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_test.go index 439bc75885..68d49c3551 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_test.go @@ -175,14 +175,14 @@ func TestIsNumaBinding(t *testing.T) { RegionNames: sets.NewString("share-NUMA1"), } share := NewQoSRegionShare(&ci, conf, nil, 1, metaCache, metaServer, metrics.DummyMetrics{}) - require.True(t, share.IsNumaBinding(), "test IsNumaBinding failed") + require.True(t, share.IsNUMAAffinity(), "test IsNUMAAffinity failed") ci2 := types.ContainerInfo{ QoSLevel: consts.PodAnnotationQoSLevelSharedCores, RegionNames: sets.NewString("share"), } share2 := NewQoSRegionShare(&ci2, conf, nil, commonstate.FakedNUMAID, metaCache, metaServer, metrics.DummyMetrics{}) - require.False(t, share2.IsNumaBinding(), "test IsNumaBinding failed") + require.False(t, share2.IsNUMAAffinity(), "test IsNUMAAffinity failed") ci3 := types.ContainerInfo{ QoSLevel: consts.PodAnnotationQoSLevelSharedCores, @@ -190,7 +190,7 @@ func TestIsNumaBinding(t *testing.T) { Isolated: true, } isolation1 := NewQoSRegionIsolation(&ci3, "isolation-1", conf, nil, 1, metaCache, metaServer, metrics.DummyMetrics{}) - require.True(t, isolation1.IsNumaBinding(), "test IsNumaBinding failed") + require.True(t, isolation1.IsNUMAAffinity(), "test IsNUMAAffinity failed") ci4 := types.ContainerInfo{ QoSLevel: consts.PodAnnotationQoSLevelSharedCores, @@ -198,7 +198,7 @@ func TestIsNumaBinding(t *testing.T) { Isolated: true, } isolation2 := NewQoSRegionIsolation(&ci4, "isolation-1", conf, nil, commonstate.FakedNUMAID, metaCache, metaServer, metrics.DummyMetrics{}) - require.False(t, isolation2.IsNumaBinding(), "test IsNumaBinding failed") + require.False(t, isolation2.IsNUMAAffinity(), "test IsNUMAAffinity failed") } func TestRestrictProvisionControlKnob(t *testing.T) { diff --git a/pkg/agent/sysadvisor/plugin/qosaware/server/cpu_server.go 
b/pkg/agent/sysadvisor/plugin/qosaware/server/cpu_server.go index 2642b7b762..a5691e41b0 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/server/cpu_server.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/server/cpu_server.go @@ -25,6 +25,7 @@ import ( "sync/atomic" "time" + qosutil "github.com/kubewharf/katalyst-core/pkg/util/qos" "github.com/samber/lo" "google.golang.org/grpc" v1 "k8s.io/api/core/v1" @@ -312,10 +313,10 @@ func (cs *cpuServer) assembleResponse(advisorResp *types.InternalCPUCalculationR calculationEntriesMap := make(map[string]*cpuadvisor.CalculationEntries) blockID2Blocks := NewBlockSet() - // first assemble NUMABinding pod entries + // first assemble NUMAAffinity pod entries f := func(podUID string, containerName string, ci *types.ContainerInfo) bool { - if err := cs.assembleDedicatedNUMABindingPodEntries(advisorResp, calculationEntriesMap, blockID2Blocks, podUID, ci); err != nil { - klog.Errorf("[qosaware-server-cpu] assembleDedicatedNUMABindingPodEntries for pod %s/%s uid %s err: %v", ci.PodNamespace, ci.PodName, ci.PodUID, err) + if err := cs.assembleDedicatedNUMAAffinityPodEntries(advisorResp, calculationEntriesMap, blockID2Blocks, podUID, ci); err != nil { + klog.Errorf("[qosaware-server-cpu] assembleDedicatedNUMAAffinityPodEntries for pod %s/%s uid %s err: %v", ci.PodNamespace, ci.PodName, ci.PodUID, err) } return true } @@ -606,10 +607,11 @@ func (cs *cpuServer) setContainerInfoBasedOnContainerAllocationInfo( } if info.Metadata.QosLevel == consts.PodAnnotationQoSLevelSharedCores && - info.Metadata.Annotations[consts.PodAnnotationMemoryEnhancementNumaBinding] == consts.PodAnnotationMemoryEnhancementNumaBindingEnable { - originOwnerPoolName, err := commonstate.GetSpecifiedNUMABindingPoolName(info.Metadata.QosLevel, info.Metadata.Annotations) + (qosutil.AnnotationsIndicateNUMAAffinity(info.Metadata.Annotations) || + qosutil.AnnotationsIndicateNUMABinding(info.Metadata.Annotations)) { + originOwnerPoolName, err := 
commonstate.GetSpecifiedNUMAPoolName(info.Metadata.QosLevel, info.Metadata.Annotations) if err != nil { - return fmt.Errorf("get specified numa binding pool name failed: %w", err) + return fmt.Errorf("get specified numa binding or affinity pool name failed: %w", err) } ci.OriginOwnerPoolName = originOwnerPoolName } else { @@ -818,7 +820,7 @@ func (cs *cpuServer) assembleNormalPodEntries(calculationEntriesMap map[string]* CalculationResultsByNumas: nil, } - if ci.IsDedicatedNumaBinding() { + if ci.IsDedicatedNUMAAffinity() { return nil } @@ -856,12 +858,12 @@ func (cs *cpuServer) assembleNormalPodEntries(calculationEntriesMap map[string]* return nil } -func (cs *cpuServer) assembleDedicatedNUMABindingPodEntries( +func (cs *cpuServer) assembleDedicatedNUMAAffinityPodEntries( advisorResp *types.InternalCPUCalculationResult, calculationEntriesMap map[string]*cpuadvisor.CalculationEntries, bs blockSet, podUID string, ci *types.ContainerInfo, ) error { - if !ci.IsDedicatedNumaBinding() { + if !ci.IsDedicatedNUMAAffinity() { return nil } diff --git a/pkg/agent/sysadvisor/types/helper.go b/pkg/agent/sysadvisor/types/helper.go index 2ddfa8b2c7..7fd7dd58b9 100644 --- a/pkg/agent/sysadvisor/types/helper.go +++ b/pkg/agent/sysadvisor/types/helper.go @@ -29,21 +29,21 @@ import ( qosutil "github.com/kubewharf/katalyst-core/pkg/util/qos" ) +// IsNumaBinding returns true if current container is for numa binding +// Notice: NumaBinding is a memory enhancement feature, compared with NumaAffinity, +// NumaBinding will strictly bind container to a specific numa node, both cpuset and memory. 
func (ci *ContainerInfo) IsNumaBinding() bool { return qosutil.AnnotationsIndicateNUMABinding(ci.Annotations) } -func (ci *ContainerInfo) IsNumaExclusive() bool { - return qosutil.AnnotationsIndicateNUMAExclusive(ci.Annotations) -} - +// IsSharedNumaBinding returns true if current container is for shared_cores with numa binding func (ci *ContainerInfo) IsSharedNumaBinding() bool { return ci.QoSLevel == consts.PodAnnotationQoSLevelSharedCores && ci.IsNumaBinding() } -// IsDedicatedNumaBinding returns true if current container is for dedicated_cores with numa binding +// IsDedicatedNumaBinding returns true if current container is for dedicated_cores with numa affinity func (ci *ContainerInfo) IsDedicatedNumaBinding() bool { - return ci.QoSLevel == consts.PodAnnotationQoSLevelDedicatedCores && ci.IsNumaBinding() + return ci.QoSLevel == consts.PodAnnotationQoSLevelDedicatedCores && ci.IsNUMAAffinity() } // GetActualNUMABindingResult returns the actual numa binding result of the container. @@ -53,11 +53,44 @@ func (ci *ContainerInfo) GetActualNUMABindingResult() (int, error) { return 0, fmt.Errorf("containerInfo is nil") } - return commonstate.GetSpecifiedNUMABindingNUMAID(ci.Annotations) + return commonstate.GetSpecifiedNUMAID(ci.Annotations) +} + +// IsNUMAAffinity returns true if current container is for numa affinity +// Notice: NumaAffinity is a cpu enhancement feature, compared with NumaBinding, +// NumaAffinity only allows container bind to a specific cpuset. 
+func (ci *ContainerInfo) IsNUMAAffinity() bool { + return qosutil.AnnotationsIndicateNUMABinding(ci.Annotations) || qosutil.AnnotationsIndicateNUMAAffinity(ci.Annotations) +} + +// IsNUMAExclusive returns true if current container is for numa exclusive +func (ci *ContainerInfo) IsNUMAExclusive() bool { + return qosutil.AnnotationsIndicateNUMAExclusive(ci.Annotations) +} + +// IsSharedNUMAAffinity returns true if current container is for shared_cores with numa affinity +func (ci *ContainerInfo) IsSharedNUMAAffinity() bool { + return ci.QoSLevel == consts.PodAnnotationQoSLevelSharedCores && ci.IsNUMAAffinity() +} + +// IsDedicatedNUMAAffinity returns true if current container is for dedicated_cores with numa affinity +func (ci *ContainerInfo) IsDedicatedNUMAAffinity() bool { + return ci.QoSLevel == consts.PodAnnotationQoSLevelDedicatedCores && ci.IsNUMAAffinity() +} + +// GetActualNUMAAffinityResult returns the actual numa affinity result of the container. +// If the container is not numa affinity, it will return -1. 
+func (ci *ContainerInfo) GetActualNUMAAffinityResult() (int, error) { + if ci == nil { + return 0, fmt.Errorf("containerInfo is nil") + } + + return commonstate.GetSpecifiedNUMAID(ci.Annotations) } +// IsDedicatedNumaExclusive returns true if current container is for dedicated_cores with numa exclusive func (ci *ContainerInfo) IsDedicatedNumaExclusive() bool { - return ci.IsDedicatedNumaBinding() && ci.IsNumaExclusive() + return ci.IsDedicatedNUMAAffinity() && ci.IsNUMAExclusive() } func (ci *ContainerInfo) Clone() *ContainerInfo { diff --git a/pkg/util/qos/cpu_enhancement.go b/pkg/util/qos/cpu_enhancement.go index b88097ef0f..09472dcd6a 100644 --- a/pkg/util/qos/cpu_enhancement.go +++ b/pkg/util/qos/cpu_enhancement.go @@ -34,6 +34,12 @@ func AnnotationsIndicateNUMANotShare(annotations map[string]string) bool { consts.PodAnnotationCPUEnhancementNUMAShareDisable } +// AnnotationsIndicateNUMAAffinity returns true if the pod indicates numa affinity +func AnnotationsIndicateNUMAAffinity(annotations map[string]string) bool { + return annotations[consts.PodAnnotationCPUEnhancementNumaAffinity] == + consts.PodAnnotationCPUEnhancementNumaAffinityEnable +} + // GetPodCPUSuppressionToleranceRate parses cpu suppression tolerance rate for the given pod, // and cpu suppression is only supported for reclaim pods. if the given is not nominated with // cpu suppression, return max to indicate that it can be suppressed for any degree.