diff --git a/cmd/katalyst-agent/app/options/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go b/cmd/katalyst-agent/app/options/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go index 229f79676a..cf4ec75dff 100644 --- a/cmd/katalyst-agent/app/options/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go +++ b/cmd/katalyst-agent/app/options/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go @@ -29,15 +29,16 @@ import ( ) type ReclaimedResourceOptions struct { - EnableReclaim bool - DisableReclaimSharePools []string - ReservedResourceForReport general.ResourceList - MinReclaimedResourceForReport general.ResourceList - MinIgnoredReclaimedResourceForReport general.ResourceList - ReservedResourceForAllocate general.ResourceList - ReservedResourceForReclaimedCores general.ResourceList - NumaMinReservedResourceRatioForAllocate general.ResourceList - NumaMinReservedResourceForAllocate general.ResourceList + EnableReclaim bool + DisableReclaimSharePools []string + DisableReclaimPinnedCPUSetResourcePackageSelector string + ReservedResourceForReport general.ResourceList + MinReclaimedResourceForReport general.ResourceList + MinIgnoredReclaimedResourceForReport general.ResourceList + ReservedResourceForAllocate general.ResourceList + ReservedResourceForReclaimedCores general.ResourceList + NumaMinReservedResourceRatioForAllocate general.ResourceList + NumaMinReservedResourceForAllocate general.ResourceList *cpuheadroom.CPUHeadroomOptions *memoryheadroom.MemoryHeadroomOptions } @@ -86,6 +87,8 @@ func (o *ReclaimedResourceOptions) AddFlags(fss *cliflag.NamedFlagSets) { "show whether enable reclaim resource from shared and agent resource") fs.StringSliceVar(&o.DisableReclaimSharePools, "disable-reclaim-share-pools", o.DisableReclaimSharePools, "disable reclaim resource from shared pools") + fs.StringVar(&o.DisableReclaimPinnedCPUSetResourcePackageSelector, "disable-reclaim-pinned-cpuset-resource-package-selector", 
o.DisableReclaimPinnedCPUSetResourcePackageSelector, + "disable reclaim pinned cpuset resource package selector") fs.Var(&o.ReservedResourceForReport, "reserved-resource-for-report", "reserved reclaimed resource report to cnr") fs.Var(&o.MinReclaimedResourceForReport, "min-reclaimed-resource-for-report", @@ -110,6 +113,8 @@ func (o *ReclaimedResourceOptions) ApplyTo(c *reclaimedresource.ReclaimedResourc var errList []error c.EnableReclaim = o.EnableReclaim c.DisableReclaimSharePools = o.DisableReclaimSharePools + + c.DisableReclaimPinnedCPUSetResourcePackageSelector = o.DisableReclaimPinnedCPUSetResourcePackageSelector c.ReservedResourceForReport = v1.ResourceList(o.ReservedResourceForReport) c.MinReclaimedResourceForReport = v1.ResourceList(o.MinReclaimedResourceForReport) c.MinIgnoredReclaimedResourceForReport = v1.ResourceList(o.MinIgnoredReclaimedResourceForReport) diff --git a/cmd/katalyst-agent/app/options/qrm/cpu_plugin.go b/cmd/katalyst-agent/app/options/qrm/cpu_plugin.go index 3671789a02..212fef5aa6 100644 --- a/cmd/katalyst-agent/app/options/qrm/cpu_plugin.go +++ b/cmd/katalyst-agent/app/options/qrm/cpu_plugin.go @@ -19,6 +19,7 @@ package qrm import ( "time" + "k8s.io/apimachinery/pkg/labels" cliflag "k8s.io/component-base/cli/flag" "github.com/kubewharf/katalyst-api/pkg/consts" @@ -39,21 +40,22 @@ type CPUOptions struct { } type CPUDynamicPolicyOptions struct { - EnableCPUAdvisor bool - AdvisorGetAdviceInterval time.Duration - EnableCPUPressureEviction bool - LoadPressureEvictionSkipPools []string - EnableSyncingCPUIdle bool - EnableCPUIdle bool - CPUNUMAHintPreferPolicy string - CPUNUMAHintPreferLowThreshold float64 - NUMABindingResultAnnotationKey string - NUMANumberAnnotationKey string - NUMAIDsAnnotationKey string - EnableReserveCPUReversely bool - EnableCPUBurst bool - EnableDefaultDedicatedCoresCPUBurst bool - EnableDefaultSharedCoresCPUBurst bool + EnableCPUAdvisor bool + AdvisorGetAdviceInterval time.Duration + EnableCPUPressureEviction bool + 
LoadPressureEvictionSkipPools []string + EnableSyncingCPUIdle bool + EnableCPUIdle bool + CPUNUMAHintPreferPolicy string + CPUNUMAHintPreferLowThreshold float64 + NUMABindingResultAnnotationKey string + NUMANumberAnnotationKey string + NUMAIDsAnnotationKey string + EnableReserveCPUReversely bool + EnableCPUBurst bool + EnableDefaultDedicatedCoresCPUBurst bool + EnableDefaultSharedCoresCPUBurst bool + IRQForbiddenPinnedResourcePackageAttributeSelector string *irqtuner.IRQTunerOptions *hintoptimizer.HintOptimizerOptions } @@ -140,6 +142,9 @@ func (o *CPUOptions) AddFlags(fss *cliflag.NamedFlagSets) { o.EnableDefaultSharedCoresCPUBurst, "if set true, it will enable cpu burst for shared cores by default") fs.BoolVar(&o.EnableDefaultDedicatedCoresCPUBurst, "enable-default-dedicated-cores-cpu-burst", o.EnableDefaultDedicatedCoresCPUBurst, "if set true, it will enable cpu burst for dedicated cores by default") + fs.StringVar(&o.IRQForbiddenPinnedResourcePackageAttributeSelector, "irq-forbidden-pinned-resource-package-attribute-selector", + o.IRQForbiddenPinnedResourcePackageAttributeSelector, "The selector to filter pinned resource packages that are"+ + "forbidden for irq binding.") o.HintOptimizerOptions.AddFlags(fss) o.IRQTunerOptions.AddFlags(fss) } @@ -164,6 +169,11 @@ func (o *CPUOptions) ApplyTo(conf *qrmconfig.CPUQRMPluginConfig) error { conf.EnableCPUBurst = o.EnableCPUBurst conf.EnableDefaultDedicatedCoresCPUBurst = o.EnableDefaultDedicatedCoresCPUBurst conf.EnableDefaultSharedCoresCPUBurst = o.EnableDefaultSharedCoresCPUBurst + selector, err := labels.Parse(o.IRQForbiddenPinnedResourcePackageAttributeSelector) + if err != nil { + return err + } + conf.IRQForbiddenPinnedResourcePackageAttributeSelector = selector if err := o.HintOptimizerOptions.ApplyTo(conf.HintOptimizerConfiguration); err != nil { return err } diff --git a/cmd/katalyst-agent/app/options/qrm/memory_plugin.go b/cmd/katalyst-agent/app/options/qrm/memory_plugin.go index bf5cd7963f..20dffa0565 
100644 --- a/cmd/katalyst-agent/app/options/qrm/memory_plugin.go +++ b/cmd/katalyst-agent/app/options/qrm/memory_plugin.go @@ -42,6 +42,7 @@ type MemoryOptions struct { EnableNonBindingShareCoresMemoryResourceCheck bool EnableNUMAAllocationReactor bool NUMABindResultResourceAllocationAnnotationKey string + ExtraMemoryResources []string SockMemOptions LogCacheOptions @@ -157,6 +158,7 @@ func NewMemoryOptions() *MemoryOptions { EnabledQoS: []string{apiconsts.PodAnnotationQoSLevelSharedCores}, MonGroupEnabledClosIDs: []string{}, }, + ExtraMemoryResources: []string{}, } } @@ -235,6 +237,8 @@ func (o *MemoryOptions) AddFlags(fss *cliflag.NamedFlagSets) { o.MonGroupEnabledClosIDs, "enabled-closid mon-groups") fs.Float64Var(&o.MonGroupMaxCountRatio, "resctrl-mon-groups-max-count-ratio", o.MonGroupMaxCountRatio, "ratio of mon_groups max count") + fs.StringSliceVar(&o.ExtraMemoryResources, "extra-memory-resources", o.ExtraMemoryResources, + "extra memory resources such as hugepages-*") } func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error { @@ -273,6 +277,7 @@ func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error { conf.EnabledQoS = o.EnabledQoS conf.MonGroupEnabledClosIDs = o.MonGroupEnabledClosIDs conf.MonGroupMaxCountRatio = o.MonGroupMaxCountRatio + conf.ExtraMemoryResources = o.ExtraMemoryResources for _, reservation := range o.ReservedNumaMemory { conf.ReservedNumaMemory[reservation.NumaNode] = reservation.Limits diff --git a/cmd/katalyst-agent/app/options/qrm/qrm_base.go b/cmd/katalyst-agent/app/options/qrm/qrm_base.go index 3aad672529..eb52c380e8 100644 --- a/cmd/katalyst-agent/app/options/qrm/qrm_base.go +++ b/cmd/katalyst-agent/app/options/qrm/qrm_base.go @@ -25,26 +25,28 @@ import ( ) type GenericQRMPluginOptions struct { - QRMPluginSocketDirs []string - ExtraStateFileAbsPath string - PodDebugAnnoKeys []string - UseKubeletReservedConfig bool - PodAnnotationKeptKeys []string - PodLabelKeptKeys []string - 
MainContainerAnnotationKey string - EnableReclaimNUMABinding bool - EnableSNBHighNumaPreference bool + QRMPluginSocketDirs []string + ExtraStateFileAbsPath string + PodDebugAnnoKeys []string + UseKubeletReservedConfig bool + PodAnnotationKeptKeys []string + PodLabelKeptKeys []string + MainContainerAnnotationKey string + EnableReclaimNUMABinding bool + EnableSNBHighNumaPreference bool + TopologyAllocationAnnotationKey string *statedirectory.StateDirectoryOptions } func NewGenericQRMPluginOptions() *GenericQRMPluginOptions { return &GenericQRMPluginOptions{ - QRMPluginSocketDirs: []string{"/var/lib/kubelet/plugins_registry"}, - PodDebugAnnoKeys: []string{}, - PodAnnotationKeptKeys: []string{}, - PodLabelKeptKeys: []string{}, - MainContainerAnnotationKey: consts.MainContainerNameAnnotationKey, - StateDirectoryOptions: statedirectory.NewStateDirectoryOptions(), + QRMPluginSocketDirs: []string{"/var/lib/kubelet/plugins_registry"}, + PodDebugAnnoKeys: []string{}, + PodAnnotationKeptKeys: []string{}, + PodLabelKeptKeys: []string{}, + MainContainerAnnotationKey: consts.MainContainerNameAnnotationKey, + TopologyAllocationAnnotationKey: consts.QRMPodAnnotationTopologyAllocationKey, + StateDirectoryOptions: statedirectory.NewStateDirectoryOptions(), } } @@ -68,6 +70,8 @@ func (o *GenericQRMPluginOptions) AddFlags(fss *cliflag.NamedFlagSets) { o.EnableReclaimNUMABinding, "if set true, reclaim pod will be allocated on a specific NUMA node best-effort, otherwise, reclaim pod will be allocated on multi NUMA nodes") fs.BoolVar(&o.EnableSNBHighNumaPreference, "enable-snb-high-numa-preference", o.EnableSNBHighNumaPreference, "default false,if set true, snb pod will be preferentially allocated on high numa node") + fs.StringVar(&o.TopologyAllocationAnnotationKey, "topology-allocation-annotation-key", + o.TopologyAllocationAnnotationKey, "the annotation key used to describe a topology aware allocation of a container") o.StateDirectoryOptions.AddFlags(fss) } @@ -81,6 +85,7 @@ func (o 
*GenericQRMPluginOptions) ApplyTo(conf *qrmconfig.GenericQRMPluginConfig conf.MainContainerAnnotationKey = o.MainContainerAnnotationKey conf.EnableReclaimNUMABinding = o.EnableReclaimNUMABinding conf.EnableSNBHighNumaPreference = o.EnableSNBHighNumaPreference + conf.TopologyAllocationAnnotationKey = o.TopologyAllocationAnnotationKey if err := o.StateDirectoryOptions.ApplyTo(conf.StateDirectoryConfiguration); err != nil { return err diff --git a/go.mod b/go.mod index e96fb39627..4f3ecb9bba 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/google/uuid v1.3.0 github.com/h2non/gock v1.2.0 github.com/klauspost/cpuid/v2 v2.2.6 - github.com/kubewharf/katalyst-api v0.5.11-0.20260324091059-cae1d07d9882 + github.com/kubewharf/katalyst-api v0.5.11-0.20260407100730-9a71452c00b1 github.com/moby/sys/mountinfo v0.6.2 github.com/montanaflynn/stats v0.7.1 github.com/opencontainers/runc v1.1.6 @@ -197,7 +197,7 @@ replace ( k8s.io/kube-proxy => k8s.io/kube-proxy v0.24.6 k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.24.6 k8s.io/kubectl => k8s.io/kubectl v0.24.6 - k8s.io/kubelet => github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2 + k8s.io/kubelet => github.com/luomingmeng/kubelet v0.0.0-20260306101749-66566cd8838b k8s.io/kubernetes => k8s.io/kubernetes v1.24.6 k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.24.6 k8s.io/metrics => k8s.io/metrics v0.24.6 diff --git a/go.sum b/go.sum index 748426d8ac..e78e312ab6 100644 --- a/go.sum +++ b/go.sum @@ -574,10 +574,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kubewharf/katalyst-api v0.5.11-0.20260324091059-cae1d07d9882 h1:4KYYk/mAJAOIYDW5V+43wnjnP8p3bwHXAkAcw/AbzuQ= -github.com/kubewharf/katalyst-api 
v0.5.11-0.20260324091059-cae1d07d9882/go.mod h1:BZMVGVl3EP0eCn5xsDgV41/gjYkoh43abIYxrB10e3k= -github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2 h1:2KLMzgntDypiFJRX4fSQJCD+a6zIgHuhcAzd/7nAGmU= -github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c= +github.com/kubewharf/katalyst-api v0.5.11-0.20260407100730-9a71452c00b1 h1:JQn9/QjjsmS6dZNurits7d3YUU6qKhHVjFyTHqdMoT4= +github.com/kubewharf/katalyst-api v0.5.11-0.20260407100730-9a71452c00b1/go.mod h1:BZMVGVl3EP0eCn5xsDgV41/gjYkoh43abIYxrB10e3k= github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8= github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/libopenstorage/openstorage v1.0.0/go.mod h1:Sp1sIObHjat1BeXhfMqLZ14wnOzEhNx2YQedreMcUyc= @@ -587,6 +585,8 @@ github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0U github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc= github.com/logrusorgru/aurora v0.0.0-20181002194514-a7b3b318ed4e/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= github.com/lpabon/godbc v0.1.1/go.mod h1:Jo9QV0cf3U6jZABgiJ2skINAXb9j8m51r07g4KI92ZA= +github.com/luomingmeng/kubelet v0.0.0-20260306101749-66566cd8838b h1:4fQ2SJiAbt+RMD/RCN/8iN8LevcHnLxXaFY5z2cuQVI= +github.com/luomingmeng/kubelet v0.0.0-20260306101749-66566cd8838b/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c= github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= diff --git a/pkg/agent/qrm-plugins/commonstate/pool.go b/pkg/agent/qrm-plugins/commonstate/pool.go index 5e68b7cead..eb96bdc359 100644 --- a/pkg/agent/qrm-plugins/commonstate/pool.go +++ b/pkg/agent/qrm-plugins/commonstate/pool.go @@ -22,7 +22,9 @@ 
import ( apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts" + "github.com/kubewharf/katalyst-core/pkg/util/general" "github.com/kubewharf/katalyst-core/pkg/util/machine" + resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) // notice that pool-name may not have direct mapping relations with qos-level, for instance @@ -56,6 +58,8 @@ const ( PoolNotFoundErrMsg = "pool not found" ) +var OwnerPoolNameTranslator = resourcepackage.ResourcePackageSuffixTranslatorWrapper(general.NewCommonSuffixTranslator(NUMAPoolInfix)) + func IsIsolationPool(poolName string) bool { return strings.HasPrefix(poolName, PoolNamePrefixIsolation) } diff --git a/pkg/agent/qrm-plugins/commonstate/state.go b/pkg/agent/qrm-plugins/commonstate/state.go index 8bb71a6704..12249491fa 100644 --- a/pkg/agent/qrm-plugins/commonstate/state.go +++ b/pkg/agent/qrm-plugins/commonstate/state.go @@ -25,6 +25,7 @@ import ( cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts" "github.com/kubewharf/katalyst-core/pkg/util/general" "github.com/kubewharf/katalyst-core/pkg/util/machine" + resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) type AllocationMeta struct { @@ -127,6 +128,14 @@ func (am *AllocationMeta) GetOwnerPoolName() string { return am.OwnerPoolName } +func (am *AllocationMeta) GetResourcePackageName() string { + if am == nil { + return "" + } + + return resourcepackage.GetResourcePackageName(am.Annotations) +} + // GetSpecifiedPoolName parses the owner pool name for AllocationInfo from qos-level func (am *AllocationMeta) GetSpecifiedPoolName() string { if am == nil { @@ -145,8 +154,8 @@ func (am *AllocationMeta) GetSpecifiedNUMABindingNUMAID() (int, error) { return GetSpecifiedNUMABindingNUMAID(am.Annotations) } -// SetSpecifiedNUMABindingNUMAID set the numa id for AllocationInfo -func (am *AllocationMeta) 
SetSpecifiedNUMABindingNUMAID(numaID uint64) { +// SetSpecifiedNUMABindingNUMAID set the numa ids for AllocationInfo +func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaIDs []uint64) { if am == nil { return } @@ -155,7 +164,12 @@ func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaID uint64) { am.Annotations = make(map[string]string) } - am.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint] = machine.NewCPUSet(int(numaID)).String() + intIDs := make([]int, len(numaIDs)) + for i, id := range numaIDs { + intIDs[i] = int(id) + } + + am.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint] = machine.NewCPUSet(intIDs...).String() } // GetSpecifiedNUMABindingPoolName get numa_binding pool name @@ -316,3 +330,14 @@ func (am *AllocationMeta) CheckDedicatedPool() bool { } return am.OwnerPoolName == PoolNameDedicated } + +// CheckDistributeEvenlyAcrossNuma returns true if the AllocationInfo is for pod with distribute evenly across numa +// annotation enabled. +func (am *AllocationMeta) CheckDistributeEvenlyAcrossNuma() bool { + if am == nil { + return false + } + + return am.Annotations[consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma] == + consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.pb.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.pb.go index 2d890de409..5ce2c97888 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.pb.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.pb.go @@ -713,17 +713,171 @@ func (m *ContainerAllocationInfoEntries) GetEntries() map[string]*ContainerAlloc return nil } +// ResourcePackageItemConfig describes per-resource-package configuration on a specific NUMA node. +// Currently it carries pinned cpuset for the package. It is designed to be extensible. 
+type ResourcePackageItemConfig struct { + // pinned_cpuset is the pinned CPUSet string for the resource package on the specific NUMA node. + // The format follows existing cpuset string convention used in this proto (e.g. "0-3,8-11"). + PinnedCpuset string `protobuf:"bytes,1,opt,name=pinned_cpuset,json=pinnedCpuset,proto3" json:"pinned_cpuset,omitempty"` + Attributes map[string]string `protobuf:"bytes,2,rep,name=attributes,proto3" json:"attributes,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ResourcePackageItemConfig) Reset() { *m = ResourcePackageItemConfig{} } +func (*ResourcePackageItemConfig) ProtoMessage() {} +func (*ResourcePackageItemConfig) Descriptor() ([]byte, []int) { + return fileDescriptor_08fc9a87e8768c24, []int{12} +} +func (m *ResourcePackageItemConfig) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *ResourcePackageItemConfig) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_ResourcePackageItemConfig.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *ResourcePackageItemConfig) XXX_Merge(src proto.Message) { + xxx_messageInfo_ResourcePackageItemConfig.Merge(m, src) +} +func (m *ResourcePackageItemConfig) XXX_Size() int { + return m.Size() +} +func (m *ResourcePackageItemConfig) XXX_DiscardUnknown() { + xxx_messageInfo_ResourcePackageItemConfig.DiscardUnknown(m) +} + +var xxx_messageInfo_ResourcePackageItemConfig proto.InternalMessageInfo + +func (m *ResourcePackageItemConfig) GetPinnedCpuset() string { + if m != nil { + return m.PinnedCpuset + } + return "" +} + +func (m *ResourcePackageItemConfig) GetAttributes() map[string]string { + if m != nil { + return m.Attributes + } + return nil +} + +// 
NumaResourcePackageConfig describes resource package configuration for one NUMA node. +type NumaResourcePackageConfig struct { + // packages is keyed by resource package name. + Packages map[string]*ResourcePackageItemConfig `protobuf:"bytes,1,rep,name=packages,proto3" json:"packages,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *NumaResourcePackageConfig) Reset() { *m = NumaResourcePackageConfig{} } +func (*NumaResourcePackageConfig) ProtoMessage() {} +func (*NumaResourcePackageConfig) Descriptor() ([]byte, []int) { + return fileDescriptor_08fc9a87e8768c24, []int{13} +} +func (m *NumaResourcePackageConfig) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *NumaResourcePackageConfig) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_NumaResourcePackageConfig.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *NumaResourcePackageConfig) XXX_Merge(src proto.Message) { + xxx_messageInfo_NumaResourcePackageConfig.Merge(m, src) +} +func (m *NumaResourcePackageConfig) XXX_Size() int { + return m.Size() +} +func (m *NumaResourcePackageConfig) XXX_DiscardUnknown() { + xxx_messageInfo_NumaResourcePackageConfig.DiscardUnknown(m) +} + +var xxx_messageInfo_NumaResourcePackageConfig proto.InternalMessageInfo + +func (m *NumaResourcePackageConfig) GetPackages() map[string]*ResourcePackageItemConfig { + if m != nil { + return m.Packages + } + return nil +} + +// ResourcePackageConfig describes node-level resource package configurations organized by NUMA node. +type ResourcePackageConfig struct { + // numa_resource_packages is keyed by NUMA id. 
+ NumaResourcePackages map[uint64]*NumaResourcePackageConfig `protobuf:"bytes,1,rep,name=numa_resource_packages,json=numaResourcePackages,proto3" json:"numa_resource_packages,omitempty" protobuf_key:"varint,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ResourcePackageConfig) Reset() { *m = ResourcePackageConfig{} } +func (*ResourcePackageConfig) ProtoMessage() {} +func (*ResourcePackageConfig) Descriptor() ([]byte, []int) { + return fileDescriptor_08fc9a87e8768c24, []int{14} +} +func (m *ResourcePackageConfig) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *ResourcePackageConfig) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_ResourcePackageConfig.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *ResourcePackageConfig) XXX_Merge(src proto.Message) { + xxx_messageInfo_ResourcePackageConfig.Merge(m, src) +} +func (m *ResourcePackageConfig) XXX_Size() int { + return m.Size() +} +func (m *ResourcePackageConfig) XXX_DiscardUnknown() { + xxx_messageInfo_ResourcePackageConfig.DiscardUnknown(m) +} + +var xxx_messageInfo_ResourcePackageConfig proto.InternalMessageInfo + +func (m *ResourcePackageConfig) GetNumaResourcePackages() map[uint64]*NumaResourcePackageConfig { + if m != nil { + return m.NumaResourcePackages + } + return nil +} + type GetAdviceRequest struct { - Entries map[string]*ContainerAllocationInfoEntries `protobuf:"bytes,1,rep,name=entries,proto3" json:"entries,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` - WantedFeatureGates map[string]*advisorsvc.FeatureGate `protobuf:"bytes,2,rep,name=wanted_feature_gates,json=wantedFeatureGates,proto3" json:"wanted_feature_gates,omitempty" 
protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` - XXX_NoUnkeyedLiteral struct{} `json:"-"` - XXX_sizecache int32 `json:"-"` + Entries map[string]*ContainerAllocationInfoEntries `protobuf:"bytes,1,rep,name=entries,proto3" json:"entries,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + WantedFeatureGates map[string]*advisorsvc.FeatureGate `protobuf:"bytes,2,rep,name=wanted_feature_gates,json=wantedFeatureGates,proto3" json:"wanted_feature_gates,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + // resource_package_config carries resource package related configurations (e.g. pinned cpuset) + // from cpu plugin to sysadvisor. + ResourcePackageConfig *ResourcePackageConfig `protobuf:"bytes,3,opt,name=resource_package_config,json=resourcePackageConfig,proto3" json:"resource_package_config,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_sizecache int32 `json:"-"` } func (m *GetAdviceRequest) Reset() { *m = GetAdviceRequest{} } func (*GetAdviceRequest) ProtoMessage() {} func (*GetAdviceRequest) Descriptor() ([]byte, []int) { - return fileDescriptor_08fc9a87e8768c24, []int{12} + return fileDescriptor_08fc9a87e8768c24, []int{15} } func (m *GetAdviceRequest) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -766,6 +920,13 @@ func (m *GetAdviceRequest) GetWantedFeatureGates() map[string]*advisorsvc.Featur return nil } +func (m *GetAdviceRequest) GetResourcePackageConfig() *ResourcePackageConfig { + if m != nil { + return m.ResourcePackageConfig + } + return nil +} + type GetAdviceResponse struct { Entries map[string]*CalculationEntries `protobuf:"bytes,1,rep,name=entries,proto3" json:"entries,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` AllowSharedCoresOverlapReclaimedCores bool 
`protobuf:"varint,2,opt,name=allow_shared_cores_overlap_reclaimed_cores,json=allowSharedCoresOverlapReclaimedCores,proto3" json:"allow_shared_cores_overlap_reclaimed_cores,omitempty"` @@ -778,7 +939,7 @@ type GetAdviceResponse struct { func (m *GetAdviceResponse) Reset() { *m = GetAdviceResponse{} } func (*GetAdviceResponse) ProtoMessage() {} func (*GetAdviceResponse) Descriptor() ([]byte, []int) { - return fileDescriptor_08fc9a87e8768c24, []int{13} + return fileDescriptor_08fc9a87e8768c24, []int{16} } func (m *GetAdviceResponse) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -857,6 +1018,12 @@ func init() { proto.RegisterType((*ContainerAllocationInfo)(nil), "cpuadvisor.ContainerAllocationInfo") proto.RegisterType((*ContainerAllocationInfoEntries)(nil), "cpuadvisor.ContainerAllocationInfoEntries") proto.RegisterMapType((map[string]*ContainerAllocationInfo)(nil), "cpuadvisor.ContainerAllocationInfoEntries.EntriesEntry") + proto.RegisterType((*ResourcePackageItemConfig)(nil), "cpuadvisor.ResourcePackageItemConfig") + proto.RegisterMapType((map[string]string)(nil), "cpuadvisor.ResourcePackageItemConfig.AttributesEntry") + proto.RegisterType((*NumaResourcePackageConfig)(nil), "cpuadvisor.NumaResourcePackageConfig") + proto.RegisterMapType((map[string]*ResourcePackageItemConfig)(nil), "cpuadvisor.NumaResourcePackageConfig.PackagesEntry") + proto.RegisterType((*ResourcePackageConfig)(nil), "cpuadvisor.ResourcePackageConfig") + proto.RegisterMapType((map[uint64]*NumaResourcePackageConfig)(nil), "cpuadvisor.ResourcePackageConfig.NumaResourcePackagesEntry") proto.RegisterType((*GetAdviceRequest)(nil), "cpuadvisor.GetAdviceRequest") proto.RegisterMapType((map[string]*ContainerAllocationInfoEntries)(nil), "cpuadvisor.GetAdviceRequest.EntriesEntry") proto.RegisterMapType((map[string]*advisorsvc.FeatureGate)(nil), "cpuadvisor.GetAdviceRequest.WantedFeatureGatesEntry") @@ -868,90 +1035,102 @@ func init() { func init() { proto.RegisterFile("cpu.proto", 
fileDescriptor_08fc9a87e8768c24) } var fileDescriptor_08fc9a87e8768c24 = []byte{ - // 1327 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xd4, 0x58, 0xcd, 0x73, 0xdb, 0x44, - 0x14, 0xb7, 0xe2, 0x34, 0x4d, 0x5e, 0xbe, 0xb7, 0x49, 0xe3, 0xaa, 0x8d, 0xc7, 0x75, 0xa7, 0x4c, - 0x1a, 0x26, 0x76, 0x9b, 0x76, 0xe8, 0xc7, 0xa9, 0x8e, 0x29, 0xa1, 0x7c, 0xb4, 0xa9, 0xda, 0x34, - 0xd3, 0x1e, 0xd0, 0xac, 0xa5, 0xb5, 0xad, 0x89, 0xa4, 0x55, 0xa5, 0x95, 0x83, 0x87, 0x19, 0x86, - 0x1b, 0x47, 0xb8, 0xf2, 0x17, 0x70, 0x66, 0x86, 0x23, 0x07, 0x8e, 0x3d, 0x72, 0xe0, 0xd0, 0x23, - 0x0d, 0xff, 0x06, 0xcc, 0x30, 0x5a, 0x49, 0xce, 0x4a, 0xb6, 0xe4, 0xc0, 0xc0, 0x81, 0x93, 0xb5, - 0xbb, 0xef, 0xf7, 0x7b, 0xbf, 0x7d, 0xfb, 0xde, 0xd3, 0xca, 0x30, 0xa3, 0x39, 0x7e, 0xcd, 0x71, - 0x29, 0xa3, 0x08, 0x34, 0xc7, 0xc7, 0x7a, 0xcf, 0xf0, 0xa8, 0x2b, 0x6f, 0x75, 0x0c, 0xd6, 0xf5, - 0x5b, 0x35, 0x8d, 0x5a, 0xf5, 0x0e, 0xed, 0xd0, 0x3a, 0x37, 0x69, 0xf9, 0x6d, 0x3e, 0xe2, 0x03, - 0xfe, 0x14, 0x42, 0xe5, 0x7d, 0xc1, 0xfc, 0xd0, 0x6f, 0x91, 0xa3, 0x2e, 0x76, 0xdb, 0xf5, 0x43, - 0xcc, 0xb0, 0xd9, 0xf7, 0xd8, 0x96, 0x46, 0x5d, 0x52, 0x77, 0x0e, 0x3b, 0x75, 0xdc, 0x21, 0x36, - 0xab, 0xbf, 0x72, 0xad, 0x2d, 0xc7, 0xf4, 0x3b, 0x86, 0xed, 0xd5, 0x23, 0x87, 0x5e, 0x4f, 0x8b, - 0x1f, 0x55, 0xaf, 0xa7, 0x45, 0xb4, 0xbb, 0xa3, 0x69, 0xfd, 0x16, 0x31, 0x09, 0x0b, 0x09, 0x1d, - 0xc3, 0xab, 0xbb, 0xc4, 0xa3, 0xbe, 0xab, 0x91, 0x90, 0xb3, 0xde, 0xbb, 0x81, 0x4d, 0xa7, 0x8b, - 0x6f, 0x04, 0x8b, 0x21, 0x51, 0xf5, 0xcd, 0x04, 0xac, 0x7c, 0x62, 0x78, 0xac, 0x61, 0xeb, 0x07, - 0x98, 0x69, 0x5d, 0x85, 0x78, 0x0e, 0xb5, 0x3d, 0x82, 0x76, 0xe1, 0x2c, 0xb1, 0x99, 0x6b, 0x10, - 0xaf, 0x24, 0x55, 0x8a, 0x1b, 0xb3, 0xdb, 0x5b, 0xb5, 0x93, 0x28, 0xd4, 0x46, 0x41, 0x6a, 0x0f, - 0x42, 0xfb, 0xe0, 0xa7, 0xaf, 0xc4, 0x68, 0xf4, 0x02, 0x36, 0xb1, 0x69, 0xd2, 0x23, 0xd5, 0xeb, - 0x62, 0x97, 0xe8, 0x6a, 0xb0, 0x65, 0x4f, 0xa5, 0x3d, 0xe2, 0x9a, 0xd8, 0x51, 0x5d, 0xa2, 0x99, - 0xd8, 
0xb0, 0xe2, 0xf9, 0xd2, 0x44, 0x45, 0xda, 0x98, 0x56, 0xae, 0x72, 0xc4, 0x53, 0x0e, 0x68, - 0x06, 0xf3, 0x8f, 0x43, 0x73, 0x25, 0xb6, 0xe6, 0x93, 0xe8, 0x3e, 0xcc, 0x93, 0xcf, 0x99, 0x8b, - 0xd5, 0x58, 0x69, 0x91, 0x2b, 0xbd, 0x58, 0x3b, 0x89, 0x5d, 0xad, 0x89, 0x4d, 0xcd, 0x37, 0x31, - 0x33, 0xa8, 0xfd, 0xd0, 0x6e, 0x53, 0x65, 0x8e, 0x23, 0x22, 0xa9, 0xf2, 0x4b, 0x98, 0x13, 0x55, - 0xa3, 0x25, 0x28, 0x1e, 0x92, 0x7e, 0x49, 0xaa, 0x48, 0x1b, 0x33, 0x4a, 0xf0, 0x88, 0x6e, 0xc1, - 0x99, 0x1e, 0x36, 0x7d, 0xc2, 0x95, 0xcd, 0x6e, 0x97, 0xc5, 0x28, 0x08, 0xdc, 0x11, 0x8b, 0x12, - 0x1a, 0xdf, 0x9b, 0xb8, 0x23, 0x55, 0x7f, 0x94, 0x00, 0x0d, 0x5b, 0xa0, 0x07, 0xe9, 0xc0, 0xbe, - 0x9b, 0x4f, 0x39, 0x3a, 0xac, 0xf2, 0xc1, 0x58, 0xe5, 0x37, 0x92, 0xca, 0x2f, 0x66, 0xb8, 0xe1, - 0x51, 0x11, 0x64, 0x7f, 0x3f, 0x01, 0x8b, 0xa9, 0x65, 0xf4, 0x0e, 0x2c, 0xd2, 0x23, 0x9b, 0xb8, - 0xaa, 0x43, 0xa9, 0xa9, 0xda, 0xd8, 0x22, 0x91, 0xa3, 0x79, 0x3e, 0xbd, 0x47, 0xa9, 0xf9, 0x08, - 0x5b, 0x04, 0x7d, 0x01, 0x97, 0xb4, 0x13, 0xa8, 0xea, 0x12, 0xcf, 0x37, 0x99, 0xa7, 0xb6, 0xfa, - 0xaa, 0xed, 0x5b, 0x38, 0x38, 0xdd, 0x60, 0xc3, 0xf7, 0x72, 0x94, 0x88, 0x63, 0x25, 0x84, 0xef, - 0xf4, 0x1f, 0x05, 0xe0, 0x70, 0xff, 0x17, 0xb4, 0xac, 0x75, 0x99, 0x42, 0x39, 0x1f, 0x2c, 0xc6, - 0xa8, 0x18, 0xc6, 0xe8, 0x76, 0x32, 0x46, 0x97, 0x45, 0x65, 0x01, 0x70, 0x88, 0x50, 0x8c, 0xd4, - 0x0e, 0xac, 0x8e, 0xb4, 0x41, 0xd7, 0x60, 0xaa, 0x65, 0x52, 0xed, 0x30, 0xde, 0xf0, 0xb2, 0x48, - 0xbb, 0x13, 0xac, 0x28, 0x91, 0x41, 0xf5, 0x4b, 0x38, 0xc3, 0x27, 0xd0, 0x79, 0x98, 0x0a, 0xc3, - 0xc5, 0xe5, 0x4d, 0x2a, 0xd1, 0x08, 0xed, 0xc0, 0x62, 0x5c, 0x2b, 0x0c, 0xbb, 0x1d, 0xc2, 0x62, - 0xd2, 0x0b, 0x22, 0x69, 0x54, 0x1f, 0xcf, 0xb8, 0x85, 0xb2, 0x40, 0xc5, 0xa1, 0x87, 0x2e, 0xc0, - 0x34, 0x77, 0xa7, 0x1a, 0x7a, 0xa9, 0xc8, 0xcf, 0xed, 0x2c, 0x1f, 0x3f, 0xd4, 0xab, 0x7f, 0x48, - 0x30, 0x9f, 0x00, 0xa3, 0xdb, 0x50, 0x4a, 0x3a, 0x1c, 0x3a, 0xf4, 0xd5, 0x04, 0xfd, 0xe0, 0xf0, - 0x6f, 0xc2, 0xf9, 0x21, 0xa0, 0xae, 0xfa, 
0x86, 0xce, 0x83, 0x3b, 0xa3, 0x9c, 0x4b, 0xc1, 0xf4, - 0x7d, 0x43, 0x47, 0x0d, 0x58, 0x4f, 0x81, 0x34, 0x6a, 0x33, 0x6c, 0x04, 0xc9, 0xc6, 0x5d, 0x86, - 0x7a, 0xe5, 0x04, 0xb6, 0x19, 0x9b, 0x70, 0xbf, 0xf7, 0x60, 0x6e, 0x40, 0xd1, 0x77, 0x48, 0x69, - 0xb2, 0x22, 0x6d, 0x2c, 0x6c, 0xaf, 0x8d, 0x0a, 0x4f, 0xdf, 0x21, 0xca, 0x2c, 0x3d, 0x19, 0x54, - 0xcf, 0xc3, 0xca, 0x2e, 0x61, 0xcd, 0x2e, 0xd1, 0x0e, 0x1d, 0x6a, 0xd8, 0x4c, 0x21, 0xaf, 0x7c, - 0xe2, 0xb1, 0xea, 0x4f, 0x12, 0xac, 0xa6, 0x16, 0xa2, 0xbe, 0xf8, 0x61, 0xba, 0x7c, 0x6b, 0xa2, - 0xa3, 0x91, 0x98, 0x8c, 0x0a, 0x7e, 0x31, 0xb6, 0x82, 0x6f, 0x26, 0xb3, 0x73, 0x5d, 0xf4, 0xd4, - 0x30, 0x4d, 0xaa, 0x65, 0xb5, 0x9e, 0x1f, 0x24, 0x58, 0x1e, 0x32, 0x40, 0xef, 0xa7, 0xa5, 0x6f, - 0xe6, 0x12, 0x66, 0xc8, 0x7e, 0x3e, 0x56, 0xf6, 0xf5, 0xa4, 0x6c, 0x79, 0xb4, 0x97, 0x74, 0xdf, - 0xf9, 0xb3, 0x08, 0x0b, 0xc9, 0x55, 0xb4, 0x06, 0x67, 0x5d, 0x6c, 0x39, 0xaa, 0xef, 0x70, 0xfa, - 0x69, 0x65, 0x2a, 0x18, 0xee, 0x3b, 0xa3, 0xfa, 0xd1, 0xc4, 0xa8, 0x7e, 0xd4, 0x03, 0x99, 0x51, - 0x87, 0x9a, 0xb4, 0xd3, 0x57, 0xf1, 0x11, 0x76, 0x89, 0x8a, 0x3d, 0xcf, 0xe8, 0xd8, 0x16, 0xb1, - 0x59, 0xfc, 0xb6, 0xb8, 0x93, 0x2d, 0xaf, 0xf6, 0x2c, 0x02, 0x37, 0x02, 0x6c, 0xe3, 0x04, 0x1a, - 0x86, 0xa4, 0xc4, 0x32, 0x96, 0xd1, 0x37, 0x12, 0x5c, 0xa1, 0xae, 0xd1, 0x31, 0x6c, 0x6c, 0xaa, - 0x39, 0x0a, 0x26, 0xb9, 0x82, 0xfb, 0x39, 0x0a, 0x1e, 0x47, 0x2c, 0xf9, 0x4a, 0x2a, 0x74, 0x8c, - 0x99, 0xfc, 0x31, 0xac, 0xe7, 0x52, 0x88, 0xc7, 0x38, 0x19, 0x1e, 0xe3, 0x8a, 0x78, 0x8c, 0x33, - 0xc2, 0x51, 0xc9, 0x4f, 0xe1, 0xea, 0xa9, 0x74, 0xfd, 0x1d, 0xd2, 0xea, 0x77, 0x12, 0xac, 0x0d, - 0x0a, 0x3b, 0x95, 0x08, 0x77, 0x61, 0xda, 0x22, 0x0c, 0xeb, 0x98, 0x61, 0x4e, 0x16, 0xd4, 0x82, - 0xf8, 0x8e, 0x8f, 0x61, 0x9f, 0x46, 0x46, 0xca, 0xc0, 0x1c, 0x35, 0x61, 0x11, 0x0f, 0xc8, 0x54, - 0xc3, 0x6e, 0xd3, 0x53, 0xa4, 0xe5, 0x02, 0x4e, 0x8c, 0xab, 0xbf, 0x4a, 0x50, 0xce, 0xd0, 0x16, - 0x17, 0xd7, 0x93, 0x74, 0x71, 0xdd, 0x4e, 0xbc, 0xe5, 0x72, 0xc1, 0x19, 0x95, 
0xa6, 0x8e, 0xad, - 0xb4, 0xbb, 0xc9, 0x4a, 0xbb, 0x72, 0x0a, 0x97, 0x62, 0xc8, 0xbf, 0x2e, 0xc2, 0xd2, 0x2e, 0x61, - 0x0d, 0xbd, 0x67, 0x68, 0x24, 0x6a, 0x7d, 0xa8, 0x99, 0xde, 0xc8, 0xb5, 0x54, 0x83, 0x4b, 0x98, - 0x67, 0x5c, 0xfa, 0xda, 0xb0, 0x72, 0x84, 0x6d, 0x46, 0x74, 0xb5, 0x4d, 0x30, 0xf3, 0x5d, 0xa2, - 0x76, 0x30, 0x23, 0xf1, 0xab, 0xeb, 0x56, 0x2e, 0xe3, 0x01, 0x07, 0x7e, 0x10, 0xe2, 0x76, 0x03, - 0x58, 0x48, 0x8e, 0x8e, 0x86, 0x16, 0xe4, 0xf6, 0xd8, 0x10, 0xdd, 0x4f, 0x86, 0x68, 0xf3, 0xf4, - 0xa7, 0x22, 0x66, 0xfc, 0x67, 0xb0, 0x96, 0x21, 0x6b, 0x84, 0xcb, 0xad, 0xa4, 0xcb, 0x35, 0x31, - 0x55, 0x05, 0x7c, 0xe2, 0xd2, 0x35, 0x09, 0xcb, 0x42, 0x20, 0xa2, 0x77, 0x4d, 0x7e, 0xc3, 0x1e, - 0xb2, 0xff, 0x1f, 0x5e, 0xc0, 0x91, 0x03, 0x6b, 0x9e, 0xef, 0x38, 0xd4, 0x1d, 0xce, 0x95, 0xc9, - 0xe1, 0xf6, 0x3c, 0xbc, 0xe5, 0xa7, 0x31, 0x78, 0x38, 0x5f, 0x56, 0xbd, 0x51, 0x6b, 0xff, 0xe5, - 0x95, 0x5f, 0xc6, 0x20, 0x67, 0x0b, 0xfa, 0x57, 0x32, 0x65, 0xf3, 0x3d, 0x98, 0x15, 0x6e, 0x33, - 0x08, 0xc1, 0x42, 0x34, 0x3c, 0x30, 0x58, 0x77, 0x8f, 0xea, 0x4b, 0x05, 0x74, 0x0e, 0x16, 0x13, - 0x73, 0xd4, 0x5c, 0x92, 0xb6, 0x7f, 0x9e, 0x00, 0x68, 0xee, 0xed, 0x37, 0x42, 0x07, 0xe8, 0x09, - 0xcc, 0x35, 0x74, 0x7d, 0x50, 0x00, 0x28, 0xbf, 0x9f, 0xca, 0x15, 0x71, 0x59, 0x04, 0xc6, 0x07, - 0x51, 0x2d, 0xa0, 0x8f, 0x60, 0x46, 0x21, 0x16, 0xed, 0x91, 0x3d, 0xaa, 0xa3, 0x4b, 0x22, 0x60, - 0x30, 0x1d, 0x95, 0xb8, 0xbc, 0x9e, 0xb1, 0x3a, 0xe0, 0xda, 0x85, 0x39, 0xf1, 0x13, 0x13, 0x2d, - 0x8b, 0x80, 0x07, 0x96, 0xc3, 0xfa, 0x72, 0x65, 0xdc, 0xf7, 0x68, 0xb5, 0x70, 0x5d, 0x0a, 0x44, - 0x0d, 0x92, 0x06, 0x5d, 0xca, 0xeb, 0x3b, 0xf2, 0x7a, 0x6e, 0xa6, 0x55, 0x0b, 0xdb, 0x1a, 0xcc, - 0x34, 0xf7, 0xf6, 0xf7, 0xf8, 0xb7, 0x34, 0x7a, 0x0e, 0xf3, 0x89, 0xcb, 0x1e, 0xaa, 0xe4, 0xdc, - 0x03, 0x43, 0x07, 0x97, 0xc7, 0xde, 0x14, 0xab, 0x85, 0x1d, 0xef, 0xf5, 0xdb, 0xb2, 0xf4, 0xe6, - 0x6d, 0xb9, 0xf0, 0xd5, 0x71, 0x59, 0x7a, 0x7d, 0x5c, 0x96, 0x7e, 0x39, 0x2e, 0x4b, 0xbf, 0x1d, - 0x97, 0xa5, 0x6f, 
0x7f, 0x2f, 0x17, 0x5e, 0xfe, 0xf3, 0xbf, 0x13, 0x34, 0xc7, 0xaf, 0xeb, 0x7d, - 0x1b, 0x5b, 0x86, 0xe6, 0x50, 0xd3, 0xd0, 0xfa, 0xf5, 0x13, 0x31, 0xad, 0x29, 0xfe, 0x67, 0xc0, - 0xcd, 0xbf, 0x02, 0x00, 0x00, 0xff, 0xff, 0xb6, 0x3b, 0xc7, 0x6a, 0xf4, 0x10, 0x00, 0x00, + // 1515 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xd4, 0x58, 0x4b, 0x6f, 0xdb, 0x46, + 0x10, 0x36, 0x6d, 0xc7, 0xb1, 0xc7, 0xef, 0x8d, 0x9f, 0x4c, 0x2c, 0x28, 0x0a, 0x52, 0x38, 0x2e, + 0x2c, 0x25, 0x76, 0xda, 0xbc, 0x50, 0x20, 0xb2, 0x9a, 0xba, 0xe9, 0x23, 0x71, 0x98, 0x38, 0x46, + 0x72, 0x28, 0xb1, 0x22, 0x57, 0x32, 0x61, 0x8a, 0xcb, 0x90, 0x4b, 0xb9, 0x42, 0x81, 0xa2, 0xff, + 0xa0, 0xbd, 0xf6, 0x17, 0xf4, 0x5c, 0xa0, 0xc7, 0x1e, 0x7a, 0xcc, 0x31, 0x87, 0xa2, 0xc8, 0xb1, + 0x71, 0xff, 0x45, 0xd1, 0x02, 0x05, 0x97, 0xa4, 0xb4, 0xa4, 0x48, 0xca, 0x2d, 0xda, 0x43, 0x4f, + 0xd2, 0xee, 0xce, 0xf7, 0xcd, 0x37, 0xb3, 0x3b, 0xfb, 0x20, 0x4c, 0x68, 0xb6, 0x57, 0xb6, 0x1d, + 0xca, 0x28, 0x02, 0xcd, 0xf6, 0xb0, 0xde, 0x36, 0x5c, 0xea, 0xc8, 0x9b, 0x4d, 0x83, 0x1d, 0x7a, + 0xf5, 0xb2, 0x46, 0x5b, 0x95, 0x26, 0x6d, 0xd2, 0x0a, 0x37, 0xa9, 0x7b, 0x0d, 0xde, 0xe2, 0x0d, + 0xfe, 0x2f, 0x80, 0xca, 0xfb, 0x82, 0xf9, 0x91, 0x57, 0x27, 0xc7, 0x87, 0xd8, 0x69, 0x54, 0x8e, + 0x30, 0xc3, 0x66, 0xc7, 0x65, 0x9b, 0x1a, 0x75, 0x48, 0xc5, 0x3e, 0x6a, 0x56, 0x70, 0x93, 0x58, + 0xac, 0xf2, 0xc2, 0x69, 0x6d, 0xda, 0xa6, 0xd7, 0x34, 0x2c, 0xb7, 0x12, 0x3a, 0x74, 0xdb, 0x5a, + 0xf4, 0x57, 0x75, 0xdb, 0x5a, 0x48, 0xbb, 0x9b, 0x4e, 0xeb, 0xd5, 0x89, 0x49, 0x58, 0x40, 0x68, + 0x1b, 0x6e, 0xc5, 0x21, 0x2e, 0xf5, 0x1c, 0x8d, 0x04, 0x9c, 0x95, 0xf6, 0x35, 0x6c, 0xda, 0x87, + 0xf8, 0x9a, 0x3f, 0x18, 0x10, 0x95, 0x5e, 0x0f, 0xc3, 0xc2, 0x27, 0x86, 0xcb, 0xaa, 0x96, 0x7e, + 0x80, 0x99, 0x76, 0xa8, 0x10, 0xd7, 0xa6, 0x96, 0x4b, 0xd0, 0x2e, 0x9c, 0x25, 0x16, 0x73, 0x0c, + 0xe2, 0xae, 0x48, 0xc5, 0x91, 0xf5, 0xc9, 0xad, 0xcd, 0x72, 0x2f, 0x0b, 0xe5, 0x34, 0x48, 0xf9, + 0x5e, 0x60, 
0xef, 0xff, 0x74, 0x94, 0x08, 0x8d, 0x9e, 0xc1, 0x06, 0x36, 0x4d, 0x7a, 0xac, 0xba, + 0x87, 0xd8, 0x21, 0xba, 0xea, 0x87, 0xec, 0xaa, 0xb4, 0x4d, 0x1c, 0x13, 0xdb, 0xaa, 0x43, 0x34, + 0x13, 0x1b, 0xad, 0xa8, 0x7f, 0x65, 0xb8, 0x28, 0xad, 0x8f, 0x2b, 0x97, 0x39, 0xe2, 0x31, 0x07, + 0xd4, 0xfc, 0xfe, 0x87, 0x81, 0xb9, 0x12, 0x59, 0xf3, 0x4e, 0x74, 0x17, 0xa6, 0xc9, 0xe7, 0xcc, + 0xc1, 0x6a, 0xa4, 0x74, 0x84, 0x2b, 0x3d, 0x5f, 0xee, 0xe5, 0xae, 0x5c, 0xc3, 0xa6, 0xe6, 0x99, + 0x98, 0x19, 0xd4, 0xba, 0x6f, 0x35, 0xa8, 0x32, 0xc5, 0x11, 0xa1, 0x54, 0xf9, 0x39, 0x4c, 0x89, + 0xaa, 0xd1, 0x1c, 0x8c, 0x1c, 0x91, 0xce, 0x8a, 0x54, 0x94, 0xd6, 0x27, 0x14, 0xff, 0x2f, 0xba, + 0x0e, 0x67, 0xda, 0xd8, 0xf4, 0x08, 0x57, 0x36, 0xb9, 0x55, 0x10, 0xb3, 0x20, 0x70, 0x87, 0x2c, + 0x4a, 0x60, 0x7c, 0x7b, 0xf8, 0xa6, 0x54, 0xfa, 0x41, 0x02, 0xd4, 0x6f, 0x81, 0xee, 0x25, 0x13, + 0xfb, 0x76, 0x3e, 0x65, 0x7a, 0x5a, 0xe5, 0x83, 0x81, 0xca, 0xaf, 0xc5, 0x95, 0x9f, 0xcf, 0x70, + 0xc3, 0xb3, 0x22, 0xc8, 0xfe, 0x6e, 0x18, 0x66, 0x13, 0xc3, 0xe8, 0x2d, 0x98, 0xa5, 0xc7, 0x16, + 0x71, 0x54, 0x9b, 0x52, 0x53, 0xb5, 0x70, 0x8b, 0x84, 0x8e, 0xa6, 0x79, 0xf7, 0x1e, 0xa5, 0xe6, + 0x03, 0xdc, 0x22, 0xe8, 0x0b, 0xb8, 0xa0, 0xf5, 0xa0, 0xaa, 0x43, 0x5c, 0xcf, 0x64, 0xae, 0x5a, + 0xef, 0xa8, 0x96, 0xd7, 0xc2, 0xfe, 0xec, 0xfa, 0x01, 0xdf, 0xce, 0x51, 0x22, 0xb6, 0x95, 0x00, + 0xbe, 0xd3, 0x79, 0xe0, 0x83, 0x83, 0xf8, 0x57, 0xb5, 0xac, 0x71, 0x99, 0x42, 0x21, 0x1f, 0x2c, + 0xe6, 0x68, 0x24, 0xc8, 0xd1, 0x8d, 0x78, 0x8e, 0x2e, 0x8a, 0xca, 0x7c, 0x60, 0x1f, 0xa1, 0x98, + 0xa9, 0x1d, 0x58, 0x4c, 0xb5, 0x41, 0x57, 0x60, 0xac, 0x6e, 0x52, 0xed, 0x28, 0x0a, 0x78, 0x5e, + 0xa4, 0xdd, 0xf1, 0x47, 0x94, 0xd0, 0xa0, 0xf4, 0x25, 0x9c, 0xe1, 0x1d, 0x68, 0x09, 0xc6, 0x82, + 0x74, 0x71, 0x79, 0xa3, 0x4a, 0xd8, 0x42, 0x3b, 0x30, 0x1b, 0xd5, 0x0a, 0xc3, 0x4e, 0x93, 0xb0, + 0x88, 0x74, 0x55, 0x24, 0x0d, 0xeb, 0xe3, 0x09, 0xb7, 0x50, 0x66, 0xa8, 0xd8, 0x74, 0xd1, 0x2a, + 0x8c, 0x73, 0x77, 0xaa, 0xa1, 0xaf, 0x8c, 0xf0, 
0x79, 0x3b, 0xcb, 0xdb, 0xf7, 0xf5, 0xd2, 0x1f, + 0x12, 0x4c, 0xc7, 0xc0, 0xe8, 0x06, 0xac, 0xc4, 0x1d, 0xf6, 0x4d, 0xfa, 0x62, 0x8c, 0xbe, 0x3b, + 0xf9, 0xdb, 0xb0, 0xd4, 0x07, 0xd4, 0x55, 0xcf, 0xd0, 0x79, 0x72, 0x27, 0x94, 0x73, 0x09, 0x98, + 0xbe, 0x6f, 0xe8, 0xa8, 0x0a, 0x6b, 0x09, 0x90, 0x46, 0x2d, 0x86, 0x0d, 0x7f, 0xb1, 0x71, 0x97, + 0x81, 0x5e, 0x39, 0x86, 0xad, 0x45, 0x26, 0xdc, 0xef, 0x6d, 0x98, 0xea, 0x52, 0x74, 0x6c, 0xb2, + 0x32, 0x5a, 0x94, 0xd6, 0x67, 0xb6, 0x96, 0xd3, 0xd2, 0xd3, 0xb1, 0x89, 0x32, 0x49, 0x7b, 0x8d, + 0xd2, 0x12, 0x2c, 0xec, 0x12, 0x56, 0x3b, 0x24, 0xda, 0x91, 0x4d, 0x0d, 0x8b, 0x29, 0xe4, 0x85, + 0x47, 0x5c, 0x56, 0xfa, 0x51, 0x82, 0xc5, 0xc4, 0x40, 0xb8, 0x2f, 0x7e, 0x98, 0x2c, 0xdf, 0xb2, + 0xe8, 0x28, 0x15, 0x93, 0x51, 0xc1, 0xcf, 0x06, 0x56, 0xf0, 0x76, 0x7c, 0x75, 0xae, 0x89, 0x9e, + 0xaa, 0xa6, 0x49, 0xb5, 0xac, 0xad, 0xe7, 0x7b, 0x09, 0xe6, 0xfb, 0x0c, 0xd0, 0xfb, 0x49, 0xe9, + 0x1b, 0xb9, 0x84, 0x19, 0xb2, 0x9f, 0x0e, 0x94, 0x7d, 0x35, 0x2e, 0x5b, 0x4e, 0xf7, 0x92, 0xdc, + 0x77, 0xfe, 0x1c, 0x81, 0x99, 0xf8, 0x28, 0x5a, 0x86, 0xb3, 0x0e, 0x6e, 0xd9, 0xaa, 0x67, 0x73, + 0xfa, 0x71, 0x65, 0xcc, 0x6f, 0xee, 0xdb, 0x69, 0xfb, 0xd1, 0x70, 0xda, 0x7e, 0xd4, 0x06, 0x99, + 0x51, 0x9b, 0x9a, 0xb4, 0xd9, 0x51, 0xf1, 0x31, 0x76, 0x88, 0x8a, 0x5d, 0xd7, 0x68, 0x5a, 0x2d, + 0x62, 0xb1, 0xe8, 0xb4, 0xb8, 0x99, 0x2d, 0xaf, 0xfc, 0x24, 0x04, 0x57, 0x7d, 0x6c, 0xb5, 0x07, + 0x0d, 0x52, 0xb2, 0xc2, 0x32, 0x86, 0xd1, 0xd7, 0x12, 0x5c, 0xa2, 0x8e, 0xd1, 0x34, 0x2c, 0x6c, + 0xaa, 0x39, 0x0a, 0x46, 0xb9, 0x82, 0xbb, 0x39, 0x0a, 0x1e, 0x86, 0x2c, 0xf9, 0x4a, 0x8a, 0x74, + 0x80, 0x99, 0xfc, 0x31, 0xac, 0xe5, 0x52, 0x88, 0xd3, 0x38, 0x1a, 0x4c, 0xe3, 0x82, 0x38, 0x8d, + 0x13, 0xc2, 0x54, 0xc9, 0x8f, 0xe1, 0xf2, 0xa9, 0x74, 0xfd, 0x1d, 0xd2, 0xd2, 0xb7, 0x12, 0x2c, + 0x77, 0x0b, 0x3b, 0xb1, 0x10, 0x6e, 0xc1, 0x78, 0x8b, 0x30, 0xac, 0x63, 0x86, 0x39, 0x99, 0x5f, + 0x0b, 0xe2, 0x19, 0x1f, 0xc1, 0x3e, 0x0d, 0x8d, 0x94, 0xae, 0x39, 0xaa, 0xc1, 0x2c, 
0xee, 0x92, + 0xa9, 0x86, 0xd5, 0xa0, 0xa7, 0x58, 0x96, 0x33, 0x38, 0xd6, 0x2e, 0xfd, 0x2c, 0x41, 0x21, 0x43, + 0x5b, 0x54, 0x5c, 0x8f, 0x92, 0xc5, 0x75, 0x23, 0x76, 0xca, 0xe5, 0x82, 0x33, 0x2a, 0x4d, 0x1d, + 0x58, 0x69, 0xb7, 0xe2, 0x95, 0x76, 0xe9, 0x14, 0x2e, 0xc5, 0x94, 0xff, 0x22, 0xc1, 0xaa, 0x12, + 0x5e, 0x11, 0xf7, 0xb0, 0x76, 0x84, 0x9b, 0xe4, 0x3e, 0x23, 0xad, 0x1a, 0xb5, 0x1a, 0x46, 0x13, + 0x5d, 0x82, 0x69, 0xdb, 0xb0, 0x2c, 0xff, 0x6a, 0x66, 0x7b, 0x2e, 0x61, 0xa1, 0xe3, 0xa9, 0xa0, + 0xb3, 0xc6, 0xfb, 0xd0, 0x3e, 0x00, 0x66, 0xcc, 0x31, 0xea, 0x1e, 0x23, 0xd1, 0xc9, 0xf4, 0x8e, + 0x28, 0x23, 0x93, 0xbf, 0x5c, 0xed, 0xe2, 0x82, 0xb8, 0x05, 0x22, 0xf9, 0x3d, 0x98, 0x4d, 0x0c, + 0xa7, 0x44, 0x9f, 0xbd, 0x96, 0x5e, 0x49, 0xb0, 0xea, 0x1f, 0xcd, 0x09, 0xe7, 0x61, 0x60, 0x0f, + 0x61, 0xdc, 0x0e, 0x3a, 0xa2, 0xb9, 0xda, 0x4e, 0x9e, 0xfb, 0xa9, 0xc0, 0x72, 0xd8, 0x0a, 0xf5, + 0x76, 0x49, 0xe4, 0x3a, 0x4c, 0xc7, 0x86, 0x52, 0xb4, 0xde, 0x89, 0xcf, 0xd4, 0xe5, 0x53, 0xa5, + 0x48, 0x0c, 0xe9, 0x77, 0x09, 0x16, 0xd3, 0xc3, 0x79, 0x01, 0x4b, 0xfe, 0xed, 0x4a, 0x8d, 0x2e, + 0xfb, 0x6a, 0x22, 0xb8, 0x3b, 0x39, 0xbe, 0xc2, 0xc0, 0x52, 0x42, 0x0e, 0x83, 0x5c, 0xb0, 0x52, + 0x86, 0x64, 0x2b, 0x35, 0xbd, 0x99, 0x45, 0x9f, 0x17, 0x7c, 0x66, 0xb6, 0xc5, 0xe0, 0x4f, 0x46, + 0x60, 0x6e, 0x97, 0xb0, 0xaa, 0xde, 0x36, 0x34, 0x12, 0x9e, 0xd1, 0xa8, 0x96, 0xac, 0xb8, 0x2b, + 0x89, 0x93, 0x38, 0x66, 0x9e, 0xf1, 0x3a, 0x69, 0xc0, 0xc2, 0x31, 0xb6, 0x18, 0xd1, 0xd5, 0x06, + 0xc1, 0xcc, 0x73, 0x88, 0xda, 0xc4, 0xbd, 0x95, 0x7c, 0x3d, 0x97, 0xf1, 0x80, 0x03, 0x3f, 0x08, + 0x70, 0xbb, 0xb8, 0xbb, 0x90, 0xd1, 0x71, 0xdf, 0x00, 0x7a, 0x06, 0xcb, 0xc9, 0xf9, 0xf1, 0x6f, + 0x3a, 0x0d, 0xa3, 0xc9, 0x6f, 0x38, 0x89, 0xab, 0x67, 0x7a, 0x42, 0x16, 0x9d, 0xb4, 0x6e, 0xb9, + 0x31, 0x70, 0x9b, 0xb8, 0x1b, 0xcf, 0xff, 0xc6, 0xe9, 0x77, 0x26, 0x71, 0xd7, 0xff, 0x0c, 0x96, + 0x33, 0x22, 0x4e, 0x71, 0xb9, 0x19, 0x77, 0xb9, 0x2c, 0x6e, 0xd7, 0x02, 0x3e, 0xf6, 0xf0, 0x18, + 0x85, 0x79, 0x21, 0xc7, 
0xe1, 0x7d, 0x2b, 0xff, 0xd2, 0xd2, 0x67, 0xff, 0x3f, 0x7c, 0x84, 0x22, + 0x1b, 0x96, 0x5d, 0xcf, 0xb6, 0xa9, 0xd3, 0xbf, 0x0c, 0x47, 0xfb, 0xaf, 0x28, 0xfd, 0x21, 0x3f, + 0x8e, 0xc0, 0xfd, 0x4b, 0x71, 0xd1, 0x4d, 0x1b, 0xfb, 0x2f, 0x9f, 0xbd, 0x32, 0x06, 0x39, 0x5b, + 0xd0, 0xbf, 0xb2, 0x52, 0x36, 0xde, 0x85, 0x49, 0xe1, 0x46, 0x8f, 0x10, 0xcc, 0x84, 0xcd, 0x03, + 0x83, 0x1d, 0xee, 0x51, 0x7d, 0x6e, 0x08, 0x9d, 0x83, 0xd9, 0x58, 0x1f, 0x35, 0xe7, 0xa4, 0xad, + 0x9f, 0x86, 0x01, 0x6a, 0x7b, 0xfb, 0xd5, 0xc0, 0x01, 0x7a, 0x04, 0x53, 0x55, 0x5d, 0xef, 0x16, + 0x00, 0xca, 0xbf, 0x53, 0xc8, 0x45, 0x71, 0x58, 0x04, 0x46, 0x13, 0x51, 0x1a, 0x42, 0x1f, 0xc1, + 0x84, 0x42, 0x5a, 0xb4, 0x4d, 0xf6, 0xa8, 0x8e, 0x2e, 0x88, 0x80, 0x6e, 0x77, 0xb8, 0x7b, 0xc8, + 0x6b, 0x19, 0xa3, 0x5d, 0xae, 0x5d, 0x98, 0x12, 0x3f, 0xb3, 0xa0, 0x79, 0x11, 0x70, 0xaf, 0x65, + 0xb3, 0x8e, 0x5c, 0x1c, 0xf4, 0x4d, 0xa6, 0x34, 0x74, 0x55, 0xf2, 0x45, 0x75, 0x17, 0x0d, 0xba, + 0x90, 0xb7, 0xa5, 0xc9, 0x6b, 0xb9, 0x2b, 0xad, 0x34, 0xb4, 0xa5, 0xc1, 0x44, 0x6d, 0x6f, 0x7f, + 0x8f, 0x7f, 0x4f, 0x42, 0x4f, 0x61, 0x3a, 0xf6, 0xe0, 0x41, 0xc5, 0x9c, 0xb7, 0x50, 0xe0, 0xe0, + 0xe2, 0xc0, 0xd7, 0x52, 0x69, 0x68, 0xc7, 0x7d, 0xf9, 0xa6, 0x20, 0xbd, 0x7e, 0x53, 0x18, 0xfa, + 0xea, 0xa4, 0x20, 0xbd, 0x3c, 0x29, 0x48, 0xaf, 0x4e, 0x0a, 0xd2, 0xaf, 0x27, 0x05, 0xe9, 0x9b, + 0xdf, 0x0a, 0x43, 0xcf, 0xff, 0xf9, 0x27, 0x35, 0xcd, 0xf6, 0x2a, 0x7a, 0xc7, 0xc2, 0x2d, 0x43, + 0xb3, 0xa9, 0x69, 0x68, 0x9d, 0x4a, 0x4f, 0x4c, 0x7d, 0x8c, 0x7f, 0x10, 0xdb, 0xfe, 0x2b, 0x00, + 0x00, 0xff, 0xff, 0x0a, 0x61, 0xe1, 0x40, 0xf8, 0x13, 0x00, 0x00, } // Reference imports to suppress errors if they are not otherwise used. 
@@ -1844,6 +2023,151 @@ func (m *ContainerAllocationInfoEntries) MarshalToSizedBuffer(dAtA []byte) (int, return len(dAtA) - i, nil } +func (m *ResourcePackageItemConfig) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ResourcePackageItemConfig) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *ResourcePackageItemConfig) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if len(m.Attributes) > 0 { + for k := range m.Attributes { + v := m.Attributes[k] + baseI := i + i -= len(v) + copy(dAtA[i:], v) + i = encodeVarintCpu(dAtA, i, uint64(len(v))) + i-- + dAtA[i] = 0x12 + i -= len(k) + copy(dAtA[i:], k) + i = encodeVarintCpu(dAtA, i, uint64(len(k))) + i-- + dAtA[i] = 0xa + i = encodeVarintCpu(dAtA, i, uint64(baseI-i)) + i-- + dAtA[i] = 0x12 + } + } + if len(m.PinnedCpuset) > 0 { + i -= len(m.PinnedCpuset) + copy(dAtA[i:], m.PinnedCpuset) + i = encodeVarintCpu(dAtA, i, uint64(len(m.PinnedCpuset))) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func (m *NumaResourcePackageConfig) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *NumaResourcePackageConfig) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *NumaResourcePackageConfig) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if len(m.Packages) > 0 { + for k := range m.Packages { + v := m.Packages[k] + baseI := i + if v != nil { + { + size, err := v.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintCpu(dAtA, i, 
uint64(size)) + } + i-- + dAtA[i] = 0x12 + } + i -= len(k) + copy(dAtA[i:], k) + i = encodeVarintCpu(dAtA, i, uint64(len(k))) + i-- + dAtA[i] = 0xa + i = encodeVarintCpu(dAtA, i, uint64(baseI-i)) + i-- + dAtA[i] = 0xa + } + } + return len(dAtA) - i, nil +} + +func (m *ResourcePackageConfig) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ResourcePackageConfig) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *ResourcePackageConfig) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if len(m.NumaResourcePackages) > 0 { + for k := range m.NumaResourcePackages { + v := m.NumaResourcePackages[k] + baseI := i + if v != nil { + { + size, err := v.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintCpu(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x12 + } + i = encodeVarintCpu(dAtA, i, uint64(k)) + i-- + dAtA[i] = 0x8 + i = encodeVarintCpu(dAtA, i, uint64(baseI-i)) + i-- + dAtA[i] = 0xa + } + } + return len(dAtA) - i, nil +} + func (m *GetAdviceRequest) Marshal() (dAtA []byte, err error) { size := m.Size() dAtA = make([]byte, size) @@ -1864,6 +2188,18 @@ func (m *GetAdviceRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + if m.ResourcePackageConfig != nil { + { + size, err := m.ResourcePackageConfig.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintCpu(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x1a + } if len(m.WantedFeatureGates) > 0 { for k := range m.WantedFeatureGates { v := m.WantedFeatureGates[k] @@ -2293,14 +2629,35 @@ func (m *ContainerAllocationInfoEntries) Size() (n int) { return n } -func (m *GetAdviceRequest) Size() (n int) { +func (m *ResourcePackageItemConfig) 
Size() (n int) { if m == nil { return 0 } var l int _ = l - if len(m.Entries) > 0 { - for k, v := range m.Entries { + l = len(m.PinnedCpuset) + if l > 0 { + n += 1 + l + sovCpu(uint64(l)) + } + if len(m.Attributes) > 0 { + for k, v := range m.Attributes { + _ = k + _ = v + mapEntrySize := 1 + len(k) + sovCpu(uint64(len(k))) + 1 + len(v) + sovCpu(uint64(len(v))) + n += mapEntrySize + 1 + sovCpu(uint64(mapEntrySize)) + } + } + return n +} + +func (m *NumaResourcePackageConfig) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if len(m.Packages) > 0 { + for k, v := range m.Packages { _ = k _ = v l = 0 @@ -2312,8 +2669,17 @@ func (m *GetAdviceRequest) Size() (n int) { n += mapEntrySize + 1 + sovCpu(uint64(mapEntrySize)) } } - if len(m.WantedFeatureGates) > 0 { - for k, v := range m.WantedFeatureGates { + return n +} + +func (m *ResourcePackageConfig) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if len(m.NumaResourcePackages) > 0 { + for k, v := range m.NumaResourcePackages { _ = k _ = v l = 0 @@ -2321,14 +2687,53 @@ func (m *GetAdviceRequest) Size() (n int) { l = v.Size() l += 1 + sovCpu(uint64(l)) } - mapEntrySize := 1 + len(k) + sovCpu(uint64(len(k))) + l + mapEntrySize := 1 + sovCpu(uint64(k)) + l n += mapEntrySize + 1 + sovCpu(uint64(mapEntrySize)) } } return n } -func (m *GetAdviceResponse) Size() (n int) { +func (m *GetAdviceRequest) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if len(m.Entries) > 0 { + for k, v := range m.Entries { + _ = k + _ = v + l = 0 + if v != nil { + l = v.Size() + l += 1 + sovCpu(uint64(l)) + } + mapEntrySize := 1 + len(k) + sovCpu(uint64(len(k))) + l + n += mapEntrySize + 1 + sovCpu(uint64(mapEntrySize)) + } + } + if len(m.WantedFeatureGates) > 0 { + for k, v := range m.WantedFeatureGates { + _ = k + _ = v + l = 0 + if v != nil { + l = v.Size() + l += 1 + sovCpu(uint64(l)) + } + mapEntrySize := 1 + len(k) + sovCpu(uint64(len(k))) + l + n += mapEntrySize + 1 + 
sovCpu(uint64(mapEntrySize)) + } + } + if m.ResourcePackageConfig != nil { + l = m.ResourcePackageConfig.Size() + n += 1 + l + sovCpu(uint64(l)) + } + return n +} + +func (m *GetAdviceResponse) Size() (n int) { if m == nil { return 0 } @@ -2604,6 +3009,67 @@ func (this *ContainerAllocationInfoEntries) String() string { }, "") return s } +func (this *ResourcePackageItemConfig) String() string { + if this == nil { + return "nil" + } + keysForAttributes := make([]string, 0, len(this.Attributes)) + for k, _ := range this.Attributes { + keysForAttributes = append(keysForAttributes, k) + } + github_com_gogo_protobuf_sortkeys.Strings(keysForAttributes) + mapStringForAttributes := "map[string]string{" + for _, k := range keysForAttributes { + mapStringForAttributes += fmt.Sprintf("%v: %v,", k, this.Attributes[k]) + } + mapStringForAttributes += "}" + s := strings.Join([]string{`&ResourcePackageItemConfig{`, + `PinnedCpuset:` + fmt.Sprintf("%v", this.PinnedCpuset) + `,`, + `Attributes:` + mapStringForAttributes + `,`, + `}`, + }, "") + return s +} +func (this *NumaResourcePackageConfig) String() string { + if this == nil { + return "nil" + } + keysForPackages := make([]string, 0, len(this.Packages)) + for k, _ := range this.Packages { + keysForPackages = append(keysForPackages, k) + } + github_com_gogo_protobuf_sortkeys.Strings(keysForPackages) + mapStringForPackages := "map[string]*ResourcePackageItemConfig{" + for _, k := range keysForPackages { + mapStringForPackages += fmt.Sprintf("%v: %v,", k, this.Packages[k]) + } + mapStringForPackages += "}" + s := strings.Join([]string{`&NumaResourcePackageConfig{`, + `Packages:` + mapStringForPackages + `,`, + `}`, + }, "") + return s +} +func (this *ResourcePackageConfig) String() string { + if this == nil { + return "nil" + } + keysForNumaResourcePackages := make([]uint64, 0, len(this.NumaResourcePackages)) + for k, _ := range this.NumaResourcePackages { + keysForNumaResourcePackages = append(keysForNumaResourcePackages, k) + } 
+ github_com_gogo_protobuf_sortkeys.Uint64s(keysForNumaResourcePackages) + mapStringForNumaResourcePackages := "map[uint64]*NumaResourcePackageConfig{" + for _, k := range keysForNumaResourcePackages { + mapStringForNumaResourcePackages += fmt.Sprintf("%v: %v,", k, this.NumaResourcePackages[k]) + } + mapStringForNumaResourcePackages += "}" + s := strings.Join([]string{`&ResourcePackageConfig{`, + `NumaResourcePackages:` + mapStringForNumaResourcePackages + `,`, + `}`, + }, "") + return s +} func (this *GetAdviceRequest) String() string { if this == nil { return "nil" @@ -2631,6 +3097,7 @@ func (this *GetAdviceRequest) String() string { s := strings.Join([]string{`&GetAdviceRequest{`, `Entries:` + mapStringForEntries + `,`, `WantedFeatureGates:` + mapStringForWantedFeatureGates + `,`, + `ResourcePackageConfig:` + strings.Replace(this.ResourcePackageConfig.String(), "ResourcePackageConfig", "ResourcePackageConfig", 1) + `,`, `}`, }, "") return s @@ -4711,7 +5178,7 @@ func (m *ContainerAllocationInfoEntries) Unmarshal(dAtA []byte) error { } return nil } -func (m *GetAdviceRequest) Unmarshal(dAtA []byte) error { +func (m *ResourcePackageItemConfig) Unmarshal(dAtA []byte) error { l := len(dAtA) iNdEx := 0 for iNdEx < l { @@ -4734,15 +5201,47 @@ func (m *GetAdviceRequest) Unmarshal(dAtA []byte) error { fieldNum := int32(wire >> 3) wireType := int(wire & 0x7) if wireType == 4 { - return fmt.Errorf("proto: GetAdviceRequest: wiretype end group for non-group") + return fmt.Errorf("proto: ResourcePackageItemConfig: wiretype end group for non-group") } if fieldNum <= 0 { - return fmt.Errorf("proto: GetAdviceRequest: illegal tag %d (wire type %d)", fieldNum, wire) + return fmt.Errorf("proto: ResourcePackageItemConfig: illegal tag %d (wire type %d)", fieldNum, wire) } switch fieldNum { case 1: if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Entries", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field PinnedCpuset", wireType) + } + 
var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthCpu + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthCpu + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.PinnedCpuset = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Attributes", wireType) } var msglen int for shift := uint(0); ; shift += 7 { @@ -4769,11 +5268,11 @@ func (m *GetAdviceRequest) Unmarshal(dAtA []byte) error { if postIndex > l { return io.ErrUnexpectedEOF } - if m.Entries == nil { - m.Entries = make(map[string]*ContainerAllocationInfoEntries) + if m.Attributes == nil { + m.Attributes = make(map[string]string) } var mapkey string - var mapvalue *ContainerAllocationInfoEntries + var mapvalue string for iNdEx < postIndex { entryPreIndex := iNdEx var wire uint64 @@ -4822,7 +5321,7 @@ func (m *GetAdviceRequest) Unmarshal(dAtA []byte) error { mapkey = string(dAtA[iNdEx:postStringIndexmapkey]) iNdEx = postStringIndexmapkey } else if fieldNum == 2 { - var mapmsglen int + var stringLenmapvalue uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { return ErrIntOverflowCpu @@ -4832,26 +5331,24 @@ func (m *GetAdviceRequest) Unmarshal(dAtA []byte) error { } b := dAtA[iNdEx] iNdEx++ - mapmsglen |= int(b&0x7F) << shift + stringLenmapvalue |= uint64(b&0x7F) << shift if b < 0x80 { break } } - if mapmsglen < 0 { + intStringLenmapvalue := int(stringLenmapvalue) + if intStringLenmapvalue < 0 { return ErrInvalidLengthCpu } - postmsgIndex := iNdEx + mapmsglen - if postmsgIndex < 0 { + postStringIndexmapvalue := iNdEx + intStringLenmapvalue + if postStringIndexmapvalue < 0 { return 
ErrInvalidLengthCpu } - if postmsgIndex > l { + if postStringIndexmapvalue > l { return io.ErrUnexpectedEOF } - mapvalue = &ContainerAllocationInfoEntries{} - if err := mapvalue.Unmarshal(dAtA[iNdEx:postmsgIndex]); err != nil { - return err - } - iNdEx = postmsgIndex + mapvalue = string(dAtA[iNdEx:postStringIndexmapvalue]) + iNdEx = postStringIndexmapvalue } else { iNdEx = entryPreIndex skippy, err := skipCpu(dAtA[iNdEx:]) @@ -4867,11 +5364,61 @@ func (m *GetAdviceRequest) Unmarshal(dAtA []byte) error { iNdEx += skippy } } - m.Entries[mapkey] = mapvalue + m.Attributes[mapkey] = mapvalue iNdEx = postIndex - case 2: + default: + iNdEx = preIndex + skippy, err := skipCpu(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthCpu + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *NumaResourcePackageConfig) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: NumaResourcePackageConfig: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: NumaResourcePackageConfig: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field WantedFeatureGates", wireType) + return fmt.Errorf("proto: wrong wireType = %d for field Packages", wireType) } var msglen int for shift := uint(0); ; shift += 7 { @@ -4898,11 +5445,11 @@ func (m *GetAdviceRequest) Unmarshal(dAtA []byte) error 
{ if postIndex > l { return io.ErrUnexpectedEOF } - if m.WantedFeatureGates == nil { - m.WantedFeatureGates = make(map[string]*advisorsvc.FeatureGate) + if m.Packages == nil { + m.Packages = make(map[string]*ResourcePackageItemConfig) } var mapkey string - var mapvalue *advisorsvc.FeatureGate + var mapvalue *ResourcePackageItemConfig for iNdEx < postIndex { entryPreIndex := iNdEx var wire uint64 @@ -4976,7 +5523,7 @@ func (m *GetAdviceRequest) Unmarshal(dAtA []byte) error { if postmsgIndex > l { return io.ErrUnexpectedEOF } - mapvalue = &advisorsvc.FeatureGate{} + mapvalue = &ResourcePackageItemConfig{} if err := mapvalue.Unmarshal(dAtA[iNdEx:postmsgIndex]); err != nil { return err } @@ -4996,7 +5543,516 @@ func (m *GetAdviceRequest) Unmarshal(dAtA []byte) error { iNdEx += skippy } } - m.WantedFeatureGates[mapkey] = mapvalue + m.Packages[mapkey] = mapvalue + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipCpu(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthCpu + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ResourcePackageConfig) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ResourcePackageConfig: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ResourcePackageConfig: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = 
%d for field NumaResourcePackages", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthCpu + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthCpu + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.NumaResourcePackages == nil { + m.NumaResourcePackages = make(map[uint64]*NumaResourcePackageConfig) + } + var mapkey uint64 + var mapvalue *NumaResourcePackageConfig + for iNdEx < postIndex { + entryPreIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + if fieldNum == 1 { + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + mapkey |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + } else if fieldNum == 2 { + var mapmsglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + mapmsglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if mapmsglen < 0 { + return ErrInvalidLengthCpu + } + postmsgIndex := iNdEx + mapmsglen + if postmsgIndex < 0 { + return ErrInvalidLengthCpu + } + if postmsgIndex > l { + return io.ErrUnexpectedEOF + } + mapvalue = &NumaResourcePackageConfig{} + if err := mapvalue.Unmarshal(dAtA[iNdEx:postmsgIndex]); err != nil { + return err + } + iNdEx = postmsgIndex + } else { + iNdEx = entryPreIndex + skippy, err := skipCpu(dAtA[iNdEx:]) + if err != nil { + return 
err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthCpu + } + if (iNdEx + skippy) > postIndex { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + m.NumaResourcePackages[mapkey] = mapvalue + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipCpu(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthCpu + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *GetAdviceRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: GetAdviceRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: GetAdviceRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Entries", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthCpu + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthCpu + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Entries == nil { + m.Entries = make(map[string]*ContainerAllocationInfoEntries) + } + var mapkey string + var mapvalue *ContainerAllocationInfoEntries + for iNdEx < postIndex { 
+ entryPreIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + if fieldNum == 1 { + var stringLenmapkey uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLenmapkey |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLenmapkey := int(stringLenmapkey) + if intStringLenmapkey < 0 { + return ErrInvalidLengthCpu + } + postStringIndexmapkey := iNdEx + intStringLenmapkey + if postStringIndexmapkey < 0 { + return ErrInvalidLengthCpu + } + if postStringIndexmapkey > l { + return io.ErrUnexpectedEOF + } + mapkey = string(dAtA[iNdEx:postStringIndexmapkey]) + iNdEx = postStringIndexmapkey + } else if fieldNum == 2 { + var mapmsglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + mapmsglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if mapmsglen < 0 { + return ErrInvalidLengthCpu + } + postmsgIndex := iNdEx + mapmsglen + if postmsgIndex < 0 { + return ErrInvalidLengthCpu + } + if postmsgIndex > l { + return io.ErrUnexpectedEOF + } + mapvalue = &ContainerAllocationInfoEntries{} + if err := mapvalue.Unmarshal(dAtA[iNdEx:postmsgIndex]); err != nil { + return err + } + iNdEx = postmsgIndex + } else { + iNdEx = entryPreIndex + skippy, err := skipCpu(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthCpu + } + if (iNdEx + skippy) > postIndex { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + m.Entries[mapkey] = mapvalue + iNdEx = postIndex + case 2: + if wireType != 2 { + 
return fmt.Errorf("proto: wrong wireType = %d for field WantedFeatureGates", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthCpu + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthCpu + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.WantedFeatureGates == nil { + m.WantedFeatureGates = make(map[string]*advisorsvc.FeatureGate) + } + var mapkey string + var mapvalue *advisorsvc.FeatureGate + for iNdEx < postIndex { + entryPreIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + if fieldNum == 1 { + var stringLenmapkey uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLenmapkey |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLenmapkey := int(stringLenmapkey) + if intStringLenmapkey < 0 { + return ErrInvalidLengthCpu + } + postStringIndexmapkey := iNdEx + intStringLenmapkey + if postStringIndexmapkey < 0 { + return ErrInvalidLengthCpu + } + if postStringIndexmapkey > l { + return io.ErrUnexpectedEOF + } + mapkey = string(dAtA[iNdEx:postStringIndexmapkey]) + iNdEx = postStringIndexmapkey + } else if fieldNum == 2 { + var mapmsglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + mapmsglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if 
mapmsglen < 0 { + return ErrInvalidLengthCpu + } + postmsgIndex := iNdEx + mapmsglen + if postmsgIndex < 0 { + return ErrInvalidLengthCpu + } + if postmsgIndex > l { + return io.ErrUnexpectedEOF + } + mapvalue = &advisorsvc.FeatureGate{} + if err := mapvalue.Unmarshal(dAtA[iNdEx:postmsgIndex]); err != nil { + return err + } + iNdEx = postmsgIndex + } else { + iNdEx = entryPreIndex + skippy, err := skipCpu(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthCpu + } + if (iNdEx + skippy) > postIndex { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + m.WantedFeatureGates[mapkey] = mapvalue + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ResourcePackageConfig", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowCpu + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthCpu + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthCpu + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.ResourcePackageConfig == nil { + m.ResourcePackageConfig = &ResourcePackageConfig{} + } + if err := m.ResourcePackageConfig.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } iNdEx = postIndex default: iNdEx = preIndex diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.proto b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.proto index d4b716d281..fd6d19ce6f 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.proto +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.proto @@ -110,9 +110,33 @@ message ContainerAllocationInfoEntries { map entries = 1; // keyed by pool name or podUID } +// ResourcePackageItemConfig describes per-resource-package configuration on a 
specific NUMA node. +// Currently it carries pinned cpuset for the package. It is designed to be extensible. +message ResourcePackageItemConfig { + // pinned_cpuset is the pinned CPUSet string for the resource package on the specific NUMA node. + // The format follows existing cpuset string convention used in this proto (e.g. "0-3,8-11"). + string pinned_cpuset = 1; + map attributes = 2; +} + +// NumaResourcePackageConfig describes resource package configuration for one NUMA node. +message NumaResourcePackageConfig { + // packages is keyed by resource package name. + map packages = 1; +} + +// ResourcePackageConfig describes node-level resource package configurations organized by NUMA node. +message ResourcePackageConfig { + // numa_resource_packages is keyed by NUMA id. + map numa_resource_packages = 1; +} + message GetAdviceRequest { map entries = 1; // keyed by pool name or podUID map wanted_feature_gates = 2; // keyed by feature gate name + // resource_package_config carries resource package related configurations (e.g. pinned cpuset) + // from cpu plugin to sysadvisor. 
+ ResourcePackageConfig resource_package_config = 3; } diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go index 0f3e6d88f3..2c88f20df8 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go @@ -90,7 +90,7 @@ type CPUPressureLoadEviction struct { poolMetricCollectHandlers map[string]util.PoolMetricCollectHandler systemReservedCPUs machine.CPUSet - configTranslator *general.CommonSuffixTranslator + configTranslator general.SuffixTranslator } func NewCPUPressureLoadEviction(emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer, @@ -105,7 +105,7 @@ func NewCPUPressureLoadEviction(emitter metrics.MetricEmitter, metaServer *metas dynamicConf: conf.DynamicAgentConfiguration, skipPools: sets.NewString(conf.LoadPressureEvictionSkipPools...), syncPeriod: conf.LoadEvictionSyncPeriod, - configTranslator: general.NewCommonSuffixTranslator(commonstate.NUMAPoolInfix), + configTranslator: commonstate.OwnerPoolNameTranslator, } systemReservedCores, reserveErr := cpuutil.GetCoresReservedForSystem(conf, metaServer, metaServer.KatalystMachineInfo, metaServer.CPUDetails.CPUs().Clone()) diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/factory.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/factory.go index bde95ae6c7..3c595102cb 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/factory.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/factory.go @@ -27,17 +27,19 @@ import ( "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" "github.com/kubewharf/katalyst-core/pkg/config" "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metaserver/resourcepackage" 
"github.com/kubewharf/katalyst-core/pkg/metrics" "github.com/kubewharf/katalyst-core/pkg/util/general" "github.com/kubewharf/katalyst-core/pkg/util/machine" ) type HintOptimizerFactoryOptions struct { - Conf *config.Configuration - MetaServer *metaserver.MetaServer - Emitter metrics.MetricEmitter - State state.State - ReservedCPUs machine.CPUSet + Conf *config.Configuration + MetaServer *metaserver.MetaServer + ResourcePackageManager resourcepackage.ResourcePackageManager + Emitter metrics.MetricEmitter + State state.State + ReservedCPUs machine.CPUSet } type HintOptimizerFactory func(options HintOptimizerFactoryOptions) (hintoptimizer.HintOptimizer, error) diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/resourcepackage/optimizer.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/resourcepackage/optimizer.go index 8ccc4a85fb..44d06ff3d3 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/resourcepackage/optimizer.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/resourcepackage/optimizer.go @@ -20,11 +20,8 @@ import ( "context" "fmt" "sync" - "time" - apiequality "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/apimachinery/pkg/util/wait" pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer" @@ -32,29 +29,25 @@ import ( hintoptimizerutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/util" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" "github.com/kubewharf/katalyst-core/pkg/config" - "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metaserver/resourcepackage" "github.com/kubewharf/katalyst-core/pkg/metrics" "github.com/kubewharf/katalyst-core/pkg/util/general" "github.com/kubewharf/katalyst-core/pkg/util/native" qosutil 
"github.com/kubewharf/katalyst-core/pkg/util/qos" - resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" + rputil "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) const HintOptimizerNameResourcePackage = "resource_package" -const ( - syncResourcePackageUpdatePeriod = 30 * time.Second -) - // resourcePackageHintOptimizer implements the HintOptimizer interface based on resource package information. type resourcePackageHintOptimizer struct { - conf *config.Configuration - metaServer *metaserver.MetaServer - emitter metrics.MetricEmitter - state state.State + conf *config.Configuration + rpm resourcepackage.ResourcePackageManager + emitter metrics.MetricEmitter + state state.State mux sync.RWMutex - resourcePackageMap map[int][]resourcepackage.ResourcePackageItem + resourcePackageMap rputil.NUMAResourcePackageItems } // NewResourcePackageHintOptimizer creates a new resourcePackageHintOptimizer. @@ -62,12 +55,11 @@ func NewResourcePackageHintOptimizer( options policy.HintOptimizerFactoryOptions, ) (hintoptimizer.HintOptimizer, error) { o := &resourcePackageHintOptimizer{ - conf: options.Conf, - metaServer: options.MetaServer, - emitter: options.Emitter, - state: options.State, + conf: options.Conf, + rpm: options.ResourcePackageManager, + emitter: options.Emitter, + state: options.State, } - o.updateResourcePackageMap() return o, nil } @@ -88,7 +80,7 @@ func (o *resourcePackageHintOptimizer) OptimizeHints( return hintoptimizerutil.ErrHintOptimizerSkip } - resourcePackage := resourcepackage.GetResourcePackageName(request.ResourceRequest.Annotations) + resourcePackage := rputil.GetResourcePackageName(request.ResourceRequest.Annotations) if resourcePackage == "" { general.Errorf("skip resourcePackageHintOptimizer for pod resource package not found in annotation") return hintoptimizerutil.ErrHintOptimizerSkip @@ -127,8 +119,7 @@ func (o *resourcePackageHintOptimizer) OptimizeHints( } // Run starts the resource package hint 
optimizer. -func (o *resourcePackageHintOptimizer) Run(stopCh <-chan struct{}) error { - go wait.Until(o.updateResourcePackageMap, syncResourcePackageUpdatePeriod, stopCh) +func (o *resourcePackageHintOptimizer) Run(_ <-chan struct{}) error { return nil } @@ -162,27 +153,29 @@ func (o *resourcePackageHintOptimizer) populateHintsByResourcePackage( } func (o *resourcePackageHintOptimizer) getResourcePackageAllocatable(resourcePackage string) (map[int]float64, error) { - resourcePackageMap := o.getResourcePackageMap() + resourcePackageMap, err := o.rpm.NodeResourcePackages(context.Background()) + if err != nil { + return nil, fmt.Errorf("NodeResourcePackages failed with error: %v", err) + } + if resourcePackageMap == nil { return nil, fmt.Errorf("resourcePackageMap is nil") } allocatable := make(map[int]float64) for nodeID, packages := range resourcePackageMap { - for _, pkg := range packages { - if pkg.PackageName == resourcePackage { - if pkg.Allocatable == nil { - continue - } - - // Use the native package to get CPU quantity safely - cpuQuantity := native.CPUQuantityGetter()(*pkg.Allocatable) - if cpuQuantity.IsZero() { - continue - } + if pkg, ok := packages[resourcePackage]; ok { + if pkg.Allocatable == nil { + continue + } - allocatable[nodeID] = float64(cpuQuantity.MilliValue()) / 1000 + // Use the native package to get CPU quantity safely + cpuQuantity := native.CPUQuantityGetter()(*pkg.Allocatable) + if cpuQuantity.IsZero() { + continue } + + allocatable[nodeID] = float64(cpuQuantity.MilliValue()) / 1000 } } return allocatable, nil @@ -198,7 +191,7 @@ func (o *resourcePackageHintOptimizer) getResourcePackageAllocated(resourcePacka for _, entries := range nodeState.PodEntries { for _, entry := range entries { - if entry == nil || resourcepackage.GetResourcePackageName(entry.Annotations) != resourcePackage { + if entry == nil || rputil.GetResourcePackageName(entry.Annotations) != resourcePackage { continue } @@ -208,26 +201,3 @@ func (o 
*resourcePackageHintOptimizer) getResourcePackageAllocated(resourcePacka } return allocated, nil } - -func (o *resourcePackageHintOptimizer) getResourcePackageMap() map[int][]resourcepackage.ResourcePackageItem { - o.mux.RLock() - defer o.mux.RUnlock() - return o.resourcePackageMap -} - -func (o *resourcePackageHintOptimizer) updateResourcePackageMap() { - // Get resource package information from meta server - resourcePackageMap, err := o.metaServer.NodeResourcePackages(context.Background()) - if err != nil { - general.Errorf("NodeResourcePackages failed with error: %v", err) - return - } - - o.mux.Lock() - defer o.mux.Unlock() - if apiequality.Semantic.DeepEqual(resourcePackageMap, o.resourcePackageMap) { - return - } - general.Infof("update resource package map from %+v to %+v", o.resourcePackageMap, resourcePackageMap) - o.resourcePackageMap = resourcePackageMap -} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/resourcepackage/optimizer_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/resourcepackage/optimizer_test.go index 526d26dae7..8b05844c1c 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/resourcepackage/optimizer_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/hintoptimizer/policy/resourcepackage/optimizer_test.go @@ -36,7 +36,6 @@ import ( "github.com/kubewharf/katalyst-core/pkg/metaserver" "github.com/kubewharf/katalyst-core/pkg/metrics" pkgutil "github.com/kubewharf/katalyst-core/pkg/util/resource-package" - resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) // MockState is a mock implementation of the state.State interface for testing purposes @@ -71,15 +70,15 @@ func (m *MockState) ClearState() {} func (m *MockState) StoreState() error { return nil } type resourcePackageManagerStub struct { - nodeResourcePackagesMap map[int][]pkgutil.ResourcePackageItem + nodeResourcePackagesMap pkgutil.NUMAResourcePackageItems err error } -func (s 
*resourcePackageManagerStub) ConvertNPDResourcePackages(_ *nodev1alpha1.NodeProfileDescriptor) (map[int][]pkgutil.ResourcePackageItem, error) { +func (s *resourcePackageManagerStub) ConvertNPDResourcePackages(_ *nodev1alpha1.NodeProfileDescriptor) (pkgutil.NUMAResourcePackageItems, error) { return s.nodeResourcePackagesMap, nil } -func (s *resourcePackageManagerStub) NodeResourcePackages(_ context.Context) (map[int][]pkgutil.ResourcePackageItem, error) { +func (s *resourcePackageManagerStub) NodeResourcePackages(_ context.Context) (pkgutil.NUMAResourcePackageItems, error) { return s.nodeResourcePackagesMap, s.err } @@ -88,12 +87,11 @@ func TestNewResourcePackageHintOptimizer(t *testing.T) { convey.Convey("Test NewResourcePackageHintOptimizer", t, func() { options := policy.HintOptimizerFactoryOptions{ - Conf: &config.Configuration{}, - MetaServer: &metaserver.MetaServer{ - ResourcePackageManager: &resourcePackageManagerStub{}, - }, - Emitter: metrics.DummyMetrics{}, - State: &MockState{}, + Conf: &config.Configuration{}, + MetaServer: &metaserver.MetaServer{}, + ResourcePackageManager: &resourcePackageManagerStub{}, + Emitter: metrics.DummyMetrics{}, + State: &MockState{}, } optimizer, err := NewResourcePackageHintOptimizer(options) @@ -108,9 +106,7 @@ func TestResourcePackageHintOptimizer_OptimizeHints(t *testing.T) { convey.Convey("Test OptimizeHints", t, func() { convey.Convey("when request is nil", func() { optimizer := &resourcePackageHintOptimizer{ - metaServer: &metaserver.MetaServer{ - ResourcePackageManager: &resourcePackageManagerStub{}, - }, + rpm: &resourcePackageManagerStub{}, } err := optimizer.OptimizeHints(hintoptimizer.Request{}, &pluginapi.ListOfTopologyHints{}) convey.So(err, convey.ShouldNotBeNil) @@ -119,9 +115,7 @@ func TestResourcePackageHintOptimizer_OptimizeHints(t *testing.T) { convey.Convey("when hints is nil", func() { optimizer := &resourcePackageHintOptimizer{ - metaServer: &metaserver.MetaServer{ - ResourcePackageManager: 
&resourcePackageManagerStub{}, - }, + rpm: &resourcePackageManagerStub{}, } err := optimizer.OptimizeHints(hintoptimizer.Request{ ResourceRequest: &pluginapi.ResourceRequest{}, @@ -131,9 +125,7 @@ func TestResourcePackageHintOptimizer_OptimizeHints(t *testing.T) { }) convey.Convey("when pod is NUMA exclusive", func() { - optimizer := &resourcePackageHintOptimizer{ - metaServer: &metaserver.MetaServer{}, - } + optimizer := &resourcePackageHintOptimizer{} err := optimizer.OptimizeHints(hintoptimizer.Request{ ResourceRequest: &pluginapi.ResourceRequest{ Annotations: map[string]string{ @@ -146,9 +138,7 @@ func TestResourcePackageHintOptimizer_OptimizeHints(t *testing.T) { }) convey.Convey("when resource package not found in annotation", func() { - optimizer := &resourcePackageHintOptimizer{ - metaServer: &metaserver.MetaServer{}, - } + optimizer := &resourcePackageHintOptimizer{} err := optimizer.OptimizeHints(hintoptimizer.Request{ ResourceRequest: &pluginapi.ResourceRequest{ Annotations: map[string]string{ @@ -175,12 +165,10 @@ func TestResourcePackageHintOptimizer_OptimizeHints(t *testing.T) { }, } - optimizer := &resourcePackageHintOptimizer{ - metaServer: &metaserver.MetaServer{}, - state: mockState, - resourcePackageMap: map[int][]pkgutil.ResourcePackageItem{ + mockRPM := &resourcePackageManagerStub{ + nodeResourcePackagesMap: pkgutil.NUMAResourcePackageItems{ 0: { - { + "test-package": { ResourcePackage: nodev1alpha1.ResourcePackage{ PackageName: "test-package", Allocatable: &v1.ResourceList{ @@ -190,7 +178,7 @@ func TestResourcePackageHintOptimizer_OptimizeHints(t *testing.T) { }, }, 1: { - { + "test-package-1": { ResourcePackage: nodev1alpha1.ResourcePackage{ PackageName: "test-package-1", Allocatable: &v1.ResourceList{ @@ -202,6 +190,11 @@ func TestResourcePackageHintOptimizer_OptimizeHints(t *testing.T) { }, } + optimizer := &resourcePackageHintOptimizer{ + rpm: mockRPM, + state: mockState, + } + hints := &pluginapi.ListOfTopologyHints{ Hints: 
[]*pluginapi.TopologyHint{ { @@ -233,9 +226,7 @@ func TestResourcePackageHintOptimizer_Run(t *testing.T) { t.Parallel() convey.Convey("Test Run", t, func() { - optimizer := &resourcePackageHintOptimizer{ - metaServer: &metaserver.MetaServer{}, - } + optimizer := &resourcePackageHintOptimizer{} stopCh := make(chan struct{}) close(stopCh) optimizer.Run(stopCh) @@ -246,9 +237,7 @@ func TestResourcePackageHintOptimizer_populateHintsByResourcePackage(t *testing. t.Parallel() convey.Convey("Test populateHintsByResourcePackage", t, func() { - optimizer := &resourcePackageHintOptimizer{ - metaServer: &metaserver.MetaServer{}, - } + optimizer := &resourcePackageHintOptimizer{} testCases := []struct { name string @@ -332,12 +321,10 @@ func TestResourcePackageHintOptimizer_getResourcePackageAllocatable(t *testing.T convey.Convey("Test getResourcePackageAllocatable", t, func() { convey.Convey("when resourcePackageMap is nil", func() { - mockMetaServer := &metaserver.MetaServer{ - ResourcePackageManager: &resourcePackageManagerStub{}, - } + mockRPM := &resourcePackageManagerStub{} optimizer := &resourcePackageHintOptimizer{ - metaServer: mockMetaServer, + rpm: mockRPM, } result, err := optimizer.getResourcePackageAllocatable("test-package") convey.So(result, convey.ShouldBeNil) @@ -346,13 +333,10 @@ func TestResourcePackageHintOptimizer_getResourcePackageAllocatable(t *testing.T }) convey.Convey("success case", func() { - mockMetaServer := &metaserver.MetaServer{} - - optimizer := &resourcePackageHintOptimizer{ - metaServer: mockMetaServer, - resourcePackageMap: map[int][]pkgutil.ResourcePackageItem{ + mockRPM := &resourcePackageManagerStub{ + nodeResourcePackagesMap: pkgutil.NUMAResourcePackageItems{ 0: { - { + "test-package": { ResourcePackage: nodev1alpha1.ResourcePackage{ PackageName: "test-package", Allocatable: &v1.ResourceList{ @@ -360,7 +344,7 @@ func TestResourcePackageHintOptimizer_getResourcePackageAllocatable(t *testing.T }, }, }, - { + "test-package-1": { 
ResourcePackage: nodev1alpha1.ResourcePackage{ PackageName: "test-package-1", Allocatable: &v1.ResourceList{ @@ -371,6 +355,10 @@ func TestResourcePackageHintOptimizer_getResourcePackageAllocatable(t *testing.T }, }, } + + optimizer := &resourcePackageHintOptimizer{ + rpm: mockRPM, + } result, err := optimizer.getResourcePackageAllocatable("test-package") convey.So(err, convey.ShouldBeNil) convey.So(result, convey.ShouldNotBeNil) @@ -378,13 +366,10 @@ func TestResourcePackageHintOptimizer_getResourcePackageAllocatable(t *testing.T }) convey.Convey("when allocatable or cpu is nil", func() { - mockMetaServer := &metaserver.MetaServer{} - - optimizer := &resourcePackageHintOptimizer{ - metaServer: mockMetaServer, - resourcePackageMap: map[int][]resourcepackage.ResourcePackageItem{ + mockRPM := &resourcePackageManagerStub{ + nodeResourcePackagesMap: pkgutil.NUMAResourcePackageItems{ 0: { - { + "test-package": { ResourcePackage: nodev1alpha1.ResourcePackage{ PackageName: "test-package", Allocatable: nil, @@ -392,7 +377,7 @@ func TestResourcePackageHintOptimizer_getResourcePackageAllocatable(t *testing.T }, }, 1: { - { + "test-package": { ResourcePackage: nodev1alpha1.ResourcePackage{ PackageName: "test-package", Allocatable: &v1.ResourceList{ @@ -403,6 +388,10 @@ func TestResourcePackageHintOptimizer_getResourcePackageAllocatable(t *testing.T }, }, } + + optimizer := &resourcePackageHintOptimizer{ + rpm: mockRPM, + } result, err := optimizer.getResourcePackageAllocatable("test-package") convey.So(err, convey.ShouldBeNil) convey.So(len(result), convey.ShouldEqual, 0) @@ -469,8 +458,8 @@ func TestResourcePackageHintOptimizer_getResourcePackageAllocated(t *testing.T) } optimizer := &resourcePackageHintOptimizer{ - state: mockState, - metaServer: &metaserver.MetaServer{}, + state: mockState, + rpm: &resourcePackageManagerStub{}, } result, err := optimizer.getResourcePackageAllocated("test-package") convey.So(err, convey.ShouldBeNil) @@ -494,8 +483,8 @@ func 
TestResourcePackageHintOptimizer_getResourcePackageAllocated(t *testing.T) } optimizer := &resourcePackageHintOptimizer{ - state: mockState, - metaServer: &metaserver.MetaServer{}, + state: mockState, + rpm: &resourcePackageManagerStub{}, } result, err := optimizer.getResourcePackageAllocated("test-package") convey.So(err, convey.ShouldBeNil) @@ -534,8 +523,8 @@ func TestResourcePackageHintOptimizer_getResourcePackageAllocated(t *testing.T) } optimizer := &resourcePackageHintOptimizer{ - state: mockState, - metaServer: &metaserver.MetaServer{}, + state: mockState, + rpm: &resourcePackageManagerStub{}, } result, err := optimizer.getResourcePackageAllocated("test-package") convey.So(err, convey.ShouldBeNil) diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go index ef56fd58b7..1f18de1abb 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go @@ -59,6 +59,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/crd" "github.com/kubewharf/katalyst-core/pkg/config/generic" "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metaserver/resourcepackage" "github.com/kubewharf/katalyst-core/pkg/metrics" "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" "github.com/kubewharf/katalyst-core/pkg/util/general" @@ -114,6 +115,8 @@ type DynamicPolicy struct { cpuPressureEviction agent.Component cpuPressureEvictionCancel context.CancelFunc + resourcePackageManager *resourcepackage.CachedResourcePackageManager + irqTuner irqtuner.Tuner // those are parsed from configurations @@ -140,6 +143,7 @@ type DynamicPolicy struct { numaBindingResultAnnotationKey string numaNumberAnnotationKey string numaIDsAnnotationKey string + topologyAllocationAnnotationKey string transitionPeriod time.Duration reservedReclaimedCPUsSize int @@ -198,6 +202,8 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, 
conf *config.Configuration emitter: wrappedEmitter, metaServer: agentCtx.MetaServer, + resourcePackageManager: resourcepackage.NewCachedResourcePackageManager(agentCtx.MetaServer.ResourcePackageManager), + state: stateImpl, residualHitMap: make(map[string]int64), @@ -223,14 +229,15 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration reclaimRelativeRootCgroupPath: conf.ReclaimRelativeRootCgroupPath, numaBindingReclaimRelativeRootCgroupPaths: common.GetNUMABindingReclaimRelativeRootCgroupPaths(conf.ReclaimRelativeRootCgroupPath, agentCtx.CPUDetails.NUMANodes().ToSliceNoSortInt()), - podDebugAnnoKeys: conf.PodDebugAnnoKeys, - podAnnotationKeptKeys: conf.PodAnnotationKeptKeys, - podLabelKeptKeys: conf.PodLabelKeptKeys, - numaBindingResultAnnotationKey: conf.NUMABindingResultAnnotationKey, - numaNumberAnnotationKey: conf.NUMANumberAnnotationKey, - numaIDsAnnotationKey: conf.NUMAIDsAnnotationKey, - transitionPeriod: 30 * time.Second, - reservedReclaimedCPUsSize: general.Max(reservedReclaimedCPUsSize, agentCtx.KatalystMachineInfo.NumNUMANodes), + podDebugAnnoKeys: conf.PodDebugAnnoKeys, + podAnnotationKeptKeys: conf.PodAnnotationKeptKeys, + podLabelKeptKeys: conf.PodLabelKeptKeys, + numaBindingResultAnnotationKey: conf.NUMABindingResultAnnotationKey, + numaNumberAnnotationKey: conf.NUMANumberAnnotationKey, + numaIDsAnnotationKey: conf.NUMAIDsAnnotationKey, + topologyAllocationAnnotationKey: conf.TopologyAllocationAnnotationKey, + transitionPeriod: 30 * time.Second, + reservedReclaimedCPUsSize: general.Max(reservedReclaimedCPUsSize, agentCtx.KatalystMachineInfo.NumNUMANodes), } // initialize hint optimizer @@ -319,23 +326,25 @@ func (p *DynamicPolicy) Start() (err error) { general.Infof("called") p.Lock() - defer func() { - if !p.started { - if err == nil { - p.started = true - } else { - close(p.stopCh) - } - } - p.Unlock() - }() - if p.started { general.Infof("is already started") + p.Unlock() return nil } - + p.started = true p.stopCh = 
make(chan struct{}) + p.Unlock() + + defer func() { + if err != nil { + p.Lock() + if p.started { + p.started = false + close(p.stopCh) + } + p.Unlock() + } + }() if p.irqTuner != nil { go p.irqTuner.Run(p.stopCh) @@ -385,9 +394,13 @@ func (p *DynamicPolicy) Start() (err error) { // start cpu-pressure eviction plugin if needed if p.cpuPressureEviction != nil { - var ctx context.Context - ctx, p.cpuPressureEvictionCancel = context.WithCancel(context.Background()) - go p.cpuPressureEviction.Run(ctx) + p.Lock() + if p.started { + var ctx context.Context + ctx, p.cpuPressureEvictionCancel = context.WithCancel(context.Background()) + go p.cpuPressureEviction.Run(ctx) + } + p.Unlock() } go wait.Until(func() { @@ -461,6 +474,14 @@ func (p *DynamicPolicy) Start() (err error) { go wait.BackoffUntil(communicateWithCPUAdvisorServer, wait.NewExponentialBackoffManager(800*time.Millisecond, 30*time.Second, 2*time.Minute, 2.0, 0, &clock.RealClock{}), true, p.stopCh) + err = p.resourcePackageManager.Run(p.stopCh) + if err != nil { + return fmt.Errorf("resourcePackageManager.Run failed with error: %v", err) + } + + p.syncResourcePackagePinnedCPUSet() + go wait.Until(p.syncResourcePackagePinnedCPUSet, 30*time.Second, p.stopCh) + err = p.sharedCoresNUMABindingHintOptimizer.Run(p.stopCh) if err != nil { return fmt.Errorf("sharedCoresNUMABindingHintOptimizer.Run failed with error: %v", err) @@ -979,30 +1000,24 @@ func (p *DynamicPolicy) Allocate(ctx context.Context, "originalAllocationResult", allocationInfo.OriginalAllocationResult.String(), "currentResult", allocationInfo.AllocationResult.String()) - return &pluginapi.ResourceAllocationResponse{ - PodUid: req.PodUid, - PodNamespace: req.PodNamespace, - PodName: req.PodName, - ContainerName: req.ContainerName, - ContainerType: req.ContainerType, - ContainerIndex: req.ContainerIndex, - PodRole: req.PodRole, - PodType: req.PodType, - ResourceName: string(v1.ResourceCPU), - AllocationResult: &pluginapi.ResourceAllocation{ - 
ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{ - string(v1.ResourceCPU): { - OciPropertyName: util.OCIPropertyNameCPUSetCPUs, - IsNodeResource: false, - IsScalarResource: true, - AllocatedQuantity: float64(allocationInfo.AllocationResult.Size()), - AllocationResult: allocationInfo.AllocationResult.String(), - }, - }, - }, - Labels: general.DeepCopyMap(req.Labels), - Annotations: general.DeepCopyMap(req.Annotations), - }, nil + // Add topologyAllocationAnnotations for numa binding containers + var topologyAllocationAnnotations map[string]string + if allocationInfo.CheckNUMABinding() { + topologyAllocationAnnotations, err = cpuutil.GetCPUTopologyAllocationsAnnotations(allocationInfo, p.topologyAllocationAnnotationKey, req) + if err != nil { + return nil, fmt.Errorf("GetCPUTopologyAllocationsAnnotations failed with error: %v", err) + } + } + + resp, err = cpuutil.PackAllocationResponse(allocationInfo, string(v1.ResourceCPU), util.OCIPropertyNameCPUSetCPUs, + false, true, req, topologyAllocationAnnotations) + if err != nil { + general.Errorf("pod: %s/%s, container: %s PackResourceAllocationResponseByAllocationInfo failed with error: %v", + req.PodNamespace, req.PodName, req.ContainerName, err) + return nil, fmt.Errorf("PackResourceAllocationResponseByAllocationInfo failed with error: %v", err) + } + + return resp, nil } if p.allocationHandlers[qosLevel] == nil { @@ -1075,7 +1090,7 @@ func (p *DynamicPolicy) RemovePod(ctx context.Context, return nil, fmt.Errorf("failed to release accompany resource %v", err) } - aErr := p.adjustAllocationEntries(false) + aErr := p.adjustAllocationEntries(podEntries, p.state.GetMachineState(), false) if aErr != nil { general.ErrorS(aErr, "adjustAllocationEntries failed", "podUID", req.PodUid) } @@ -1170,11 +1185,12 @@ func (p *DynamicPolicy) initHintOptimizers() error { func (p *DynamicPolicy) generateHintOptimizerFactoryOptions() policy.HintOptimizerFactoryOptions { return policy.HintOptimizerFactoryOptions{ - Conf: 
p.conf, - Emitter: p.emitter, - MetaServer: p.metaServer, - State: p.state, - ReservedCPUs: p.reservedCPUs, + Conf: p.conf, + Emitter: p.emitter, + MetaServer: p.metaServer, + ResourcePackageManager: p.resourcePackageManager, + State: p.state, + ReservedCPUs: p.reservedCPUs, } } diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go index 1a56ec2364..ed564ed85b 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler.go @@ -45,6 +45,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/calculator" advisorapi "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" + cpuutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/util" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" "github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/featuregatenegotiation" "github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/featuregatenegotiation/finders" @@ -56,6 +57,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/util/metric" "github.com/kubewharf/katalyst-core/pkg/util/native" "github.com/kubewharf/katalyst-core/pkg/util/process" + resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) const ( @@ -323,9 +325,31 @@ func (p *DynamicPolicy) createGetAdviceRequest() (*advisorapi.GetAdviceRequest, general.InfofV(6, "CPU plugin desire negotiation feature gates: %#v", wantedFeatureGates) + machineState := p.state.GetMachineState() + numaResourcePackageStates := machineState.GetNUMAResourcePackageStates() + var resourcePackageConfig *advisorapi.ResourcePackageConfig + if len(numaResourcePackageStates) > 0 { + resourcePackageConfig = &advisorapi.ResourcePackageConfig{ + NumaResourcePackages: 
make(map[uint64]*advisorapi.NumaResourcePackageConfig), + } + for numaID, pkgStates := range numaResourcePackageStates { + numaConfig := &advisorapi.NumaResourcePackageConfig{ + Packages: make(map[string]*advisorapi.ResourcePackageItemConfig), + } + for pkgName, state := range pkgStates { + numaConfig.Packages[pkgName] = &advisorapi.ResourcePackageItemConfig{ + PinnedCpuset: state.PinnedCPUSet.String(), + Attributes: state.Attributes, + } + } + resourcePackageConfig.NumaResourcePackages[uint64(numaID)] = numaConfig + } + } + return &advisorapi.GetAdviceRequest{ - Entries: chkEntries, - WantedFeatureGates: wantedFeatureGates, + Entries: chkEntries, + WantedFeatureGates: wantedFeatureGates, + ResourcePackageConfig: resourcePackageConfig, }, nil } @@ -810,37 +834,11 @@ func (p *DynamicPolicy) applyAllSubCgroupQuotaToUnLimit(containerRelativePath st }) } -// generateBlockCPUSet generates BlockCPUSet from cpu-advisor response. -// The logic contains the following main steps: -// 1. Handle blocks for static pools and forbidden pools -// 2. Handle blocks with specified NUMA IDs (for NUMA-bound dedicated_cores/shared_cores containers -// and reclaimed_cores containers colocated with them) -// 3. 
Handle blocks without specified NUMA ID (for non-NUMA-bound containers including -// dedicated_cores, shared_cores and reclaimed_cores containers) -// -// For each block, the function allocates CPU sets based on: -// - Already allocated CPUs for dedicated cores -// - Available CPUs considering already allocated static/forbidden pools -// - NUMA topology awareness for better performance -// - CPU allocation strategies that minimize CPU migrations -func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchResponse) (advisorapi.BlockCPUSet, error) { - if resp == nil { - return nil, fmt.Errorf("got nil resp") - } - - numaToBlocks, err := resp.GetBlocks() - if err != nil { - return nil, err - } - - machineInfo := p.machineInfo - topology := machineInfo.CPUTopology - availableCPUs := topology.CPUDetails.CPUs() - - // Walk through static pools to construct blockCPUSet (for static pool), - // and calculate availableCPUs after deducting static pools. - // Static pools are predefined pools that should not be changed during runtime. - blockCPUSet := advisorapi.NewBlockCPUSet() +// allocateStaticAndForbiddenPools allocates CPU sets for static and forbidden pools. +// It iterates through the configured static pools (e.g., reserve) and forbidden pools, +// assigning their pre-calculated or required CPU sets from the state, and deducting +// them from the globally available CPUs. 
+func (p *DynamicPolicy) allocateStaticAndForbiddenPools(resp *advisorapi.ListAndWatchResponse, blockCPUSet advisorapi.BlockCPUSet, availableCPUs machine.CPUSet) (machine.CPUSet, error) { for _, poolName := range state.StaticPools.List() { allocationInfo := p.state.GetAllocationInfo(poolName, commonstate.FakedContainerName) if allocationInfo == nil { @@ -849,7 +847,7 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons blocks, ok := resp.GeEntryNUMABlocks(poolName, commonstate.FakedContainerName, commonstate.FakedNUMAID) if !ok || len(blocks) != 1 { - return nil, fmt.Errorf("blocks of pool: %s is invalid", poolName) + return availableCPUs, fmt.Errorf("blocks of pool: %s is invalid", poolName) } blockID := blocks[0].BlockId @@ -857,8 +855,6 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons availableCPUs = availableCPUs.Difference(blockCPUSet[blockID]) } - // Walk through forbidden pools and deduct their CPUs from availableCPUs. - // Forbidden pools are reserved pools that should not be allocated to any containers. 
for _, poolName := range state.ForbiddenPools.List() { allocationInfo := p.state.GetAllocationInfo(poolName, commonstate.FakedContainerName) if allocationInfo == nil { @@ -867,162 +863,438 @@ func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchRespons availableCPUs = availableCPUs.Difference(allocationInfo.AllocationResult.Clone()) } + return availableCPUs, nil +} - // Process blocks with specified NUMA IDs (for NUMA-bound containers) - // These are typically dedicated_cores containers with NUMA binding and - // reclaimed_cores containers colocated with them - for numaID, blocks := range numaToBlocks { +func isDedicatedBlock(block *advisorapi.BlockInfo) bool { + _, ok := block.OwnerPoolEntryMap[commonstate.PoolNameDedicated] + return ok +} + +func isSharedBlock(block *advisorapi.BlockInfo) bool { + for poolName := range block.OwnerPoolEntryMap { + if commonstate.IsIsolationPool(poolName) || commonstate.IsShareNUMABindingPool(poolName) { + return true + } + } + return false +} + +// allocateDedicatedBlocks allocates CPU sets for dedicated blocks. +// It iterates through the dedicated blocks and assigns them the CPUs they have already +// been allocated according to the state. It also subtracts these allocated CPUs +// from the available and remaining CPU sets. 
+func (p *DynamicPolicy) allocateDedicatedBlocks( + numaID int, + blocks []*advisorapi.BlockInfo, + blockCPUSet advisorapi.BlockCPUSet, + numaAvailableCPUs machine.CPUSet, + nodeRemainingCPUs *machine.CPUSet, + availableCPUs *machine.CPUSet, + rpPinnedCPUSet map[string]machine.CPUSet, + allPinnedCPUSets machine.CPUSet, + withNUMABinding *bool, +) error { + machineInfo := p.machineInfo + for _, block := range blocks { + if block == nil { + continue + } + + entry := block.OwnerPoolEntryMap[commonstate.PoolNameDedicated] + blockID := block.BlockId + + if _, found := blockCPUSet[blockID]; found { + general.Warningf("block: %v already allocated", blockID) + continue + } + + blockResult, err := general.CovertUInt64ToInt(block.Result) + if err != nil { + return fmt.Errorf("parse block: %s result failed with error: %v", blockID, err) + } + + allocationInfo := p.state.GetAllocationInfo(entry.EntryName, entry.SubEntryName) + if allocationInfo == nil { + continue + } + + alreadyAllocatedCPUs, ok := allocationInfo.TopologyAwareAssignments[numaID] + if !ok { + continue + } + + pinnedCPUSets := machine.NewCPUSet() + pkg := allocationInfo.GetResourcePackageName() + if pkg != "" && !rpPinnedCPUSet[pkg].IsEmpty() { + pinnedCPUSets = rpPinnedCPUSet[pkg].Intersection(numaAvailableCPUs) + } + + // Calculate current available CPUs on this NUMA node by intersecting the globally updated + // availableCPUs with the static numaAvailableCPUs. This correctly computes the latest available CPUs + // dynamically without needing a separate NUMA-level tracking variable. 
+ currentAvailableCPUs := availableCPUs.Intersection(numaAvailableCPUs) + if !pinnedCPUSets.IsEmpty() { + currentAvailableCPUs = currentAvailableCPUs.Intersection(pinnedCPUSets) + } else { + currentAvailableCPUs = currentAvailableCPUs.Difference(allPinnedCPUSets) + } + + var cpuset machine.CPUSet + // Get the CPUs that are both already allocated to this block and still available in the current context + availableAlreadyAllocatedCPUs := alreadyAllocatedCPUs.Intersection(currentAvailableCPUs) + + if alreadyAllocatedCPUs.Size() == blockResult { + // If the requested block size hasn't changed, we should ideally reuse the exact same allocation. + // However, we must first verify that all previously allocated CPUs are still available. + if availableAlreadyAllocatedCPUs.Size() != blockResult { + return fmt.Errorf("NUMA Aware block: %s in NUMA: %d size not changed, but some CPUs are not available", blockID, numaID) + } + cpuset = alreadyAllocatedCPUs + } else if availableAlreadyAllocatedCPUs.Size() >= blockResult { + // If the block size decreased, we can fulfill the new size entirely from the previously allocated (and still available) CPUs. + cpuset, err = calculator.TakeByTopology(machineInfo, availableAlreadyAllocatedCPUs, blockResult, true) + if err != nil { + return fmt.Errorf("allocate cpuset for NUMA Aware block: %s in NUMA: %d failed with error: %v", blockID, numaID, err) + } + } else { + // If the block size increased, we keep whatever previously allocated CPUs are still available, + // and allocate the remaining required CPUs from the pool of current available CPUs. 
+ cpuset, err = calculator.TakeByTopology(machineInfo, currentAvailableCPUs.Difference(availableAlreadyAllocatedCPUs), blockResult-availableAlreadyAllocatedCPUs.Size(), true) + if err != nil { + return fmt.Errorf("allocate cpuset for NUMA Aware block: %s in NUMA: %d failed with error: %v", blockID, numaID, err) + } + cpuset = cpuset.Union(availableAlreadyAllocatedCPUs) + } + + blockCPUSet[blockID] = cpuset + *nodeRemainingCPUs = nodeRemainingCPUs.Difference(cpuset) + *availableCPUs = availableCPUs.Difference(cpuset) + if withNUMABinding != nil { + *withNUMABinding = true + } + } + return nil +} + +// allocateShareBlocks allocates CPU sets for share blocks. +// It iterates through the share blocks, allocating CPUs based on advisor results. +// For FakedNUMAID, it uses the global available CPUs. For specific NUMA nodes, +// it uses the NUMA-available CPUs. It subtracts the allocated CPUs from the remaining sets. +func (p *DynamicPolicy) allocateShareBlocks( + numaID int, + blocks []*advisorapi.BlockInfo, + blockCPUSet advisorapi.BlockCPUSet, + numaAvailableCPUs machine.CPUSet, + nodeRemainingCPUs *machine.CPUSet, + availableCPUs *machine.CPUSet, + rpPinnedCPUSet map[string]machine.CPUSet, + allPinnedCPUSets machine.CPUSet, + withNUMABinding *bool, +) error { + machineInfo := p.machineInfo + for _, block := range blocks { + if block == nil { + continue + } + + pinnedCPUSets := machine.NewCPUSet() + for poolName := range block.OwnerPoolEntryMap { + if commonstate.IsIsolationPool(poolName) || commonstate.IsShareNUMABindingPool(poolName) { + _, pkg := resourcepackage.UnwrapOwnerPoolName(poolName) + if pkg != "" && !rpPinnedCPUSet[pkg].IsEmpty() { + pinnedCPUSets = rpPinnedCPUSet[pkg].Intersection(numaAvailableCPUs) + } + break + } + } + + blockID := block.BlockId + if _, found := blockCPUSet[blockID]; found { + continue + } + + blockResult, err := general.CovertUInt64ToInt(block.Result) + if err != nil { + return fmt.Errorf("parse block: %s result failed with error: %v", 
blockID, err) + } + + // Same as in allocateDedicatedBlocks, intersect the globally updated availableCPUs with + // the static numaAvailableCPUs to get the latest available CPUs dynamically. + currentAvailableCPUs := availableCPUs.Intersection(numaAvailableCPUs) + if numaID == commonstate.FakedNUMAID { + currentAvailableCPUs = *availableCPUs + } + + if !pinnedCPUSets.IsEmpty() { + currentAvailableCPUs = currentAvailableCPUs.Intersection(pinnedCPUSets) + } else { + currentAvailableCPUs = currentAvailableCPUs.Difference(allPinnedCPUSets) + } + + var cpuset machine.CPUSet if numaID == commonstate.FakedNUMAID { + cpuset, _, err = calculator.TakeByNUMABalance(machineInfo, currentAvailableCPUs, blockResult) + } else { + cpuset, err = calculator.TakeByTopology(machineInfo, currentAvailableCPUs, blockResult, false) + } + if err != nil { + return fmt.Errorf("allocate cpuset for block: %s failed with error: %v", blockID, err) + } + + blockCPUSet[blockID] = cpuset + *nodeRemainingCPUs = nodeRemainingCPUs.Difference(cpuset) + *availableCPUs = availableCPUs.Difference(cpuset) + if withNUMABinding != nil { + *withNUMABinding = true + } + } + return nil +} + +// generateReclaimBlockCPUSet generates BlockCPUSet for reclaim blocks using a separate phase. +// It iterates through the reclaim blocks and allocates CPUs from the available CPU pool. +// It explicitly excludes the unused non-reclaimable pinned CPUs (based on the provided +// disableReclaimSelector and resource packages) to ensure they are not used for reclaim workloads. +func (p *DynamicPolicy) generateReclaimBlockCPUSet( + reclaimBlocksMap map[int][]*advisorapi.BlockInfo, + nodeRemainingCPUs machine.CPUSet, + availableCPUs machine.CPUSet, + globalNonReclaimableCPUSet machine.CPUSet, + blockCPUSet advisorapi.BlockCPUSet, +) error { + machineInfo := p.machineInfo + topology := machineInfo.CPUTopology + + // 1. 
Process NUMA-aware reclaim blocks + for numaID, blocks := range reclaimBlocksMap { + if numaID == commonstate.FakedNUMAID || len(blocks) == 0 { continue } + numaAvailableCPUs := nodeRemainingCPUs.Intersection(topology.CPUDetails.CPUsInNUMANodes(numaID)) - withNUMABindingShareOrDedicatedPod := false - numaAvailableCPUs := availableCPUs.Intersection(topology.CPUDetails.CPUsInNUMANodes(numaID)) + // Deduct the non-reclaimable CPUSet for this NUMA node + currentAvailableCPUs := numaAvailableCPUs.Difference(globalNonReclaimableCPUSet) - // First handle blocks for NUMA-bound dedicated_cores containers - // Reuse already allocated CPU sets when possible to minimize CPU migration for _, block := range blocks { if block == nil { - general.Warningf("got nil block") - continue - } - - entry, ok := block.OwnerPoolEntryMap[commonstate.PoolNameDedicated] - if !ok { continue } - blockID := block.BlockId - if _, found := blockCPUSet[blockID]; found { - general.Warningf("block: %v already allocated", blockID) continue } blockResult, err := general.CovertUInt64ToInt(block.Result) if err != nil { - return nil, fmt.Errorf("parse block: %s result failed with error: %v", - blockID, err) + return fmt.Errorf("parse block: %s result failed with error: %v", blockID, err) } - allocationInfo := p.state.GetAllocationInfo(entry.EntryName, entry.SubEntryName) - if allocationInfo == nil { - continue - } + general.InfoS("generateReclaimBlockCPUSet allocating NUMA Aware block", + "blockID", blockID, + "numaID", numaID, + "blockResult", blockResult, + "numaAvailableCPUs", numaAvailableCPUs.String(), + "globalNonReclaimableCPUSet", globalNonReclaimableCPUSet.String(), + "currentAvailableCPUs", currentAvailableCPUs.String()) - alreadyAllocatedCPUs, ok := allocationInfo.TopologyAwareAssignments[numaID] - if !ok { - continue - } - - var cpuset machine.CPUSet - alreadyAllocatedCPUs = alreadyAllocatedCPUs.Intersection(numaAvailableCPUs) - if alreadyAllocatedCPUs.Size() >= blockResult { - cpuset, err = 
calculator.TakeByTopology(machineInfo, alreadyAllocatedCPUs, blockResult, true) - if err != nil { - return nil, fmt.Errorf("allocate cpuset for NUMA Aware block: %s in NUMA: %d failed with error: %v, numaAvailableCPUs: %d(%s), blockResult: %d", - blockID, numaID, err, numaAvailableCPUs.Size(), numaAvailableCPUs.String(), blockResult) - } - } else { - cpuset, err = calculator.TakeByTopology(machineInfo, numaAvailableCPUs.Difference(alreadyAllocatedCPUs), blockResult-alreadyAllocatedCPUs.Size(), true) - if err != nil { - return nil, fmt.Errorf("allocate cpuset for NUMA Aware block: %s in NUMA: %d failed with error: %v, numaAvailableCPUs: %d(%s), blockResult: %d", - blockID, numaID, err, numaAvailableCPUs.Size(), numaAvailableCPUs.String(), blockResult) - } - cpuset = cpuset.Union(alreadyAllocatedCPUs) + cpuset, err := calculator.TakeByTopology(machineInfo, currentAvailableCPUs, blockResult, false) + if err != nil { + return fmt.Errorf("allocate cpuset for NUMA Aware reclaim block: %s in NUMA: %d failed with error: %v", blockID, numaID, err) } blockCPUSet[blockID] = cpuset - numaAvailableCPUs = numaAvailableCPUs.Difference(cpuset) + currentAvailableCPUs = currentAvailableCPUs.Difference(cpuset) availableCPUs = availableCPUs.Difference(cpuset) - withNUMABindingShareOrDedicatedPod = true + + general.InfoS("generateReclaimBlockCPUSet allocated NUMA Aware block", + "blockID", blockID, + "numaID", numaID, + "allocatedCPUSet", cpuset.String(), + "currentAvailableCPUs", currentAvailableCPUs.String(), + "availableCPUs", availableCPUs.String()) } + } + + // 2. Process non-NUMA-aware reclaim blocks + if blocks, ok := reclaimBlocksMap[commonstate.FakedNUMAID]; ok && len(blocks) > 0 { + // Deduct the global non-reclaimable CPUSet to ensure non-NUMA-aware + // reclaim/share blocks do not overlap with non-reclaimable pinned CPUs. 
+ currentAvailableCPUs := availableCPUs.Difference(globalNonReclaimableCPUSet) - // Then handle blocks for NUMA-bound shared_cores containers and reclaimed_cores containers colocated with them - // These containers can share NUMA nodes with dedicated_cores containers for _, block := range blocks { if block == nil { - general.Warningf("got nil block") - continue - } - - _, ok := block.OwnerPoolEntryMap[commonstate.PoolNameDedicated] - if ok { continue } - blockID := block.BlockId if _, found := blockCPUSet[blockID]; found { - general.Warningf("block: %v already allocated", blockID) continue } blockResult, err := general.CovertUInt64ToInt(block.Result) if err != nil { - return nil, fmt.Errorf("parse block: %s result failed with error: %v", - blockID, err) + return fmt.Errorf("parse block: %s result failed with error: %v", blockID, err) } - cpuset, err := calculator.TakeByTopology(machineInfo, numaAvailableCPUs, blockResult, false) + general.InfoS("generateReclaimBlockCPUSet allocating non-NUMA Aware block", + "blockID", blockID, + "blockResult", blockResult, + "availableCPUs", availableCPUs.String(), + "globalNonReclaimableCPUSet", globalNonReclaimableCPUSet.String(), + "currentAvailableCPUs", currentAvailableCPUs.String()) + + cpuset, _, err := calculator.TakeByNUMABalance(machineInfo, currentAvailableCPUs, blockResult) if err != nil { - return nil, fmt.Errorf("allocate cpuset for NUMA Aware block: %s in NUMA: %d failed with error: %v, numaAvailableCPUs: %d(%s), blockResult: %d", - blockID, numaID, err, numaAvailableCPUs.Size(), numaAvailableCPUs.String(), blockResult) + return fmt.Errorf("allocate cpuset for non NUMA Aware reclaim block: %s failed with error: %v", blockID, err) } blockCPUSet[blockID] = cpuset - numaAvailableCPUs = numaAvailableCPUs.Difference(cpuset) + currentAvailableCPUs = currentAvailableCPUs.Difference(cpuset) availableCPUs = availableCPUs.Difference(cpuset) - for poolName := range block.OwnerPoolEntryMap { - if 
commonstate.IsIsolationPool(poolName) || commonstate.IsShareNUMABindingPool(poolName) { - withNUMABindingShareOrDedicatedPod = true - break - } - } + general.InfoS("generateReclaimBlockCPUSet allocated non-NUMA Aware block", + "blockID", blockID, + "allocatedCPUSet", cpuset.String(), + "currentAvailableCPUs", currentAvailableCPUs.String(), + "availableCPUs", availableCPUs.String()) } + } - // Finally, if there are NUMA-bound containers on this NUMA node, - // deduct all numaAvailableCPUs from availableCPUs to ensure that - // NUMA-bound pods don't share the same NUMA node with non-NUMA-bound pods - if withNUMABindingShareOrDedicatedPod { - // Because numaAvailableCPUs is a subset of availableCPUs, - // we need to deduct all numaAvailableCPUs from availableCPUs - availableCPUs = availableCPUs.Difference(numaAvailableCPUs) - } + return nil +} + +// generateBlockCPUSet computes the CPUSet allocation for all requested blocks using a two-phase allocation process. +// Phase 1 (High Priority): Allocates Dedicated and Share blocks, resolving NUMA boundaries and updating available CPUs. +// Phase 2 (Low Priority): Allocates Reclaim blocks, ensuring they don't use non-reclaimable pinned CPUs or CPUs already taken. 
+func (p *DynamicPolicy) generateBlockCPUSet(resp *advisorapi.ListAndWatchResponse) (advisorapi.BlockCPUSet, error) { + if resp == nil { + return nil, fmt.Errorf("got nil resp") } - // Walk through all blocks without specified NUMA ID (non-NUMA-bound containers) - // For each block, allocate CPUs using NUMA balance strategy to minimize - // memory access latency and CPU migrations - for _, block := range numaToBlocks[commonstate.FakedNUMAID] { - if block == nil { - general.Warningf("got nil block") + numaToBlocks, err := resp.GetBlocks() + if err != nil { + return nil, err + } + + machineInfo := p.machineInfo + topology := machineInfo.CPUTopology + availableCPUs := topology.CPUDetails.CPUs() + + rpPinnedCPUSet := p.state.GetMachineState().GetResourcePackagePinnedCPUSet() + allPinnedCPUSets := machine.NewCPUSet() + for _, cset := range rpPinnedCPUSet { + allPinnedCPUSets = allPinnedCPUSets.Union(cset) + } + + blockCPUSet := advisorapi.NewBlockCPUSet() + + // 1. Allocate Static and Forbidden Pools + availableCPUs, err = p.allocateStaticAndForbiddenPools(resp, blockCPUSet, availableCPUs) + if err != nil { + return nil, err + } + nodeRemainingCPUs := availableCPUs.Clone() + + // Get non-reclaimable pinned CPUSets + disableReclaimSelectorStr := p.conf.GetDynamicConfiguration().DisableReclaimPinnedCPUSetResourcePackageSelector + disableReclaimSelector, err := general.ParseSelector(disableReclaimSelectorStr) + if err != nil { + return nil, err + } + machineState := p.state.GetMachineState() + globalNonReclaimableCPUSet := cpuutil.GetAggResourcePackagePinnedCPUSet(disableReclaimSelector, machineState) + + general.InfoS("generateBlockCPUSet variables after allocateStaticAndForbiddenPools", + "allPinnedCPUSets", allPinnedCPUSets.String(), + "nodeRemainingCPUs", nodeRemainingCPUs.String(), + "globalNonReclaimableCPUSet", globalNonReclaimableCPUSet.String(), + "disableReclaimSelector", disableReclaimSelector.String()) + + reclaimBlocksMap := 
make(map[int][]*advisorapi.BlockInfo) + + // Phase 1: Allocate Dedicated and Share blocks + for numaID, blocks := range numaToBlocks { + if numaID == commonstate.FakedNUMAID { continue } - blockID := block.BlockId + var dedicatedBlocks, shareBlocks, reclaimBlocks []*advisorapi.BlockInfo + for _, block := range blocks { + if isDedicatedBlock(block) { + dedicatedBlocks = append(dedicatedBlocks, block) + } else if isSharedBlock(block) { + shareBlocks = append(shareBlocks, block) + } else { + reclaimBlocks = append(reclaimBlocks, block) + } + } + reclaimBlocksMap[numaID] = reclaimBlocks - if _, found := blockCPUSet[blockID]; found { - general.Warningf("block: %s already allocated", blockID) - continue + numaAvailableCPUs := availableCPUs.Intersection(topology.CPUDetails.CPUsInNUMANodes(numaID)) + withNUMABindingShareOrDedicatedPod := false + + err = p.allocateDedicatedBlocks(numaID, dedicatedBlocks, blockCPUSet, numaAvailableCPUs, &nodeRemainingCPUs, &availableCPUs, rpPinnedCPUSet, allPinnedCPUSets, &withNUMABindingShareOrDedicatedPod) + if err != nil { + return nil, err } - blockResult, err := general.CovertUInt64ToInt(block.Result) + general.InfoS("generateBlockCPUSet variables after allocateDedicatedBlocks", + "numaID", numaID, + "withNUMABindingShareOrDedicatedPod", withNUMABindingShareOrDedicatedPod, + "numaAvailableCPUs", numaAvailableCPUs.String(), + "nodeRemainingCPUs", nodeRemainingCPUs.String(), + "availableCPUs", availableCPUs.String()) + + err = p.allocateShareBlocks(numaID, shareBlocks, blockCPUSet, numaAvailableCPUs, &nodeRemainingCPUs, &availableCPUs, rpPinnedCPUSet, allPinnedCPUSets, &withNUMABindingShareOrDedicatedPod) if err != nil { - return nil, fmt.Errorf("parse block: %s result failed with error: %v", - blockID, err) + return nil, err } - // Use NUMA balance strategy to avoid changing memory affinity (memset) as much as possible - // for blocks with faked NUMA ID (non-NUMA-bound containers) - resultCPUSet, _, err := 
calculator.TakeByNUMABalance(machineInfo, availableCPUs, blockResult) + if withNUMABindingShareOrDedicatedPod { + // If there is any NUMA-binding share or dedicated pod on this NUMA node, + // the entire NUMA node is excluded from the global pool (availableCPUs) + // to avoid cross-NUMA interference from non-NUMA-aware workloads. + availableCPUs = availableCPUs.Difference(numaAvailableCPUs) + } + + general.InfoS("generateBlockCPUSet variables after allocateShareBlocks", + "numaID", numaID, + "withNUMABindingShareOrDedicatedPod", withNUMABindingShareOrDedicatedPod, + "numaAvailableCPUs", numaAvailableCPUs.String(), + "nodeRemainingCPUs", nodeRemainingCPUs.String(), + "availableCPUs", availableCPUs.String()) + } + + // Phase 2 for FakedNUMAID + // Note: Normal share blocks are not considered "shared" by isSharedBlock + // (which only matches isolation or NUMA-binding share pools), so they will + // be pushed to reclaimBlocks and processed in Phase 3 alongside reclaim blocks. + if blocks, ok := numaToBlocks[commonstate.FakedNUMAID]; ok { + var shareBlocks, reclaimBlocks []*advisorapi.BlockInfo + for _, block := range blocks { + if isSharedBlock(block) { + shareBlocks = append(shareBlocks, block) + } else { + reclaimBlocks = append(reclaimBlocks, block) + } + } + reclaimBlocksMap[commonstate.FakedNUMAID] = reclaimBlocks + + emptyNUMA := machine.NewCPUSet() + err = p.allocateShareBlocks(commonstate.FakedNUMAID, shareBlocks, blockCPUSet, emptyNUMA, &nodeRemainingCPUs, &availableCPUs, rpPinnedCPUSet, allPinnedCPUSets, nil) if err != nil { - return nil, fmt.Errorf("allocate cpuset for non NUMA Aware block: %s failed with error: %v, availableCPUs: %d(%s), blockResult: %d", - blockID, err, availableCPUs.Size(), availableCPUs.String(), blockResult) + return nil, err } - blockCPUSet[blockID] = resultCPUSet - availableCPUs = availableCPUs.Difference(resultCPUSet) + general.InfoS("generateBlockCPUSet variables after allocateShareBlocks for FakedNUMAID", + "nodeRemainingCPUs", 
nodeRemainingCPUs.String(), + "availableCPUs", availableCPUs.String()) + } + + // Phase 3: Allocate Reclaim blocks + err = p.generateReclaimBlockCPUSet(reclaimBlocksMap, nodeRemainingCPUs, availableCPUs, globalNonReclaimableCPUSet, blockCPUSet) + if err != nil { + return nil, err } return blockCPUSet, nil diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler_test.go index ac560a52d9..85725789ae 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_advisor_handler_test.go @@ -30,13 +30,19 @@ import ( v1 "k8s.io/api/core/v1" resource2 "k8s.io/apimachinery/pkg/api/resource" + apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/advisorsvc" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" advisorapi "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" + "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/statedirectory" "github.com/kubewharf/katalyst-core/pkg/metaserver" "github.com/kubewharf/katalyst-core/pkg/metaserver/agent" "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" + "github.com/kubewharf/katalyst-core/pkg/metrics" "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" cgroupmgr "github.com/kubewharf/katalyst-core/pkg/util/cgroup/manager" + "github.com/kubewharf/katalyst-core/pkg/util/machine" "github.com/kubewharf/katalyst-core/pkg/util/native" ) @@ -573,3 +579,595 @@ func TestDynamicPolicy_checkAndApplySubCgroupPath(t *testing.T) { convey.So(err3, convey.ShouldBeNil) }) } + +// TestDynamicPolicy_generateBlockCPUSet verifies the block CPUSet generation logic. 
+// It uses a table-driven approach to test various scenarios including: +// - Two-phase allocation: Dedicated/Share blocks first, Reclaim blocks second. +// - Non-reclaimable CPU deduction: Ensuring reclaim blocks do not overlap with pinned CPUSets from resource packages marked as disable-reclaim. +// - Parallel execution: Ensuring no race conditions exist in the policy's read-only operations. +func TestDynamicPolicy_generateBlockCPUSet(t *testing.T) { + t.Parallel() + + type testCase struct { + name string + disableReclaimSelector string + // setupMachineState prepares the mock machine state, e.g., resource packages, existing pod allocations. + setupMachineState func(state state.State, topo *machine.CPUTopology) + // advisorResponse simulates the response from the CPU advisor containing blocks to be allocated. + advisorResponse *advisorapi.ListAndWatchResponse + expectedError bool + expectedErrorStr string + // validateResult contains custom assertions for the resulting BlockCPUSet. + validateResult func(t *testing.T, blockCPUSet advisorapi.BlockCPUSet, topo *machine.CPUTopology) + } + + testCases := []testCase{ + { + // Scenario: A single reclaim block without a specific NUMA ID (FakedNUMAID). + // It should be allocated from the global available pool minus any global non-reclaimable CPUs. 
+ name: "basic reclaim block with faked NUMA ID", + disableReclaimSelector: "disable-reclaim=true", + setupMachineState: func(st state.State, topo *machine.CPUTopology) { + machineState := st.GetMachineState() + // NUMA 0 has a non-reclaimable package using CPUs 0,1,2,3 + machineState[0].ResourcePackageStates = map[string]*state.ResourcePackageState{ + "pkg1": { + Attributes: map[string]string{"disable-reclaim": "true"}, + PinnedCPUSet: machine.NewCPUSet(0, 1, 2, 3), + }, + } + // NUMA 1 has a reclaimable package using CPUs 8,9 + machineState[1].ResourcePackageStates = map[string]*state.ResourcePackageState{ + "pkg2": { + Attributes: map[string]string{"disable-reclaim": "false"}, + PinnedCPUSet: machine.NewCPUSet(8, 9), + }, + } + st.SetMachineState(machineState, false) + }, + advisorResponse: &advisorapi.ListAndWatchResponse{ + Entries: map[string]*advisorapi.CalculationEntries{ + "reclaim": { + Entries: map[string]*advisorapi.CalculationInfo{ + "reclaim-entry": { + OwnerPoolName: "reclaim", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + -1: { // FakedNUMAID + Blocks: []*advisorapi.Block{ + {BlockId: "block-reclaim-1", Result: 4}, + }, + }, + }, + }, + }, + }, + }, + }, + expectedError: false, + validateResult: func(t *testing.T, blockCPUSet advisorapi.BlockCPUSet, topo *machine.CPUTopology) { + as := assert.New(t) + res := blockCPUSet["block-reclaim-1"] + as.Equal(4, res.Size()) + as.True(res.Intersection(machine.NewCPUSet(0, 1, 2, 3)).IsEmpty(), "reclaim block should not use non-reclaimable pinned CPUs") + }, + }, + { + // Scenario: Multiple NUMA-aware reclaim blocks. + // Reclaim blocks tied to specific NUMA nodes must avoid the non-reclaimable CPUs on their respective nodes. 
+ name: "NUMA-aware reclaim block allocation", + disableReclaimSelector: "disable-reclaim=true", + setupMachineState: func(st state.State, topo *machine.CPUTopology) { + machineState := st.GetMachineState() + machineState[0].ResourcePackageStates = map[string]*state.ResourcePackageState{ + "pkg1": { + Attributes: map[string]string{"disable-reclaim": "true"}, + PinnedCPUSet: machine.NewCPUSet(0, 1, 2, 3), // NUMA 0 + }, + } + machineState[1].ResourcePackageStates = map[string]*state.ResourcePackageState{ + "pkg2": { + Attributes: map[string]string{"disable-reclaim": "true"}, + PinnedCPUSet: machine.NewCPUSet(4, 5), // NUMA 1 (CPUs 4,5,6,7,12,13,14,15) + }, + } + st.SetMachineState(machineState, false) + }, + advisorResponse: &advisorapi.ListAndWatchResponse{ + Entries: map[string]*advisorapi.CalculationEntries{ + "reclaim": { + Entries: map[string]*advisorapi.CalculationInfo{ + "reclaim-entry": { + OwnerPoolName: "reclaim", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + 0: { + Blocks: []*advisorapi.Block{{BlockId: "block-reclaim-numa0", Result: 2}}, + }, + 1: { + Blocks: []*advisorapi.Block{{BlockId: "block-reclaim-numa1", Result: 4}}, + }, + }, + }, + }, + }, + }, + }, + expectedError: false, + validateResult: func(t *testing.T, blockCPUSet advisorapi.BlockCPUSet, topo *machine.CPUTopology) { + as := assert.New(t) + res0 := blockCPUSet["block-reclaim-numa0"] + as.Equal(2, res0.Size()) + as.True(res0.Intersection(machine.NewCPUSet(0, 1, 2, 3)).IsEmpty()) + as.True(res0.IsSubsetOf(topo.CPUDetails.CPUsInNUMANodes(0))) + + res1 := blockCPUSet["block-reclaim-numa1"] + as.Equal(4, res1.Size()) + as.True(res1.Intersection(machine.NewCPUSet(4, 5)).IsEmpty()) + as.True(res1.IsSubsetOf(topo.CPUDetails.CPUsInNUMANodes(1))) + }, + }, + { + // Scenario: Verifying two-phase allocation logic. + // Dedicated and Share blocks should be allocated first. 
Then, Reclaim blocks should be allocated + // from the remaining CPUs, while also avoiding the non-reclaimable CPUs. + name: "mixed dedicated, share, and reclaim blocks", + disableReclaimSelector: "disable-reclaim=true", + setupMachineState: func(st state.State, topo *machine.CPUTopology) { + // Set up a pre-allocated dedicated pod on NUMA 0 + podEntries := state.PodEntries{ + "pod-dedicated": state.ContainerEntries{ + "container-1": &state.AllocationInfo{ + AllocationResult: machine.NewCPUSet(8, 9), // NUMA 0 + OriginalAllocationResult: machine.NewCPUSet(8, 9), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(8, 9), + }, + AllocationMeta: commonstate.AllocationMeta{ + QoSLevel: apiconsts.PodAnnotationQoSLevelDedicatedCores, + }, + }, + }, + "pod-shared": state.ContainerEntries{ + "container-1": &state.AllocationInfo{ + AllocationResult: machine.NewCPUSet(2, 3, 10, 11), // NUMA 0 + OriginalAllocationResult: machine.NewCPUSet(2, 3, 10, 11), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(2, 3, 10, 11), + }, + AllocationMeta: commonstate.AllocationMeta{ + QoSLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + }, + }, + } + st.SetPodEntries(podEntries, false) + + machineState, _ := state.GenerateMachineStateFromPodEntries(topo, podEntries, nil) + // Add non-reclaimable package on NUMA 0 (CPUs 0, 1) + machineState[0].ResourcePackageStates = map[string]*state.ResourcePackageState{ + "pkg1": { + Attributes: map[string]string{"disable-reclaim": "true"}, + PinnedCPUSet: machine.NewCPUSet(0, 1), + }, + } + machineState[1].ResourcePackageStates = map[string]*state.ResourcePackageState{ + "pkg2": { + Attributes: map[string]string{"disable-reclaim": "false"}, + PinnedCPUSet: machine.NewCPUSet(4, 5, 6, 7, 12, 13, 14, 15), // NUMA 1 (CPUs 4,5,6,7,12,13,14,15) + }, + } + st.SetMachineState(machineState, false) + }, + advisorResponse: &advisorapi.ListAndWatchResponse{ + Entries: map[string]*advisorapi.CalculationEntries{ + 
"pod-dedicated": { + Entries: map[string]*advisorapi.CalculationInfo{ + "container-1": { + OwnerPoolName: "dedicated", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + 0: {Blocks: []*advisorapi.Block{{BlockId: "block-dedicated-1", Result: 2}}}, + }, + }, + }, + }, + "share": { + Entries: map[string]*advisorapi.CalculationInfo{ + "share-entry": { + OwnerPoolName: "share", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + -1: {Blocks: []*advisorapi.Block{{BlockId: "block-share-1", Result: 4}}}, + }, + }, + }, + }, + "reclaim": { + Entries: map[string]*advisorapi.CalculationInfo{ + "reclaim-entry": { + OwnerPoolName: "reclaim", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + // NUMA 0 is excluded from global availableCPUs by the dedicated pod, + // so global Reclaim (FakedNUMAID) must be allocated on NUMA 1 to succeed. + -1: { + Blocks: []*advisorapi.Block{ + {BlockId: "block-reclaim-1", Result: 4}, + }, + }, + // However, NUMA-aware Reclaim on NUMA 0 can still use the remaining + // reclaimable CPUs on NUMA 0 (i.e. CPUs occupied by shared pods). 
+ 0: { + Blocks: []*advisorapi.Block{ + {BlockId: "block-reclaim-2", Result: 4}, + }, + }, + }, + }, + }, + }, + }, + }, + expectedError: false, + validateResult: func(t *testing.T, blockCPUSet advisorapi.BlockCPUSet, topo *machine.CPUTopology) { + as := assert.New(t) + ded := blockCPUSet["block-dedicated-1"] + as.True(ded.Equals(machine.NewCPUSet(8, 9)), "dedicated block should reuse existing allocation") + + share := blockCPUSet["block-share-1"] + as.Equal(4, share.Size()) + as.True(share.IsSubsetOf(topo.CPUDetails.CPUsInNUMANodes(1)), "share block must be on NUMA 1 because NUMA 0 is excluded") + + rec := blockCPUSet["block-reclaim-1"] + as.Equal(4, rec.Size()) + as.True(rec.IsSubsetOf(topo.CPUDetails.CPUsInNUMANodes(1)), "reclaim block must be on NUMA 1") + as.True(rec.Intersection(share).IsEmpty(), "reclaim block must avoid share CPUs") + + rec2 := blockCPUSet["block-reclaim-2"] + as.Equal(4, rec2.Size()) + as.True(rec2.IsSubsetOf(topo.CPUDetails.CPUsInNUMANodes(0)), "reclaim block must be on NUMA 0") + as.True(rec2.Intersection(ded).IsEmpty(), "reclaim block must avoid dedicated CPUs") + as.True(rec2.Intersection(machine.NewCPUSet(0, 1)).IsEmpty(), "reclaim block must avoid non-reclaimable pkg CPUs on NUMA 0") + }, + }, + { + // Scenario: Verifying priority of isolation pool over normal share pool. + // Isolation pools (starts with "isolation") should be in Phase 1, + // while normal share pools should be in Phase 2. 
+ name: "isolation pool priority over normal share pool", + setupMachineState: func(st state.State, topo *machine.CPUTopology) { + // No special machine state needed + }, + advisorResponse: &advisorapi.ListAndWatchResponse{ + Entries: map[string]*advisorapi.CalculationEntries{ + "isolation": { + Entries: map[string]*advisorapi.CalculationInfo{ + "isolation-entry": { + OwnerPoolName: "isolation-1", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + 0: {Blocks: []*advisorapi.Block{{BlockId: "block-isolation-1", Result: 8}}}, + }, + }, + }, + }, + "share": { + Entries: map[string]*advisorapi.CalculationInfo{ + "share-entry": { + OwnerPoolName: "share", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + -1: {Blocks: []*advisorapi.Block{{BlockId: "block-share-1", Result: 2}}}, + }, + }, + }, + }, + }, + }, + expectedError: false, + validateResult: func(t *testing.T, blockCPUSet advisorapi.BlockCPUSet, topo *machine.CPUTopology) { + as := assert.New(t) + // NUMA 0 has 8 CPUs (0,1,2,3, 8,9,10,11) + iso := blockCPUSet["block-isolation-1"] + as.Equal(8, iso.Size()) + as.True(iso.IsSubsetOf(topo.CPUDetails.CPUsInNUMANodes(0))) + + // share block should be allocated from remaining CPUs on the node or globally. + // Since NUMA 0 is exhausted by isolation, it must be allocated from NUMA 1. + share := blockCPUSet["block-share-1"] + as.Equal(2, share.Size()) + as.True(share.Intersection(iso).IsEmpty()) + as.True(share.IsSubsetOf(topo.CPUDetails.CPUsInNUMANodes(1))) + }, + }, + { + // Scenario: Verifying that share block (normal) now avoids only globalNonReclaimableCPUSet. + // Previously it avoided allPinnedCPUSets. 
+ name: "share block can use reclaimable pinned CPUs", + disableReclaimSelector: "disable-reclaim=true", + setupMachineState: func(st state.State, topo *machine.CPUTopology) { + machineState := st.GetMachineState() + // pkg1 is reclaimable (disable-reclaim=false) + machineState[0].ResourcePackageStates = map[string]*state.ResourcePackageState{ + "pkg1": { + Attributes: map[string]string{"disable-reclaim": "false"}, + PinnedCPUSet: machine.NewCPUSet(0, 1, 2, 3), + }, + } + st.SetMachineState(machineState, false) + }, + advisorResponse: &advisorapi.ListAndWatchResponse{ + Entries: map[string]*advisorapi.CalculationEntries{ + "share": { + Entries: map[string]*advisorapi.CalculationInfo{ + "share-entry": { + OwnerPoolName: "share", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + -1: {Blocks: []*advisorapi.Block{{BlockId: "block-share-1", Result: 2}}}, + }, + }, + }, + }, + }, + }, + expectedError: false, + validateResult: func(t *testing.T, blockCPUSet advisorapi.BlockCPUSet, topo *machine.CPUTopology) { + as := assert.New(t) + share := blockCPUSet["block-share-1"] + as.Equal(2, share.Size()) + // Since pkg1 is reclaimable, and share is now in Phase 2 (treated like reclaim), + // it is NOT excluded from the available pool. The CPU allocator picks CPUs + // in numerical order, so it will allocate CPUs 0 and 1, which overlap with pkg1. + as.True(share.Intersection(machine.NewCPUSet(0, 1, 2, 3)).Size() > 0, "share block should be able to use reclaimable pinned CPUs") + }, + }, + { + // Scenario: Verifying NUMA-binding share pool priority. 
+ name: "NUMA-binding share pool priority", + setupMachineState: func(st state.State, topo *machine.CPUTopology) { + // No special machine state needed + }, + advisorResponse: &advisorapi.ListAndWatchResponse{ + Entries: map[string]*advisorapi.CalculationEntries{ + "share-NUMA": { + Entries: map[string]*advisorapi.CalculationInfo{ + "share-entry": { + OwnerPoolName: "share-NUMA0", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + 0: {Blocks: []*advisorapi.Block{{BlockId: "block-share-numa-1", Result: 8}}}, + }, + }, + }, + }, + "share-normal": { + Entries: map[string]*advisorapi.CalculationInfo{ + "share-entry": { + OwnerPoolName: "share", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + -1: {Blocks: []*advisorapi.Block{{BlockId: "block-share-normal-1", Result: 2}}}, + }, + }, + }, + }, + }, + }, + expectedError: false, + validateResult: func(t *testing.T, blockCPUSet advisorapi.BlockCPUSet, topo *machine.CPUTopology) { + as := assert.New(t) + // NUMA 0 has 8 CPUs. + shareNuma := blockCPUSet["block-share-numa-1"] + as.Equal(8, shareNuma.Size()) + as.True(shareNuma.IsSubsetOf(topo.CPUDetails.CPUsInNUMANodes(0))) + + // normal share should be pushed to Phase 2 and find CPUs elsewhere. + shareNormal := blockCPUSet["block-share-normal-1"] + as.Equal(2, shareNormal.Size()) + as.True(shareNormal.Intersection(shareNuma).IsEmpty()) + as.True(shareNormal.IsSubsetOf(topo.CPUDetails.CPUsInNUMANodes(1))) + }, + }, + { + // Scenario: Verifying that NUMA-binding pod excludes the whole NUMA node from global available pool. 
+ name: "NUMA-binding pod excludes NUMA node from global pool", + setupMachineState: func(st state.State, topo *machine.CPUTopology) { + // Dedicated pod must be present in state for allocateDedicatedBlocks to work + podEntries := state.PodEntries{ + "dedicated": state.ContainerEntries{ + "dedicated-entry": &state.AllocationInfo{ + AllocationResult: machine.NewCPUSet(0, 1, 2, 3), + OriginalAllocationResult: machine.NewCPUSet(0, 1, 2, 3), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1, 2, 3), + }, + AllocationMeta: commonstate.AllocationMeta{ + QoSLevel: apiconsts.PodAnnotationQoSLevelDedicatedCores, + }, + }, + }, + } + st.SetPodEntries(podEntries, false) + }, + advisorResponse: &advisorapi.ListAndWatchResponse{ + Entries: map[string]*advisorapi.CalculationEntries{ + "dedicated": { + Entries: map[string]*advisorapi.CalculationInfo{ + "dedicated-entry": { + OwnerPoolName: "dedicated", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + 0: {Blocks: []*advisorapi.Block{{BlockId: "block-dedicated-1", Result: 4}}}, + }, + }, + }, + }, + "share": { + Entries: map[string]*advisorapi.CalculationInfo{ + "share-entry": { + OwnerPoolName: "share", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + -1: {Blocks: []*advisorapi.Block{{BlockId: "block-share-1", Result: 6}}}, + }, + }, + }, + }, + }, + }, + expectedError: false, + validateResult: func(t *testing.T, blockCPUSet advisorapi.BlockCPUSet, topo *machine.CPUTopology) { + as := assert.New(t) + // NUMA 0 has 8 CPUs. Dedicated takes 4. + ded := blockCPUSet["block-dedicated-1"] + as.Equal(4, ded.Size()) + as.True(ded.IsSubsetOf(topo.CPUDetails.CPUsInNUMANodes(0))) + + // The remaining 4 CPUs on NUMA 0 should be excluded from global available pool + // because NUMA 0 now has a NUMA-binding pod. + // share block (non-NUMA aware) needs 6 CPUs. + // If it could use the remaining 4 on NUMA 0, it might take 4 from NUMA 0 and 2 from NUMA 1. 
+ // But since NUMA 0 is excluded, it MUST take all 6 from NUMA 1. + share := blockCPUSet["block-share-1"] + as.Equal(6, share.Size()) + as.True(share.IsSubsetOf(topo.CPUDetails.CPUsInNUMANodes(1)), "share block should be entirely on NUMA 1") + }, + }, + { + // Scenario: Verifying that dedicated pod returns error if its size is same but some CPUs are unavailable. + name: "dedicated pod size same but CPUs unavailable returns error", + setupMachineState: func(st state.State, topo *machine.CPUTopology) { + // Dedicated pod on CPUs 0,1,2,3 + podEntries := state.PodEntries{ + "dedicated": state.ContainerEntries{ + "dedicated-entry": &state.AllocationInfo{ + AllocationResult: machine.NewCPUSet(0, 1, 2, 3), + OriginalAllocationResult: machine.NewCPUSet(0, 1, 2, 3), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1, 2, 3), + }, + AllocationMeta: commonstate.AllocationMeta{ + QoSLevel: apiconsts.PodAnnotationQoSLevelDedicatedCores, + }, + }, + }, + "reserve": state.ContainerEntries{ + "": &state.AllocationInfo{ + AllocationResult: machine.NewCPUSet(0), + }, + }, + } + st.SetPodEntries(podEntries, false) + }, + advisorResponse: &advisorapi.ListAndWatchResponse{ + Entries: map[string]*advisorapi.CalculationEntries{ + "dedicated": { + Entries: map[string]*advisorapi.CalculationInfo{ + "dedicated-entry": { + OwnerPoolName: "dedicated", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + 0: {Blocks: []*advisorapi.Block{{BlockId: "block-dedicated-1", Result: 4}}}, + }, + }, + }, + }, + "reserve": { + Entries: map[string]*advisorapi.CalculationInfo{ + "": { + OwnerPoolName: "reserve", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + -1: {Blocks: []*advisorapi.Block{{BlockId: "block-reserve", Result: 1}}}, + }, + }, + }, + }, + }, + }, + expectedError: true, + expectedErrorStr: "size not changed, but some CPUs are not available", + }, + { + // Scenario: Exhaustion of CPUs for reclaim. 
+ // After deducting dedicated, share, and non-reclaimable CPUs, if there are not enough + // CPUs left for a reclaim block, an error should be returned. + name: "not enough CPUs for reclaim block after deducting non-reclaimable", + disableReclaimSelector: "disable-reclaim=true", + setupMachineState: func(st state.State, topo *machine.CPUTopology) { + machineState := st.GetMachineState() + // NUMA 0 has 8 CPUs (0,1,2,3, 8,9,10,11). We pin 7 of them as non-reclaimable. + machineState[0].ResourcePackageStates = map[string]*state.ResourcePackageState{ + "pkg1": { + Attributes: map[string]string{"disable-reclaim": "true"}, + PinnedCPUSet: machine.NewCPUSet(0, 1, 2, 3, 8, 9, 10), // only CPU 11 is available + }, + } + st.SetMachineState(machineState, false) + }, + advisorResponse: &advisorapi.ListAndWatchResponse{ + Entries: map[string]*advisorapi.CalculationEntries{ + "reclaim": { + Entries: map[string]*advisorapi.CalculationInfo{ + "reclaim-entry": { + OwnerPoolName: "reclaim", + CalculationResultsByNumas: map[int64]*advisorapi.NumaCalculationResult{ + 0: {Blocks: []*advisorapi.Block{{BlockId: "block-reclaim-1", Result: 2}}}, // Requests 2, but only 1 available + }, + }, + }, + }, + }, + }, + expectedError: true, + expectedErrorStr: "allocate cpuset for NUMA Aware reclaim block: block-reclaim-1 in NUMA: 0 failed", + validateResult: func(t *testing.T, blockCPUSet advisorapi.BlockCPUSet, topo *machine.CPUTopology) { + // No validation needed if error is expected + }, + }, + { + name: "test invalid disable reclaim selector", + disableReclaimSelector: "disable-reclaim=true,,invalid", + expectedError: true, + }, + } + + for _, tc := range testCases { + tc := tc // capture range variable for parallel execution + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + as := assert.New(t) + + // Initialize a clean topology for each parallel test (16 CPUs, 2 Sockets, 2 NUMA nodes) + // NUMA 0: 0, 1, 2, 3, 8, 9, 10, 11 + // NUMA 1: 4, 5, 6, 7, 12, 13, 14, 15 + topo, err := 
machine.GenerateDummyCPUTopology(16, 2, 2) + as.NoError(err) + + conf := generateTestConfiguration(t, "", "") + if tc.disableReclaimSelector != "" { + conf.GetDynamicConfiguration().DisableReclaimPinnedCPUSetResourcePackageSelector = tc.disableReclaimSelector + } + + // Strict isolation using a fresh temp directory + st, _ := state.NewCheckpointState(&statedirectory.StateDirectoryConfiguration{StateFileDirectory: t.TempDir()}, "test", "test", topo, false, state.GenerateMachineStateFromPodEntries, metrics.DummyMetrics{}) + + // Prepare initial machine state (e.g. static pools) + machineState, _ := state.GenerateMachineStateFromPodEntries(topo, nil, nil) + st.SetMachineState(machineState, false) + + if tc.setupMachineState != nil { + tc.setupMachineState(st, topo) + } + + policy := &DynamicPolicy{ + machineInfo: &machine.KatalystMachineInfo{ + CPUTopology: topo, + }, + state: st, + conf: conf, + } + + blockCPUSet, err := policy.generateBlockCPUSet(tc.advisorResponse) + if tc.expectedError { + as.Error(err) + if tc.expectedErrorStr != "" { + as.Contains(err.Error(), tc.expectedErrorStr) + } + return + } + as.NoError(err) + if tc.validateResult != nil { + tc.validateResult(t, blockCPUSet, topo) + } + }) + } +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go index 23a60ed01f..c3810d3c77 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers.go @@ -31,6 +31,7 @@ import ( apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" + cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/calculator" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" cpuutil 
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/util" @@ -40,6 +41,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/util/machine" "github.com/kubewharf/katalyst-core/pkg/util/native" qosutil "github.com/kubewharf/katalyst-core/pkg/util/qos" + rputil "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) func (p *DynamicPolicy) sharedCoresAllocationHandler(ctx context.Context, @@ -284,7 +286,7 @@ func (p *DynamicPolicy) reclaimedCoresAllocationHandler(ctx context.Context, // set reclaimed numa_binding NUMA ID to allocationInfo if req.Hint != nil && len(req.Hint.Nodes) == 1 && (reclaimActualBindingNUMAs.Contains(int(req.Hint.Nodes[0])) || !nonReclaimActualBindingNUMAs.Equals(machine.NewCPUSet(int(req.Hint.Nodes[0])))) { - allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0]) + allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes) } } @@ -321,7 +323,17 @@ func (p *DynamicPolicy) reclaimedCoresAllocationHandler(ctx context.Context, p.state.SetMachineState(updatedMachineState, persistCheckpoint) } - resp, err := cpuutil.PackAllocationResponse(allocationInfo, string(v1.ResourceCPU), util.OCIPropertyNameCPUSetCPUs, false, true, req) + // Get topology allocation for numa binding reclaimed cores + var topologyAllocationAnnotations map[string]string + if allocationInfo.CheckReclaimedActualNUMABinding() { + var err error + topologyAllocationAnnotations, err = cpuutil.GetCPUTopologyAllocationsAnnotations(allocationInfo, p.conf.TopologyAllocationAnnotationKey, req) + if err != nil { + return nil, fmt.Errorf("GetCPUTopologyAllocationsAnnotations failed with error: %v", err) + } + } + + resp, err := cpuutil.PackAllocationResponse(allocationInfo, string(v1.ResourceCPU), util.OCIPropertyNameCPUSetCPUs, false, true, req, topologyAllocationAnnotations) if err != nil { general.Errorf("pod: %s/%s, container: %s packAllocationResponse failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) @@ -470,7 +482,7 @@ func (p 
*DynamicPolicy) dedicatedCoresWithNUMABindingAllocationHandler(ctx conte return nil, fmt.Errorf("numa binding without numa exclusive allocation result numa node size is %d, "+ "not equal to 1", len(req.Hint.Nodes)) } - allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0]) + allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes) } // update pod entries directly. @@ -486,14 +498,20 @@ func (p *DynamicPolicy) dedicatedCoresWithNUMABindingAllocationHandler(ctx conte } p.state.SetMachineState(updatedMachineState, persistCheckpoint) - err = p.adjustAllocationEntries(persistCheckpoint) + err = p.adjustAllocationEntries(podEntries, updatedMachineState, persistCheckpoint) if err != nil { general.Errorf("pod: %s/%s, container: %s putContainersAndAdjustAllocationEntriesWithoutAllocation failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) return nil, fmt.Errorf("adjustAllocationEntries failed with error: %v", err) } - resp, err := cpuutil.PackAllocationResponse(allocationInfo, string(v1.ResourceCPU), util.OCIPropertyNameCPUSetCPUs, false, true, req) + topologyAllocationAnnotations, err := cpuutil.GetCPUTopologyAllocationsAnnotations(allocationInfo, p.conf.TopologyAllocationAnnotationKey, req) + if err != nil { + return nil, fmt.Errorf("GetCPUTopologyAllocationsAnnotations failed with error: %v", err) + } + + resp, err := cpuutil.PackAllocationResponse(allocationInfo, string(v1.ResourceCPU), + util.OCIPropertyNameCPUSetCPUs, false, true, req, topologyAllocationAnnotations) if err != nil { general.Errorf("pod: %s/%s, container: %s PackResourceAllocationResponseByAllocationInfo failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) @@ -589,8 +607,13 @@ func (p *DynamicPolicy) sharedCoresWithNUMABindingAllocationHandler(ctx context. 
// there is no need to call SetPodEntries and SetMachineState, // since they are already done in doAndCheckPutAllocationInfo of allocateSharedNumaBindingCPUs + topologyAllocationAnnotations, err := cpuutil.GetCPUTopologyAllocationsAnnotations(allocationInfo, p.conf.TopologyAllocationAnnotationKey, req) + if err != nil { + return nil, fmt.Errorf("GetCPUTopologyAllocationsAnnotations failed with error: %v", err) + } - resp, err := cpuutil.PackAllocationResponse(allocationInfo, string(v1.ResourceCPU), util.OCIPropertyNameCPUSetCPUs, false, true, req) + resp, err := cpuutil.PackAllocationResponse(allocationInfo, + string(v1.ResourceCPU), util.OCIPropertyNameCPUSetCPUs, false, true, req, topologyAllocationAnnotations) if err != nil { general.Errorf("pod: %s/%s, container: %s PackResourceAllocationResponseByAllocationInfo failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) @@ -604,6 +627,14 @@ func (p *DynamicPolicy) sharedCoresWithNUMABindingAllocationHandler(ctx context. return resp, nil } +// allocateNumaBindingCPUs allocates CPUs for NUMA binding containers. +// It considers NUMA affinity, exclusive requirements, and resource package pinning. +// Steps: +// 1. Calculate the initial available CPUSet based on the TopologyHint (NUMA nodes). +// 2. If a Resource Package is specified in annotations: +// a. If the package has pinned CPUs, restrict the available CPUs to the intersection of NUMA CPUs and Pinned CPUs. +// b. If the package is not pinned but others are, exclude other packages' pinned CPUs. +// 3. Allocate CPUs from the calculated available set using the topology calculator. 
func (p *DynamicPolicy) allocateNumaBindingCPUs(numCPUs int, hint *pluginapi.TopologyHint, machineState state.NUMANodeMap, reqAnnotations map[string]string, ) (machine.CPUSet, error) { @@ -631,11 +662,31 @@ func (p *DynamicPolicy) allocateNumaBindingCPUs(numCPUs int, hint *pluginapi.Top result := machine.NewCPUSet() alignedAvailableCPUs := machine.CPUSet{} - availableCPUsPerNUMA := make(map[uint64]machine.CPUSet) + alignedAvailableCPUsPerNUMA := make(map[uint64]machine.CPUSet) hintNodes := hint.Nodes + pkgName := rputil.GetResourcePackageName(reqAnnotations) + numaRPPinnedCPUSet := machineState.GetNUMAResourcePackagePinnedCPUSet() + for _, numaNode := range hintNodes { availableCPUs := machineState[int(numaNode)].GetAvailableCPUSet(p.reservedCPUs) - availableCPUsPerNUMA[numaNode] = availableCPUs + + // if the resource package is specified and the resource package is pinned, + // then only the pinned CPUs are available for allocation. + // if the package is not pinned but other packages are, exclude pinned CPUs of other packages. 
+ if pkgName != "" { + pinnedCPUSetsInNUMA := numaRPPinnedCPUSet[int(numaNode)] + if !pinnedCPUSetsInNUMA[pkgName].IsEmpty() { + // If the package has pinned CPUs, restrict allocation to those CPUs + availableCPUs = availableCPUs.Intersection(pinnedCPUSetsInNUMA[pkgName]) + } else if len(pinnedCPUSetsInNUMA) > 0 { + // If the package is not pinned but other packages are, exclude pinned CPUs of other packages + for _, pinnedCPUs := range pinnedCPUSetsInNUMA { + availableCPUs = availableCPUs.Difference(pinnedCPUs) + } + } + } + + alignedAvailableCPUsPerNUMA[numaNode] = availableCPUs alignedAvailableCPUs = alignedAvailableCPUs.Union(availableCPUs) } @@ -650,7 +701,7 @@ func (p *DynamicPolicy) allocateNumaBindingCPUs(numCPUs int, hint *pluginapi.Top // Evenly allocate cpus for distribute_evenly_across_numa if distributeEvenlyAcrossNuma { - alignedCPUs, err = p.allocateEvenlyAcrossNUMAs(numCPUs, hintNodes, availableCPUsPerNUMA) + alignedCPUs, err = p.allocateEvenlyAcrossNUMAs(numCPUs, hintNodes, alignedAvailableCPUsPerNUMA) if err != nil { return machine.NewCPUSet(), fmt.Errorf("allocateEvenlyAcrossNUMA failed with error: %v", err) } @@ -741,7 +792,7 @@ func (p *DynamicPolicy) allocateSharedNumaBindingCPUs(req *pluginapi.ResourceReq InitTimestamp: time.Now().Format(util.QRMTimeFormat), RequestQuantity: reqFloat64, } - allocationInfo.SetSpecifiedNUMABindingNUMAID(hint.Nodes[0]) + allocationInfo.SetSpecifiedNUMABindingNUMAID(hint.Nodes) if util.PodInplaceUpdateResizing(req) { originAllocationInfo := p.state.GetAllocationInfo(allocationInfo.PodUid, allocationInfo.ContainerName) @@ -780,11 +831,23 @@ func (p *DynamicPolicy) allocateSharedNumaBindingCPUs(req *pluginapi.ResourceReq // putAllocationsAndAdjustAllocationEntries calculates and generates the latest checkpoint // - unlike adjustAllocationEntries, it will also consider AllocationInfo -func (p *DynamicPolicy) putAllocationsAndAdjustAllocationEntries(allocationInfos []*state.AllocationInfo, incrByReq bool, 
persistCheckpoint bool) error { +func (p *DynamicPolicy) putAllocationsAndAdjustAllocationEntries( + allocationInfos []*state.AllocationInfo, + incrByReq bool, + persistCheckpoint bool, +) error { return p.putAllocationsAndAdjustAllocationEntriesResizeAware(nil, allocationInfos, incrByReq, false, persistCheckpoint) } -func (p *DynamicPolicy) putAllocationsAndAdjustAllocationEntriesResizeAware(originAllocationInfos, allocationInfos []*state.AllocationInfo, incrByReq, podInplaceUpdateResizing, persistCheckpoint bool) error { +// putAllocationsAndAdjustAllocationEntriesResizeAware adjusts the allocation entries based on the given allocation infos, +// considering resize requests and resource package information. +func (p *DynamicPolicy) putAllocationsAndAdjustAllocationEntriesResizeAware( + originAllocationInfos, + allocationInfos []*state.AllocationInfo, + incrByReq, + podInplaceUpdateResizing, + persistCheckpoint bool, +) error { if len(allocationInfos) == 0 { return nil } @@ -814,6 +877,7 @@ func (p *DynamicPolicy) putAllocationsAndAdjustAllocationEntriesResizeAware(orig } machineState := p.state.GetMachineState() + numaResourcePackagePinnedCPUSet := machineState.GetNUMAResourcePackagePinnedCPUSet() var poolsQuantityMap map[string]map[int]int if p.enableCPUAdvisor && @@ -846,7 +910,7 @@ func (p *DynamicPolicy) putAllocationsAndAdjustAllocationEntriesResizeAware(orig return fmt.Errorf("pool %s cross NUMA: %+v", poolName, poolsQuantityMap[poolName]) } } else if incrByReq { - err := state.CountAllocationInfosToPoolsQuantityMap(allocationInfos, poolsQuantityMap, p.getContainerRequestedCores) + err := state.CountAllocationInfosToPoolsQuantityMap(numaResourcePackagePinnedCPUSet, allocationInfos, poolsQuantityMap, p.getContainerRequestedCores) if err != nil { return fmt.Errorf("CountAllocationInfosToPoolsQuantityMap failed with error: %v", err) } @@ -854,7 +918,7 @@ func (p *DynamicPolicy) putAllocationsAndAdjustAllocationEntriesResizeAware(orig } else { // else we do 
sum(containers req) for each pool to get pools ratio var err error - poolsQuantityMap, err = state.GetSharedQuantityMapFromPodEntries(entries, allocationInfos, p.getContainerRequestedCores) + poolsQuantityMap, err = state.GetSharedQuantityMapFromPodEntries(numaResourcePackagePinnedCPUSet, entries, allocationInfos, p.getContainerRequestedCores) if err != nil { return fmt.Errorf("GetSharedQuantityMapFromPodEntries failed with error: %v", err) } @@ -865,9 +929,9 @@ func (p *DynamicPolicy) putAllocationsAndAdjustAllocationEntriesResizeAware(orig allocationInfos[0].PodNamespace, allocationInfos[0].PodName, allocationInfos[0].ContainerName) } // if advisor is disabled, qrm can re-calc the pool size exactly. we don't need to adjust the pool size. - err := state.CountAllocationInfosToPoolsQuantityMap(allocationInfos, poolsQuantityMap, p.getContainerRequestedCores) - if err != nil { - return fmt.Errorf("CountAllocationInfosToPoolsQuantityMap failed with error: %v", err) + cErr := state.CountAllocationInfosToPoolsQuantityMap(numaResourcePackagePinnedCPUSet, allocationInfos, poolsQuantityMap, p.getContainerRequestedCores) + if cErr != nil { + return fmt.Errorf("CountAllocationInfosToPoolsQuantityMap failed with error: %v", cErr) } } } @@ -970,16 +1034,18 @@ func (p *DynamicPolicy) calcPoolResizeRequest(originAllocation, allocation *stat } // adjustAllocationEntries calculates and generates the latest checkpoint -func (p *DynamicPolicy) adjustAllocationEntries(persistCheckpoint bool) error { +// It fetches resource package items and updates the allocation entries accordingly. 
+func (p *DynamicPolicy) adjustAllocationEntries( + entries state.PodEntries, + machineState state.NUMANodeMap, + persistCheckpoint bool, +) error { startTime := time.Now() general.Infof("called") defer func() { general.InfoS("finished", "duration", time.Since(startTime)) }() - entries := p.state.GetPodEntries() - machineState := p.state.GetMachineState() - // since adjustAllocationEntries will cause re-generate pools, // if sys advisor is enabled, we believe the pools' ratio that sys advisor indicates, // else we do sum(containers req) for each pool to get pools ratio @@ -993,7 +1059,7 @@ func (p *DynamicPolicy) adjustAllocationEntries(persistCheckpoint bool) error { poolsQuantityMap = machine.ParseCPUAssignmentQuantityMap(poolsCPUSetMap) } else { var err error - poolsQuantityMap, err = state.GetSharedQuantityMapFromPodEntries(entries, nil, p.getContainerRequestedCores) + poolsQuantityMap, err = state.GetSharedQuantityMapFromPodEntries(machineState.GetNUMAResourcePackagePinnedCPUSet(), entries, nil, p.getContainerRequestedCores) if err != nil { return fmt.Errorf("GetSharedQuantityMapFromPodEntries failed with error: %v", err) } @@ -1023,6 +1089,9 @@ func (p *DynamicPolicy) adjustPoolsAndIsolatedEntries( availableCPUs := machineState.GetFilteredAvailableCPUSet(p.reservedCPUs, nil, state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckDedicatedNUMABindingNUMAExclusive)) + // rpPinnedCPUSet contains the pinned CPU sets for resource packages + rpPinnedCPUSet := machineState.GetResourcePackagePinnedCPUSet() + // deduct the cpus that is forbidden from being used by user containers. 
forbiddenPoolCPUs, err := state.GetUnitedPoolsCPUs(state.ForbiddenPools, entries) if err != nil { @@ -1035,13 +1104,15 @@ func (p *DynamicPolicy) adjustPoolsAndIsolatedEntries( return fmt.Errorf("reclaimOverlapShareRatio failed with error: %v", err) } - general.Infof("poolsQuantityMap: %#v, availableCPUs: %v, reclaimOverlapShareRatio: %#v", poolsQuantityMap, availableCPUs, reclaimOverlapShareRatio) + general.Infof("poolsQuantityMap: %#v, rpPinnedCPUSet: %v, availableCPUs: %v, reclaimOverlapShareRatio: %#v", poolsQuantityMap, rpPinnedCPUSet, availableCPUs, reclaimOverlapShareRatio) - poolsCPUSet, isolatedCPUSet, err := p.generatePoolsAndIsolation(poolsQuantityMap, isolatedQuantityMap, availableCPUs, reclaimOverlapShareRatio) + poolsCPUSet, isolatedCPUSet, err := p.groupAndAllocatePools(poolsQuantityMap, isolatedQuantityMap, availableCPUs, rpPinnedCPUSet, reclaimOverlapShareRatio) if err != nil { - return fmt.Errorf("generatePoolsAndIsolation failed with error: %v", err) + return fmt.Errorf("groupAndAllocatePools failed with error: %v", err) } + general.Infof("poolsCPUSet: %v, isolatedCPUSet: %v", poolsCPUSet, isolatedCPUSet) + err = p.reclaimOverlapNUMABinding(poolsCPUSet, entries) if err != nil { return fmt.Errorf("reclaimOverlapNUMABinding failed with error: %v", err) @@ -1061,6 +1132,77 @@ func (p *DynamicPolicy) adjustPoolsAndIsolatedEntries( return nil } +func (p *DynamicPolicy) groupAndAllocatePools( + poolsQuantityMap map[string]map[int]int, + isolatedQuantityMap map[string]map[string]int, + availableCPUs machine.CPUSet, + rpPinnedCPUSet map[string]machine.CPUSet, + reclaimOverlapShareRatio map[string]float64, +) (map[string]machine.CPUSet, map[string]map[string]machine.CPUSet, error) { + // 1. 
Separate pools into pinned and common + pinnedPoolsQuantityMap := make(map[string]map[int]int) + commonPoolsQuantityMap := make(map[string]map[int]int) + pinnedCPUSets := machine.NewCPUSet() + + // Accumulate all pinned cpusets from resource packages + for _, cset := range rpPinnedCPUSet { + pinnedCPUSets = pinnedCPUSets.Union(cset) + } + + for poolName, quantityMap := range poolsQuantityMap { + _, pkgName := rputil.UnwrapOwnerPoolName(poolName) + if pkgName != "" && !rpPinnedCPUSet[pkgName].IsEmpty() { + pinnedPoolsQuantityMap[poolName] = quantityMap + } else { + commonPoolsQuantityMap[poolName] = quantityMap + } + } + + // 2. Calculate common available CPUs + // For pools without pinned cpuset, availableCPUs needs to deduct allocated pinned cpuset + commonAvailableCPUs := availableCPUs.Difference(pinnedCPUSets) + + // 3. Process Pinned Pools + poolsCPUSet := make(map[string]machine.CPUSet) + + // Group pinned pools by package to call generatePoolsAndIsolation with correct constraints + pinnedPoolsByPkg := make(map[string]map[string]map[int]int) + for poolName, quantityMap := range pinnedPoolsQuantityMap { + _, pkgName := rputil.UnwrapOwnerPoolName(poolName) + if pinnedPoolsByPkg[pkgName] == nil { + pinnedPoolsByPkg[pkgName] = make(map[string]map[int]int) + } + pinnedPoolsByPkg[pkgName][poolName] = quantityMap + } + + for pkgName, poolsMap := range pinnedPoolsByPkg { + pkgAvailableCPUs := availableCPUs.Intersection(rpPinnedCPUSet[pkgName]) + // Call generatePoolsAndIsolation for this package + // Pass nil for isolatedQuantityMap as we assume isolated containers go to common + pPools, _, err := p.generatePoolsAndIsolation(poolsMap, nil, pkgAvailableCPUs, reclaimOverlapShareRatio) + if err != nil { + return nil, nil, fmt.Errorf("generatePoolsAndIsolation for pkg %s failed with error: %v", pkgName, err) + } + for k, v := range pPools { + poolsCPUSet[k] = v + } + } + + // 4. 
Process Common Pools + // Allocate common pools (and isolated containers) from commonAvailableCPUs, which already excludes every pinned cpuset; rpPinnedCPUSet itself is not passed here + cPools, cIso, err := p.generatePoolsAndIsolation(commonPoolsQuantityMap, isolatedQuantityMap, commonAvailableCPUs, reclaimOverlapShareRatio) + if err != nil { + return nil, nil, fmt.Errorf("generatePoolsAndIsolation failed with error: %v", err) + } + + for k, v := range cPools { + poolsCPUSet[k] = v + } + isolatedCPUSet := cIso + + return poolsCPUSet, isolatedCPUSet, nil +} + // reclaimOverlapNUMABinding unions calculated reclaim pool in empty NUMAs // with the intersection of previous reclaim pool and non-ramp-up dedicated_cores numa_binding containers func (p *DynamicPolicy) reclaimOverlapNUMABinding(poolsCPUSet map[string]machine.CPUSet, entries state.PodEntries) error { @@ -1303,6 +1445,24 @@ func (p *DynamicPolicy) applyPoolsAndIsolatedInfo(poolsCPUSet map[string]machine allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, err) } + + pkgName := allocationInfo.GetResourcePackageName() + if pkgName != "" { + numaSet, err := machine.Parse(allocationInfo.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint]) + if err != nil { + return fmt.Errorf("parse numaHintStr: %s failed with error: %v", + allocationInfo.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint], err) + } + + if numaSet.Size() == 1 { + targetNUMAID := numaSet.ToSliceNoSortInt()[0] + if pinnedSets, ok := machineState.GetNUMAResourcePackagePinnedCPUSet()[targetNUMAID]; ok { + if cpuSet, exists := pinnedSets[pkgName]; exists && cpuSet.Size() > 0 { + ownerPoolName = rputil.WrapOwnerPoolName(ownerPoolName, pkgName) + } + } + } + } } // else already in a numa_binding share pool or isolated } else { ownerPoolName = allocationInfo.GetPoolName() @@ -1437,7 +1597,8 @@ func (p *DynamicPolicy) generateNUMABindingPoolsCPUSetInPlace(poolsCPUSet map[st // 1.
allocate isolated cpuset for pod/containers, and divide total cores evenly if not possible to allocate // 2. use the left cores to allocate among different pools // 3. apportion to other pools if reclaimed is disabled -func (p *DynamicPolicy) generatePoolsAndIsolation(poolsQuantityMap map[string]map[int]int, +func (p *DynamicPolicy) generatePoolsAndIsolation( + poolsQuantityMap map[string]map[int]int, isolatedQuantityMap map[string]map[string]int, availableCPUs machine.CPUSet, reclaimOverlapShareRatio map[string]float64) (poolsCPUSet map[string]machine.CPUSet, isolatedCPUSet map[string]map[string]machine.CPUSet, err error, diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers_test.go index dacf55d56e..f646ff77ee 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_allocation_handlers_test.go @@ -30,6 +30,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" "github.com/kubewharf/katalyst-core/pkg/util/machine" + rputil "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) func TestDynamicPolicy_getReclaimOverlapShareRatio(t *testing.T) { @@ -277,7 +278,7 @@ func TestAllocateSharedNumaBindingCPUs(t *testing.T) { 0: machine.NewCPUSet(0, 1), }, } - originAllocationInfo.SetSpecifiedNUMABindingNUMAID(0) + originAllocationInfo.SetSpecifiedNUMABindingNUMAID([]uint64{0}) policy.state.SetAllocationInfo(podUID, containerName, originAllocationInfo, false) @@ -347,3 +348,639 @@ func TestAllocateSharedNumaBindingCPUs(t *testing.T) { as.Contains(err.Error(), "larger than 1 NUMA") }) } + +func TestDynamicPolicy_allocateNumaBindingCPUs(t *testing.T) { + t.Parallel() + + type args struct { + numCPUs int + hint *pluginapi.TopologyHint + machineState state.NUMANodeMap + 
reqAnnotations map[string]string + } + tests := []struct { + name string + args args + want machine.CPUSet + wantErr bool + }{ + { + name: "normal allocation without pinning", + args: args{ + numCPUs: 2, + hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0}, + }, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + DefaultCPUSet: machine.NewCPUSet(0, 1, 2, 3), + }, + }, + reqAnnotations: map[string]string{ + apiconsts.PodAnnotationMemoryEnhancementNumaBinding: apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + want: machine.NewCPUSet(0, 1), + wantErr: false, + }, + { + name: "allocation with pinned resource package", + args: args{ + numCPUs: 2, + hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0}, + }, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + DefaultCPUSet: machine.NewCPUSet(0, 1, 2, 3), + ResourcePackageStates: map[string]*state.ResourcePackageState{ + "pkg1": { + PinnedCPUSet: machine.NewCPUSet(2, 3), + }, + }, + }, + }, + reqAnnotations: map[string]string{ + apiconsts.PodAnnotationResourcePackageKey: "pkg1", + apiconsts.PodAnnotationMemoryEnhancementNumaBinding: apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + want: machine.NewCPUSet(2, 3), + wantErr: false, + }, + { + name: "allocation without pinned resource package but with other pinned packages", + args: args{ + numCPUs: 2, + hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0}, + }, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + DefaultCPUSet: machine.NewCPUSet(0, 1, 2, 3), + ResourcePackageStates: map[string]*state.ResourcePackageState{ + "pkg1": { + PinnedCPUSet: machine.NewCPUSet(2, 3), + }, + }, + }, + }, + reqAnnotations: map[string]string{ + apiconsts.PodAnnotationMemoryEnhancementNumaBinding: apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + want: machine.NewCPUSet(0, 1), + wantErr: false, + }, + { + name: "distribute evenly with pinned resource package", + args: args{ + numCPUs: 2, + 
hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0, 1}, + }, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + DefaultCPUSet: machine.NewCPUSet(0, 1, 2, 3), + ResourcePackageStates: map[string]*state.ResourcePackageState{ + "pkg1": { + PinnedCPUSet: machine.NewCPUSet(2, 3), + }, + }, + }, + 1: &state.NUMANodeState{ + DefaultCPUSet: machine.NewCPUSet(4, 5, 6, 7), + ResourcePackageStates: map[string]*state.ResourcePackageState{ + "pkg1": { + PinnedCPUSet: machine.NewCPUSet(6, 7), + }, + }, + }, + }, + reqAnnotations: map[string]string{ + apiconsts.PodAnnotationResourcePackageKey: "pkg1", + apiconsts.PodAnnotationMemoryEnhancementNumaBinding: apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable, + apiconsts.PodAnnotationCPUEnhancementNumaNumber: "2", + apiconsts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma: apiconsts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable, + }, + }, + want: machine.NewCPUSet(2, 6), + wantErr: false, + }, + { + name: "distribute evenly without pinned resource package but with other pinned packages", + args: args{ + numCPUs: 2, + hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0, 1}, + }, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + DefaultCPUSet: machine.NewCPUSet(0, 1, 2, 3), + ResourcePackageStates: map[string]*state.ResourcePackageState{ + "pkg1": { + PinnedCPUSet: machine.NewCPUSet(2, 3), + }, + }, + }, + 1: &state.NUMANodeState{ + DefaultCPUSet: machine.NewCPUSet(4, 5, 6, 7), + ResourcePackageStates: map[string]*state.ResourcePackageState{ + "pkg1": { + PinnedCPUSet: machine.NewCPUSet(6, 7), + }, + }, + }, + }, + reqAnnotations: map[string]string{ + apiconsts.PodAnnotationMemoryEnhancementNumaBinding: apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable, + apiconsts.PodAnnotationCPUEnhancementNumaNumber: "2", + apiconsts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma: apiconsts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable, + }, + }, + want: 
machine.NewCPUSet(0, 4), + wantErr: false, + }, + { + name: "distribute evenly with pinned resource package on some NUMAs but not others", + args: args{ + numCPUs: 2, + hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0, 1}, + }, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{ + DefaultCPUSet: machine.NewCPUSet(0, 1, 2, 3), + ResourcePackageStates: map[string]*state.ResourcePackageState{ + "pkg1": { + PinnedCPUSet: machine.NewCPUSet(2, 3), + }, + }, + }, + 1: &state.NUMANodeState{ + DefaultCPUSet: machine.NewCPUSet(4, 5, 6, 7), + // pkg1 is not pinned on NUMA 1 + ResourcePackageStates: map[string]*state.ResourcePackageState{}, + }, + }, + reqAnnotations: map[string]string{ + apiconsts.PodAnnotationResourcePackageKey: "pkg1", + apiconsts.PodAnnotationMemoryEnhancementNumaBinding: apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable, + apiconsts.PodAnnotationCPUEnhancementNumaNumber: "2", + apiconsts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma: apiconsts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable, + }, + }, + want: machine.NewCPUSet(2, 4), + wantErr: false, + }, + } + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + as := require.New(t) + cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4) + as.Nil(err) + tmpDir, err := ioutil.TempDir("", "checkpoint-TestDynamicPolicy_allocateNumaBindingCPUs") + as.Nil(err) + + p, err := getTestDynamicPolicyWithInitialization(cpuTopology, tmpDir) + as.Nil(err) + p.reservedCPUs = machine.NewCPUSet() + t.Logf("Reserved: %s", p.reservedCPUs.String()) + + got, err := p.allocateNumaBindingCPUs(tt.args.numCPUs, tt.args.hint, tt.args.machineState, tt.args.reqAnnotations) + if (err != nil) != tt.wantErr { + t.Errorf("allocateNumaBindingCPUs() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !got.Equals(tt.want) { + t.Errorf("allocateNumaBindingCPUs() = %v, want %v", got, tt.want) + } + }) + } +} + +// 
TestDynamicPolicy_generateNUMABindingPoolsCPUSetInPlace verifies the logic of generating CPU sets for NUMA-binding pools. +// It simulates a scenario with specific CPU topology and available CPUs, checking if the allocation strategies (like packing full cores) work as expected. +// Topology Assumption for mustGenerateDummyCPUTopology(16, 2, 2): +// - 16 CPUs total, 2 NUMA Nodes (0 and 1). +// - HT enabled, siblings are separated by 16/2 = 8. +// - NUMA 0: CPUs {0, 1, 2, 3} (Logic Cores) and {8, 9, 10, 11} (Siblings). +// - Core 0: {0, 8}, Core 1: {1, 9}, Core 2: {2, 10}, Core 3: {3, 11}. +// +// - NUMA 1: CPUs {4, 5, 6, 7} (Logic Cores) and {12, 13, 14, 15} (Siblings). +func TestDynamicPolicy_generateNUMABindingPoolsCPUSetInPlace(t *testing.T) { + t.Parallel() + + type args struct { + poolsCPUSet map[string]machine.CPUSet + poolsQuantityMap map[string]map[int]int + availableCPUs machine.CPUSet + } + tests := []struct { + name string + cpuTopology *machine.CPUTopology + args args + wantPools map[string]machine.CPUSet + wantLeft machine.CPUSet + wantErr bool + enableReclaim bool + }{ + // Case 1: Single pool allocation in NUMA 0. + // Available CPUs: {8, 9, 10} (All in NUMA 0). + // - Core 0: {0, 8} (Only 8 available). + // - Core 1: {1, 9} (Only 9 available). + // - Core 2: {2, 10} (Only 10 available). + // Request: pool1 needs 2 CPUs from NUMA 0. + // Allocation: No full cores available, so it picks {8, 9}. + { + name: "single pool, ample cpus", + cpuTopology: mustGenerateDummyCPUTopology(16, 2, 2), + args: args{ + poolsCPUSet: make(map[string]machine.CPUSet), + poolsQuantityMap: map[string]map[int]int{ + "pool1": { + 0: 2, + }, + }, + availableCPUs: machine.NewCPUSet(8, 9, 10), + }, + wantPools: map[string]machine.CPUSet{ + "pool1": machine.NewCPUSet(8, 9), + }, + wantLeft: machine.NewCPUSet(10), + wantErr: false, + enableReclaim: true, + }, + // Case 2: Multiple pools allocation across NUMA 0 and NUMA 1. + // Available CPUs: {2, 3, 4, 5, 10}. 
+ // NUMA 0 Available: {2, 3, 10}. + // - Core 2: {2, 10} (Both available -> Full Core). + // - Core 3: {3, 11} (Only 3 available). + // NUMA 1 Available: {4, 5}. + // - Core 4: {4, 12} (Only 4 available). + // - Core 5: {5, 13} (Only 5 available). + // Request: pool1 needs 2 from NUMA 0; pool2 needs 2 from NUMA 1. + // Allocation: + // - pool1 (NUMA 0): Prefers full core {2, 10}. + // - pool2 (NUMA 1): Takes {4, 5}. + { + name: "multiple pools, ample cpus", + cpuTopology: mustGenerateDummyCPUTopology(16, 2, 2), + args: args{ + poolsCPUSet: make(map[string]machine.CPUSet), + poolsQuantityMap: map[string]map[int]int{ + "pool1": { + 0: 2, + }, + "pool2": { + 1: 2, + }, + }, + availableCPUs: machine.NewCPUSet(2, 3, 4, 5, 10), + }, + wantPools: map[string]machine.CPUSet{ + "pool1": machine.NewCPUSet(2, 10), + "pool2": machine.NewCPUSet(4, 5), + }, + wantLeft: machine.NewCPUSet(3), + wantErr: false, + enableReclaim: true, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + as := require.New(t) + + tmpDir, err := ioutil.TempDir("", "checkpoint-TestDynamicPolicy_generateNUMABindingPoolsCPUSetInPlace") + as.Nil(err) + defer os.RemoveAll(tmpDir) // Added cleanup + + p, err := getTestDynamicPolicyWithInitialization(tt.cpuTopology, tmpDir) + as.Nil(err) + + // Clear state to ensure clean slate + p.state.SetPodEntries(state.PodEntries{}, false) + p.reservedCPUs = machine.NewCPUSet() + + p.dynamicConfig.GetDynamicConfiguration().EnableReclaim = tt.enableReclaim + + gotLeft, err := p.generateNUMABindingPoolsCPUSetInPlace(tt.args.poolsCPUSet, tt.args.poolsQuantityMap, tt.args.availableCPUs) + if (err != nil) != tt.wantErr { + t.Errorf("generateNUMABindingPoolsCPUSetInPlace() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !tt.wantErr { + if !reflect.DeepEqual(tt.args.poolsCPUSet, tt.wantPools) { + t.Errorf("generateNUMABindingPoolsCPUSetInPlace() poolsCPUSet = %v, want %v", tt.args.poolsCPUSet, tt.wantPools) + } + 
if !gotLeft.Equals(tt.wantLeft) { + t.Errorf("generateNUMABindingPoolsCPUSetInPlace() gotLeft = %v, want %v", gotLeft, tt.wantLeft) + } + } + }) + } +} + +func TestDynamicPolicy_adjustPoolsAndIsolatedEntries_Pinned(t *testing.T) { + t.Parallel() + as := require.New(t) + + // Setup topology: 2 sockets, 8 cores each. Total 16 CPUs. + // S0: 0-7, S1: 8-15. + cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4) + as.Nil(err) + + tmpDir, err := ioutil.TempDir("", "checkpoint-TestDynamicPolicy_adjustPoolsAndIsolatedEntries_Pinned") + as.Nil(err) + defer os.RemoveAll(tmpDir) + + p, err := getTestDynamicPolicyWithInitialization(cpuTopology, tmpDir) + as.Nil(err) + + // Clear reserved CPUs to ensure deterministic allocation for test + p.reservedCPUs = machine.NewCPUSet() + + // Enable Reclaim + p.dynamicConfig.GetDynamicConfiguration().EnableReclaim = true + // Disable overlap to ensure pool2 gets exactly what it requests (4 cores) + // If enabled, it would take all available cores (12) which is also correct behavior but makes checking "exactly 4" fail. + // We want to verify it can successfully allocate 4 from the remaining unpinned set. + p.state.SetAllowSharedCoresOverlapReclaimedCores(false, true) + + // Setup Pinned CPUSets + // pkg1 pinned to {0, 1} (NUMA 0) + // pkg2 pinned to {2, 3} (NUMA 0) -- BUT no pools use it! 
+ machineState := p.state.GetMachineState() + for numaID, numaState := range machineState { + if numaID == 0 { + if numaState.ResourcePackageStates == nil { + numaState.ResourcePackageStates = make(map[string]*state.ResourcePackageState) + } + numaState.ResourcePackageStates["pkg1"] = &state.ResourcePackageState{PinnedCPUSet: machine.NewCPUSet(0, 1)} + numaState.ResourcePackageStates["pkg2"] = &state.ResourcePackageState{PinnedCPUSet: machine.NewCPUSet(2, 3)} + } + } + p.state.SetMachineState(machineState, false) + + // Setup Pools Quantity + // pkg1/pool1: 2 cores (should take 0, 1) + // pool2 (common): 4 cores (should take from available excluding 0, 1 AND 2, 3) + // commonAvailableCPUs should be {4-15}. + // pool2 needs 4 cores. It should get 4, 5, 6, 7 (if taking from NUMA 0 first) or spread. + // Since NUMA 0 has 4,5,6,7 available (4 cores). + // NUMA 1 has 8-15 available (8 cores). + // pool2 is FakedNUMAID. + poolsQuantityMap := map[string]map[int]int{ + "pkg1/pool1": { + commonstate.FakedNUMAID: 2, + }, + "pool2": { + commonstate.FakedNUMAID: 4, + }, + commonstate.PoolNameReclaim: { + commonstate.FakedNUMAID: 0, + }, + } + + isolatedQuantityMap := map[string]map[string]int{} + + // Seed entries for Reclaim pool (needed for reclaimOverlapNUMABinding check) + // And seed containers to prevent cleanPools from removing the pools + entries := state.PodEntries{ + commonstate.PoolNameReclaim: { + commonstate.FakedContainerName: &state.AllocationInfo{ + AllocationMeta: commonstate.GenerateGenericPoolAllocationMeta(commonstate.PoolNameReclaim), + AllocationResult: machine.NewCPUSet(14, 15), + OriginalAllocationResult: machine.NewCPUSet(14, 15), + TopologyAwareAssignments: map[int]machine.CPUSet{1: machine.NewCPUSet(14, 15)}, + }, + }, + "pod1": { + "container1": &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + PodUid: "pod1", + PodNamespace: "default", + PodName: "pod1", + ContainerName: "container1", + OwnerPoolName: "pkg1/pool1", + QoSLevel: 
apiconsts.PodAnnotationQoSLevelSharedCores, + }, + }, + }, + "pod2": { + "container2": &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + PodUid: "pod2", + PodNamespace: "default", + PodName: "pod2", + ContainerName: "container2", + OwnerPoolName: "pool2", + QoSLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + }, + }, + } + + err = p.adjustPoolsAndIsolatedEntries(poolsQuantityMap, isolatedQuantityMap, entries, machineState, false) + as.Nil(err) + + updatedEntries := p.state.GetPodEntries() + + // Verify Results + // pkg1/pool1 should be {0, 1} + pool1Entry := updatedEntries["pkg1/pool1"][commonstate.FakedContainerName] + as.NotNil(pool1Entry) + as.True(pool1Entry.AllocationResult.Equals(machine.NewCPUSet(0, 1)), "pool1 should have pinned CPUs 0,1, got %s", pool1Entry.AllocationResult.String()) + + // pool2 should NOT contain 0, 1 (used by pkg1) AND should NOT contain 2, 3 (reserved by pkg2 even if unused) + pool2Entry := updatedEntries["pool2"][commonstate.FakedContainerName] + as.NotNil(pool2Entry) + // Check intersection with pkg1 pinned + as.False(pool2Entry.AllocationResult.Intersection(machine.NewCPUSet(0, 1)).Size() > 0, "pool2 should not use pinned CPUs 0,1, got %s", pool2Entry.AllocationResult.String()) + // Check intersection with pkg2 pinned (unused but reserved) + as.False(pool2Entry.AllocationResult.Intersection(machine.NewCPUSet(2, 3)).Size() > 0, "pool2 should not use pinned CPUs 2,3 (reserved for pkg2), got %s", pool2Entry.AllocationResult.String()) + + // Verify pool2 size + as.Equal(4, pool2Entry.AllocationResult.Size(), "pool2 should have 4 cores") +} + +// TestDynamicPolicy_groupAndAllocatePools tests the groupAndAllocatePools function. +// It verifies that pools are correctly grouped into pinned and common categories, +// and that CPUs are allocated according to availability and constraints. 
+func TestDynamicPolicy_groupAndAllocatePools(t *testing.T) { + t.Parallel() + + type args struct { + poolsQuantityMap map[string]map[int]int + isolatedQuantityMap map[string]map[string]int + availableCPUs machine.CPUSet + rpPinnedCPUSet map[string]machine.CPUSet + reclaimOverlapShareRatio map[string]float64 + } + tests := []struct { + name string + args args + wantPools map[string]machine.CPUSet + wantIsolated map[string]map[string]machine.CPUSet + wantErr bool + }{ + { + name: "Scenario 1: Common Pools Only - Verifies that when no pools are pinned, all pools are treated as common and allocated from the general available CPU set.", + args: args{ + poolsQuantityMap: map[string]map[int]int{ + "pool1": {commonstate.FakedNUMAID: 2}, + }, + availableCPUs: machine.NewCPUSet(0, 1, 2, 3), + }, + wantPools: map[string]machine.CPUSet{ + "pool1": machine.NewCPUSet(0, 1), + }, + wantErr: false, + }, + { + name: "Scenario 2: Pinned Pools Only - Verifies that pools belonging to a resource package are correctly identified and allocated exclusively from that package's pinned CPU set.", + args: args{ + poolsQuantityMap: map[string]map[int]int{ + rputil.WrapOwnerPoolName("pool1", "pkg1"): {commonstate.FakedNUMAID: 2}, + }, + availableCPUs: machine.NewCPUSet(0, 1, 2, 3), + rpPinnedCPUSet: map[string]machine.CPUSet{ + "pkg1": machine.NewCPUSet(0, 1), + }, + }, + wantPools: map[string]machine.CPUSet{ + rputil.WrapOwnerPoolName("pool1", "pkg1"): machine.NewCPUSet(0, 1), + }, + wantErr: false, + }, + { + name: "Scenario 3: Mixed Pinned and Common Pools - Verifies that the function correctly splits pinned and common pools, allocating pinned pools from their specific sets and common pools from the remaining available CPUs.", + args: args{ + poolsQuantityMap: map[string]map[int]int{ + rputil.WrapOwnerPoolName("pool1", "pkg1"): {commonstate.FakedNUMAID: 2}, + "pool2": {commonstate.FakedNUMAID: 2}, + }, + availableCPUs: machine.NewCPUSet(0, 1, 2, 3), + rpPinnedCPUSet: 
map[string]machine.CPUSet{ + "pkg1": machine.NewCPUSet(0, 1), + }, + }, + wantPools: map[string]machine.CPUSet{ + rputil.WrapOwnerPoolName("pool1", "pkg1"): machine.NewCPUSet(0, 1), + "pool2": machine.NewCPUSet(2, 3), + }, + wantErr: false, + }, + { + name: "Scenario 4: Isolated Containers - Verifies that isolated containers are allocated dedicated CPUs from the common available set alongside common pools.", + args: args{ + poolsQuantityMap: map[string]map[int]int{ + "pool1": {commonstate.FakedNUMAID: 2}, + }, + isolatedQuantityMap: map[string]map[string]int{ + "pod1": {"container1": 2}, + }, + availableCPUs: machine.NewCPUSet(0, 1, 2, 3), + }, + wantPools: map[string]machine.CPUSet{ + "pool1": machine.NewCPUSet(2, 3), + }, + wantIsolated: map[string]map[string]machine.CPUSet{ + "pod1": {"container1": machine.NewCPUSet(0, 1)}, + }, + wantErr: false, + }, + { + name: "Scenario 5: Error - Pinned Pool Insufficient CPUs - Verifies that the function degrades gracefully and allocates available CPUs (partial) if a pinned pool requests more CPUs than are available in its pinned set.", + args: args{ + poolsQuantityMap: map[string]map[int]int{ + rputil.WrapOwnerPoolName("pool1", "pkg1"): {commonstate.FakedNUMAID: 4}, + }, + availableCPUs: machine.NewCPUSet(0, 1, 2, 3), + rpPinnedCPUSet: map[string]machine.CPUSet{ + "pkg1": machine.NewCPUSet(0, 1), + }, + }, + wantPools: map[string]machine.CPUSet{ + rputil.WrapOwnerPoolName("pool1", "pkg1"): machine.NewCPUSet(0, 1), + }, + wantErr: false, + }, + { + name: "Scenario 6: Error - Common Pool Insufficient CPUs - Verifies that the function degrades gracefully and allocates available CPUs (partial) if common pools request more CPUs than are available in the shared pool.", + args: args{ + poolsQuantityMap: map[string]map[int]int{ + "pool1": {commonstate.FakedNUMAID: 4}, + }, + availableCPUs: machine.NewCPUSet(0, 1, 2, 3), + rpPinnedCPUSet: map[string]machine.CPUSet{ + "pkg1": machine.NewCPUSet(0, 1), + }, + }, + wantPools: 
map[string]machine.CPUSet{ + "pool1": machine.NewCPUSet(2, 3), + }, + wantErr: false, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + as := require.New(t) + + cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 2) + as.Nil(err) + + tmpDir, err := ioutil.TempDir("", "checkpoint-TestDynamicPolicy_groupAndAllocatePools") + as.Nil(err) + defer os.RemoveAll(tmpDir) + + p, err := getTestDynamicPolicyWithInitialization(cpuTopology, tmpDir) + as.Nil(err) + + // Clear state + p.state.SetPodEntries(state.PodEntries{}, false) + p.reservedCPUs = machine.NewCPUSet() + + gotPools, gotIsolated, err := p.groupAndAllocatePools(tt.args.poolsQuantityMap, tt.args.isolatedQuantityMap, tt.args.availableCPUs, tt.args.rpPinnedCPUSet, tt.args.reclaimOverlapShareRatio) + if (err != nil) != tt.wantErr { + t.Errorf("groupAndAllocatePools() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !tt.wantErr { + // Filter out system pools (reclaim, reserve) for comparison + filteredPools := make(map[string]machine.CPUSet) + for k, v := range gotPools { + if k != commonstate.PoolNameReclaim && k != commonstate.PoolNameReserve { + filteredPools[k] = v + } + } + + if !reflect.DeepEqual(filteredPools, tt.wantPools) { + t.Errorf("groupAndAllocatePools() gotPools = %v, want %v", filteredPools, tt.wantPools) + } + + if len(gotIsolated) == 0 && len(tt.wantIsolated) == 0 { + // Both empty/nil, treat as equal + } else if !reflect.DeepEqual(gotIsolated, tt.wantIsolated) { + t.Errorf("groupAndAllocatePools() gotIsolated = %v, want %v", gotIsolated, tt.wantIsolated) + } + } + }) + } +} + +func mustGenerateDummyCPUTopology(numCPUs, numSockets, numaNum int) *machine.CPUTopology { + topo, err := machine.GenerateDummyCPUTopology(numCPUs, numSockets, numaNum) + if err != nil { + panic(err) + } + return topo +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_async_handler.go 
b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_async_handler.go index d1bdfd8e22..34255eecd9 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_async_handler.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_async_handler.go @@ -421,10 +421,7 @@ func (p *DynamicPolicy) clearResidualState(_ *coreconfig.Configuration, return } - p.state.SetPodEntries(podEntries, false) - p.state.SetMachineState(updatedMachineState, false) - - err = p.adjustAllocationEntries(false) + err = p.adjustAllocationEntries(podEntries, updatedMachineState, false) if err != nil { general.ErrorS(err, "adjustAllocationEntries failed") } diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go index 78996fd447..8a94a0d0f7 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go @@ -161,7 +161,9 @@ func (p *DynamicPolicy) dedicatedCoresWithNUMABindingHintHandler(_ context.Conte (*commonstate.AllocationMeta).CheckDedicatedNUMABindingNUMAExclusive)) var extraErr error - hints, extraErr = util.GetHintsFromExtraStateFile(req.PodName, string(v1.ResourceCPU), p.extraStateFileAbsPath, availableNUMAs) + hints, extraErr = util.GetHintsFromExtraStateFile(req.PodName, p.extraStateFileAbsPath, availableNUMAs, []v1.ResourceName{ + v1.ResourceCPU, + }) if extraErr != nil { general.Infof("pod: %s/%s, container: %s GetHintsFromExtraStateFile failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, extraErr) @@ -303,10 +305,10 @@ func (p *DynamicPolicy) calculateHints( maskCount := mask.Count() if maskCount < minNUMAsCountNeeded { return - } else if numaBinding && !numaExclusive && numaNumber <= 1 && maskCount > 1 { + } else if numaBinding && !numaExclusive && maskCount > 1 && numaNumber <= 1 { // because it's hard to control memory allocation accurately, // we only support numa_binding but not exclusive 
container with request smaller than 1 NUMA - // pods with distribute evenly across numa annotation can occupy more than 1 NUMA + // pods with numa number more than 1 can occupy more than 1 NUMA return } @@ -371,7 +373,6 @@ func (p *DynamicPolicy) calculateHints( if numaNumber != 0 { minAffinitySize = numaNumber } - // Update hint to be preferred if they have minimum number of NUMA nodes for _, hint := range availableNumaHints { if len(hint.Nodes) == minAffinitySize { diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_irq_tuner.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_irq_tuner.go index f30f78109a..1fae443d43 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_irq_tuner.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_irq_tuner.go @@ -29,6 +29,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/irqtuner" irqutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/irqtuner/utils" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" + cpuutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/util" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" "github.com/kubewharf/katalyst-core/pkg/metrics" "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" @@ -137,13 +138,33 @@ func (p *DynamicPolicy) getPodContainerInfos(podUID string, entry state.Containe return cis, nil } +// getPinnedResourcePackageIRQForbiddenCPUSet gets the irq forbidden cpuset from the pinned resource package +func (p *DynamicPolicy) getPinnedResourcePackageIRQForbiddenCPUSet() machine.CPUSet { + if p.conf.IRQForbiddenPinnedResourcePackageAttributeSelector == nil { + return machine.NewCPUSet() + } + + irqForbiddenCPUSet := cpuutil.GetAggResourcePackagePinnedCPUSet(p.conf.IRQForbiddenPinnedResourcePackageAttributeSelector, p.state.GetMachineState()) + if !irqForbiddenCPUSet.IsEmpty() { + general.InfofV(4, "irq forbidden cpuset from pinned resource 
package is %v", irqForbiddenCPUSet.String()) + } + return irqForbiddenCPUSet +} + // GetIRQForbiddenCores retrieves the cpu set of cores that are forbidden for irq binding. +// The forbidden cores include: +// 1. Reserved CPUs (system reserved). +// 2. CPUs pinned by specific resource packages (as defined by the configuration). func (p *DynamicPolicy) GetIRQForbiddenCores() (machine.CPUSet, error) { forbiddenCores := machine.NewCPUSet() // get irq forbidden cores from cpu plugin checkpoint forbiddenCores = forbiddenCores.Union(p.reservedCPUs) + // get irq forbidden cores from pinned resource package + irqForbiddenCPUSet := p.getPinnedResourcePackageIRQForbiddenCPUSet() + forbiddenCores = forbiddenCores.Union(irqForbiddenCPUSet) + general.Infof("get the irq forbidden cores %v", forbiddenCores) return forbiddenCores, nil } diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_irq_tuner_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_irq_tuner_test.go index 5a94a11140..1cfa82a46b 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_irq_tuner_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_irq_tuner_test.go @@ -25,16 +25,22 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + nodev1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" irqutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/irqtuner/utils" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" podagent "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" + "github.com/kubewharf/katalyst-core/pkg/metaserver/npd" + 
"github.com/kubewharf/katalyst-core/pkg/metaserver/resourcepackage" cgroupcommon "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" "github.com/kubewharf/katalyst-core/pkg/util/machine" ) @@ -259,14 +265,98 @@ func TestDynamicPolicy_ListContainers(t *testing.T) { func TestDynamicPolicy_GetIRQForbiddenCores(t *testing.T) { t.Parallel() - as := require.New(t) - policyImpl := newTestDynamicPolicy(t, "get-irq-forbidden-cores") + tmpDir, err := ioutil.TempDir("", "checkpoint-TestGetIRQForbiddenCores") + require.NoError(t, err) + defer os.RemoveAll(tmpDir) - reservedCPU := []int{2, 4} - policyImpl.reservedCPUs = machine.NewCPUSet(reservedCPU...) - forbidden, err := policyImpl.GetIRQForbiddenCores() - as.NoError(err) - as.True(forbidden.IsSubsetOf(policyImpl.reservedCPUs)) + cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 2) + require.NoError(t, err) + + policy, err := getTestDynamicPolicyWithoutInitialization(cpuTopology, tmpDir) + require.NoError(t, err) + + // Mock reserved CPUs + policy.reservedCPUs = machine.NewCPUSet(0, 1) + + // Prepare resource packages in NPD + npdFetcher := &npd.DummyNPDFetcher{ + NPD: &nodev1alpha1.NodeProfileDescriptor{ + Status: nodev1alpha1.NodeProfileDescriptorStatus{ + NodeMetrics: []nodev1alpha1.ScopedNodeMetrics{ + { + Scope: "resource-package", + Metrics: []nodev1alpha1.MetricValue{ + { + MetricName: string(v1.ResourceCPU), + MetricLabels: map[string]string{ + "package-name": "pkg1", + "numa-id": "0", + "pinned-cpuset": "true", + "type": "forbidden", + }, + Value: *resource.NewQuantity(2, resource.DecimalSI), + Aggregator: func() *nodev1alpha1.Aggregator { a := nodev1alpha1.AggregatorMin; return &a }(), + }, + { + MetricName: string(v1.ResourceCPU), + MetricLabels: map[string]string{ + "package-name": "pkg2", + "numa-id": "1", + "pinned-cpuset": "true", + "type": "allowed", + }, + Value: *resource.NewQuantity(2, resource.DecimalSI), + Aggregator: func() *nodev1alpha1.Aggregator { a := nodev1alpha1.AggregatorMin; 
return &a }(), + }, + }, + }, + }, + }, + }, + } + policy.resourcePackageManager = resourcepackage.NewCachedResourcePackageManager(resourcepackage.NewResourcePackageManager(npdFetcher)) + stopCh := make(chan struct{}) + defer close(stopCh) + // Run cached manager to populate cache + go policy.resourcePackageManager.Run(stopCh) + time.Sleep(100 * time.Millisecond) + + // Mock machine state to include pinned CPUs for packages + // Note: In a real scenario, this state is populated by policy logic. + // Here we need to manually inject it into the state if we want GetAggResourcePackagePinnedCPUSet to find it. + // However, GetAggResourcePackagePinnedCPUSet reads from policy.state.GetMachineState(). + // We need to update the machine state with ResourcePackagePinnedCPUSet. + + // Assuming NUMA 0 has pkg1 pinned to CPUs 2, 3 + // Assuming NUMA 1 has pkg2 pinned to CPUs 4, 5 + machineState := policy.state.GetMachineState() + machineState[0].ResourcePackageStates = map[string]*state.ResourcePackageState{ + "pkg1": { + PinnedCPUSet: machine.NewCPUSet(2, 3), + Attributes: map[string]string{"type": "forbidden"}, + }, + } + machineState[1].ResourcePackageStates = map[string]*state.ResourcePackageState{ + "pkg2": { + PinnedCPUSet: machine.NewCPUSet(4, 5), + Attributes: map[string]string{"type": "other"}, + }, + } + policy.state.SetMachineState(machineState, false) + + // Configure attribute selector + selector, err := labels.Parse("type=forbidden") + require.NoError(t, err) + policy.conf.IRQForbiddenPinnedResourcePackageAttributeSelector = selector + + // Run the test + forbiddenCores, err := policy.GetIRQForbiddenCores() + require.NoError(t, err) + + // Expected: Reserved CPUs (0, 1) + Pinned CPUs for pkg1 (2, 3) = (0, 1, 2, 3) + // pkg2 is excluded because type=allowed != type=forbidden + expected := machine.NewCPUSet(0, 1, 2, 3) + assert.True(t, expected.Equals(forbiddenCores), "expected %v, got %v", expected, forbiddenCores) } func TestDynamicPolicy_GetExclusiveIRQCPUSet(t 
*testing.T) { diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_resource_package.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_resource_package.go new file mode 100644 index 0000000000..33891aa9c9 --- /dev/null +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_resource_package.go @@ -0,0 +1,392 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package dynamicpolicy + +import ( + "context" + "fmt" + "math" + "strconv" + "time" + + "k8s.io/apimachinery/pkg/util/sets" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/calculator" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/general" + "github.com/kubewharf/katalyst-core/pkg/util/machine" + "github.com/kubewharf/katalyst-core/pkg/util/metric" + utilresourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" +) + +func (p *DynamicPolicy) syncResourcePackagePinnedCPUSet() { + startTime := time.Now() + p.Lock() + defer func() { + p.Unlock() + general.InfoS("finished", + "duration", time.Since(startTime).String(), + ) + }() + + resourcePackages, err := p.resourcePackageManager.NodeResourcePackages(context.Background()) + if err != nil { + 
general.Errorf("failed to get node resource packages: %v", err) + _ = p.emitter.StoreInt64(util.MetricNameSyncResourcePackagePinnedCPUSetFailed, 1, metrics.MetricTypeNameRaw, + metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(err)}) + return + } + + pinnedCPUSetSize, err := resourcePackages.ListAllPinnedCPUSetSize() + if err != nil { + general.Errorf("failed to get all pinned cpuset size: %v", err) + _ = p.emitter.StoreInt64(util.MetricNameSyncResourcePackagePinnedCPUSetFailed, 1, metrics.MetricTypeNameRaw, + metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(err)}) + return + } + + interruptAllocationInfo := p.state.GetAllocationInfo(commonstate.PoolNameInterrupt, commonstate.FakedContainerName) + + machineState := p.state.GetMachineState() + podEntries := p.state.GetPodEntries() + + newResourcePackageStateMap := make(map[int]map[string]*state.ResourcePackageState) + stateChanged := false + + for _, numaID := range p.machineInfo.CPUDetails.NUMANodes().ToSliceInt() { + numaState := machineState[numaID] + if numaState == nil { + continue + } + + newPinnedMap, changed, err := p.syncNumaResourcePackage(numaID, numaState, pinnedCPUSetSize, interruptAllocationInfo, resourcePackages) + if err != nil { + general.Errorf("failed to sync resource package for numa %d: %v", numaID, err) + _ = p.emitter.StoreInt64(util.MetricNameSyncResourcePackagePinnedCPUSetFailed, 1, metrics.MetricTypeNameRaw, + metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(err)}, + metrics.MetricTag{Key: "numa_id", Val: strconv.Itoa(numaID)}) + return + } + + if newPinnedMap != nil { + newResourcePackageStateMap[numaID] = newPinnedMap + } + + if changed { + stateChanged = true + } + } + + if stateChanged { + general.InfoS("resource package pinned cpuset changed, updating state", "newResourcePackageStateMap", newResourcePackageStateMap) + for numaID, pkgs := range newResourcePackageStateMap { + if machineState[numaID] != nil { + 
machineState[numaID].ResourcePackageStates = pkgs + } + } + + err = p.adjustAllocationEntries(podEntries, machineState, true) + if err != nil { + general.Errorf("adjustAllocationEntries failed: %v", err) + return + } + + general.InfoS("syncResourcePackagePinnedCPUSet finished with state changed") + } else { + general.InfoS("syncResourcePackagePinnedCPUSet finished without state changed") + } + + for numaID, pkgs := range newResourcePackageStateMap { + for pkgName, rpState := range pkgs { + if rpState != nil { + _ = p.emitter.StoreInt64(util.MetricNameResourcePackagePinnedCPUSetSize, int64(rpState.PinnedCPUSet.Size()), metrics.MetricTypeNameRaw, + metrics.MetricTag{Key: "numa_id", Val: strconv.Itoa(numaID)}, + metrics.MetricTag{Key: "package_name", Val: pkgName}) + } + } + } +} + +func (p *DynamicPolicy) syncNumaResourcePackage( + numaID int, + numaState *state.NUMANodeState, + pinnedCPUSetSize map[int]map[string]int, + interruptAllocationInfo *state.AllocationInfo, + resourcePackages utilresourcepackage.NUMAResourcePackageItems, +) (map[string]*state.ResourcePackageState, bool, error) { + mandatoryCPUsMap := make(map[string]machine.CPUSet) + sharedRequestsMap := make(map[string]float64) + activePackages := sets.NewString() + sharedPodsMap := make(map[string][]*state.AllocationInfo) + stateChanged := false + newResourcePackageState := make(map[string]*state.ResourcePackageState) + pinnedPackages := sets.NewString() + + for _, containerEntries := range numaState.PodEntries { + if containerEntries.IsPoolEntry() { + continue + } + for _, allocationInfo := range containerEntries { + if allocationInfo == nil { + continue + } + + pkgName := allocationInfo.GetResourcePackageName() + if pkgName == "" { + continue + } + + activePackages.Insert(pkgName) + + if allocationInfo.CheckDedicated() { + if _, ok := mandatoryCPUsMap[pkgName]; !ok { + mandatoryCPUsMap[pkgName] = machine.NewCPUSet() + } + + // Ensure we only count CPUs on this NUMA node, handling cross-NUMA cases + 
dedicatedCPUs := allocationInfo.AllocationResult.Intersection(p.machineInfo.CPUDetails.CPUsInNUMANodes(numaID)) + mandatoryCPUsMap[pkgName] = mandatoryCPUsMap[pkgName].Union(dedicatedCPUs) + } else if allocationInfo.CheckSharedNUMABinding() { + sharedRequestsMap[pkgName] += allocationInfo.RequestQuantity + if sharedPodsMap[pkgName] == nil { + sharedPodsMap[pkgName] = make([]*state.AllocationInfo, 0) + } + sharedPodsMap[pkgName] = append(sharedPodsMap[pkgName], allocationInfo) + } + } + } + + availableCPUs := p.machineInfo.CPUDetails.CPUsInNUMANodes(numaID).Difference(p.reservedCPUs) + // exclude interrupt cpuset from available cpuset + if interruptAllocationInfo != nil { + availableCPUs = availableCPUs.Difference(interruptAllocationInfo.AllocationResult) + } + + allMandatoryPinned := machine.NewCPUSet() + allSharedPinnedRequest := float64(0) + if pkgs, ok := pinnedCPUSetSize[numaID]; ok { + for pkg := range pkgs { + if cset, exists := mandatoryCPUsMap[pkg]; exists { + allMandatoryPinned = allMandatoryPinned.Union(cset) + } + + if sharedReq, exists := sharedRequestsMap[pkg]; exists { + allSharedPinnedRequest += sharedReq + } + } + } + allocatedDedicatedNonPinned := numaState.AllocatedCPUSet.Difference(allMandatoryPinned) + + if pkgs, ok := resourcePackages[numaID]; ok { + for pkgName := range pkgs { + var targetSize int + isPinned := false + if sizeMap, ok := pinnedCPUSetSize[numaID]; ok { + if size, ok := sizeMap[pkgName]; ok { + targetSize = size + isPinned = true + } + } + + var currentState *state.ResourcePackageState + var currentPinned machine.CPUSet + if rpState, ok := numaState.ResourcePackageStates[pkgName]; ok && rpState != nil { + currentState = rpState + currentPinned = rpState.PinnedCPUSet + } + + var newPinned machine.CPUSet + + if isPinned { + pinnedPackages.Insert(pkgName) + mandatoryCPUs := mandatoryCPUsMap[pkgName] + sharedReq := sharedRequestsMap[pkgName] + + minSize := mandatoryCPUs.Size() + int(math.Ceil(sharedReq)) + if minSize > targetSize { + 
targetSize = minSize + } + + otherPinned := machine.NewCPUSet() + for otherPkg, rpState := range numaState.ResourcePackageStates { + if otherPkg != pkgName && rpState != nil { + otherPinned = otherPinned.Union(rpState.PinnedCPUSet) + } + } + + availableForPkg := availableCPUs.Difference(allocatedDedicatedNonPinned).Difference(otherPinned) + + if currentPinned.Size() < targetSize { + delta := targetSize - currentPinned.Size() + candidates := availableForPkg.Difference(currentPinned) + newCPUs, err := calculator.TakeByTopology(p.machineInfo, candidates, delta, true) + if err != nil { + general.Errorf("failed to expand pinned cpuset for pkg %s: %v", pkgName, err) + _ = p.emitter.StoreInt64(util.MetricNameSyncNumaResourcePackageFailed, 1, metrics.MetricTypeNameRaw, + metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(err)}, + metrics.MetricTag{Key: "numa_id", Val: strconv.Itoa(numaID)}, + metrics.MetricTag{Key: "package_name", Val: pkgName}, + metrics.MetricTag{Key: "reason", Val: "expand_failed"}) + newPinned = currentPinned + } else { + newPinned = currentPinned.Union(newCPUs) + } + } else if currentPinned.Size() > targetSize { + candidates := currentPinned.Difference(mandatoryCPUs) + keepSize := targetSize - mandatoryCPUs.Size() + + if keepSize > 0 { + kept, err := calculator.TakeByTopology(p.machineInfo, candidates, keepSize, true) + if err != nil { + general.Errorf("failed to shrink (select kept) for pkg %s: %v", pkgName, err) + _ = p.emitter.StoreInt64(util.MetricNameSyncNumaResourcePackageFailed, 1, metrics.MetricTypeNameRaw, + metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(err)}, + metrics.MetricTag{Key: "numa_id", Val: strconv.Itoa(numaID)}, + metrics.MetricTag{Key: "package_name", Val: pkgName}, + metrics.MetricTag{Key: "reason", Val: "shrink_failed"}) + newPinned = currentPinned + } else { + newPinned = mandatoryCPUs.Union(kept) + } + } else { + newPinned = mandatoryCPUs + } + } else { + newPinned = 
currentPinned + } + } else { + newPinned = machine.NewCPUSet() + } + + newState := &state.ResourcePackageState{ + PinnedCPUSet: newPinned, + Attributes: resourcePackages.GetAttributesMap(numaID, pkgName), + } + newResourcePackageState[pkgName] = newState + + if !newState.Equals(currentState) { + general.InfoS("resource package state changed", + "numaID", numaID, + "pkgName", pkgName, + "oldState", currentState, + "newState", newState) + stateChanged = true + } else { + general.InfoS("resource package state not changed", + "numaID", numaID, + "pkgName", pkgName, + "state", currentState) + } + } + } + + for pkgName, rpState := range numaState.ResourcePackageStates { + if rpState == nil { + continue + } + // Check if resourcePackages[numaID] exists before accessing inner map + if pkgs, ok := resourcePackages[numaID]; !ok || pkgs == nil { + // If NUMA ID is not in config, check if package is active + if activePackages.Has(pkgName) { + newResourcePackageState[pkgName] = rpState.Clone() + if rpState.PinnedCPUSet.Size() > 0 { + pinnedPackages.Insert(pkgName) + } + general.Errorf("resource package %s removed from config (NUMA %d missing) but still has pods", pkgName, numaID) + } else { + stateChanged = true + general.Infof("resource package %s removed from numa %d (config missing)", pkgName, numaID) + } + } else { + // If NUMA ID exists, check if package is in config + if _, ok := pkgs[pkgName]; !ok { + if activePackages.Has(pkgName) { + newResourcePackageState[pkgName] = rpState.Clone() + if rpState.PinnedCPUSet.Size() > 0 { + pinnedPackages.Insert(pkgName) + } + general.Errorf("resource package %s removed from config but still has pods on numa %d", pkgName, numaID) + } else { + stateChanged = true + general.Infof("resource package %s removed from numa %d", pkgName, numaID) + } + } + } + } + + if stateChanged { + // Validate resource availability for non-pinned shared cores + totalPinnedCPUSet := machine.NewCPUSet() + for _, rpState := range newResourcePackageState { + if 
rpState != nil { + totalPinnedCPUSet = totalPinnedCPUSet.Union(rpState.PinnedCPUSet) + } + } + + availableForNonPinned := availableCPUs.Difference(totalPinnedCPUSet).Difference(numaState.AllocatedCPUSet) + totalNonPinnedRequest := 0.0 + hasNoPackagePod := false + + for _, containerEntries := range numaState.PodEntries { + if containerEntries.IsPoolEntry() { + continue + } + for _, allocationInfo := range containerEntries { + if allocationInfo == nil { + continue + } + + // Only check shared cores pods + if !allocationInfo.CheckSharedNUMABinding() { + continue + } + + pkgName := allocationInfo.GetResourcePackageName() + // Check if pod is non-pinned: + // 1. No package name + // 2. Package name exists but not in pinnedPackages + isPinned := false + if pkgName != "" { + if pinnedPackages.Has(pkgName) { + isPinned = true + } + } else { + hasNoPackagePod = true + } + + if !isPinned { + totalNonPinnedRequest += allocationInfo.RequestQuantity + } + } + } + + if totalNonPinnedRequest > float64(availableForNonPinned.Size()) { + general.Errorf("resource validation failed for numa %d: non-pinned request %.2f exceeds available capacity %d. "+ + "Pinned packages occupy %d CPUs. Has pods without package: %v. 
"+ + "Details: availableCPUs=%s, totalPinned=%s, availableForNonPinned=%s", + numaID, totalNonPinnedRequest, availableForNonPinned.Size(), totalPinnedCPUSet.Size(), hasNoPackagePod, + availableCPUs.String(), totalPinnedCPUSet.String(), availableForNonPinned.String()) + return nil, false, fmt.Errorf("insufficient resources for non-pinned shared pods on numa %d: request %.2f > available %d", + numaID, totalNonPinnedRequest, availableForNonPinned.Size()) + } + } + + return newResourcePackageState, stateChanged, nil +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_resource_package_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_resource_package_test.go new file mode 100644 index 0000000000..33d0708380 --- /dev/null +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_resource_package_test.go @@ -0,0 +1,707 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package dynamicpolicy + +import ( + "context" + "io/ioutil" + "os" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + nodev1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + "github.com/kubewharf/katalyst-api/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" + cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" + metaresourcepackage "github.com/kubewharf/katalyst-core/pkg/metaserver/resourcepackage" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/machine" + utilresourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" +) + +type MockMetricsEmitter struct { + metrics.DummyMetrics + storedInt64 map[string][]int64 + storedTags map[string][][]metrics.MetricTag +} + +func NewMockMetricsEmitter() *MockMetricsEmitter { + return &MockMetricsEmitter{ + storedInt64: make(map[string][]int64), + storedTags: make(map[string][][]metrics.MetricTag), + } +} + +func (m *MockMetricsEmitter) StoreInt64(key string, val int64, emitType metrics.MetricTypeName, tags ...metrics.MetricTag) error { + m.storedInt64[key] = append(m.storedInt64[key], val) + m.storedTags[key] = append(m.storedTags[key], tags) + return nil +} + +func (m *MockMetricsEmitter) WithTags(unit string, commonTags ...metrics.MetricTag) metrics.MetricEmitter { + w := &metrics.MetricTagWrapper{MetricEmitter: m} + return w.WithTags(unit, commonTags...) 
+} + +type mockResourcePackageManager struct { + metaresourcepackage.ResourcePackageManager + items utilresourcepackage.NUMAResourcePackageItems + err error +} + +func (m *mockResourcePackageManager) NodeResourcePackages(ctx context.Context) (utilresourcepackage.NUMAResourcePackageItems, error) { + return m.items, m.err +} + +func TestSyncResourcePackageStates(t *testing.T) { + t.Parallel() + + tmpDir, err := ioutil.TempDir("", "checkpoint-test") + require.NoError(t, err) + defer os.RemoveAll(tmpDir) + + cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 2) + require.NoError(t, err) + // NUMA 0: CPUs [0-7] + // NUMA 1: CPUs [8-15] + + // Helper to create NUMAResourcePackageItems + createPkgItems := func(config map[int]map[string]int) utilresourcepackage.NUMAResourcePackageItems { + items := make(utilresourcepackage.NUMAResourcePackageItems) + for numa, pkgs := range config { + items[numa] = make(map[string]utilresourcepackage.ResourcePackageItem) + for pkgName, size := range pkgs { + pinned := true + items[numa][pkgName] = utilresourcepackage.ResourcePackageItem{ + ResourcePackage: nodev1alpha1.ResourcePackage{ + PackageName: pkgName, + Allocatable: &v1.ResourceList{ + v1.ResourceCPU: *resource.NewQuantity(int64(size), resource.DecimalSI), + }, + }, + Config: &utilresourcepackage.ResourcePackageConfig{ + PinnedCPUSet: &pinned, + }, + } + } + } + return items + } + + tests := []struct { + name string + initialState func(dp *DynamicPolicy) // Set up initial machine state and pods + resourcePackages utilresourcepackage.NUMAResourcePackageItems + verify func(t *testing.T, machineState state.NUMANodeMap, podEntries state.PodEntries) + checkMetrics func(t *testing.T, emitter *MockMetricsEmitter) + expectError bool + }{ + { + name: "Expand Pinned CPUSet", + initialState: func(dp *DynamicPolicy) { + // Initial: NUMA 0 has pkg-a pinned to [2,3] (size 2) + ms := dp.state.GetMachineState() + if ms[0].ResourcePackageStates == nil { + ms[0].ResourcePackageStates = 
make(map[string]*state.ResourcePackageState) + } + ms[0].ResourcePackageStates["pkg-a"] = &state.ResourcePackageState{PinnedCPUSet: machine.NewCPUSet(2, 3)} + dp.state.SetMachineState(ms, false) + }, + resourcePackages: createPkgItems(map[int]map[string]int{ + 0: {"pkg-a": 4}, // Request expansion to 4 + }), + verify: func(t *testing.T, ms state.NUMANodeMap, pe state.PodEntries) { + pinned := ms[0].ResourcePackageStates["pkg-a"].PinnedCPUSet + assert.Equal(t, 4, pinned.Size()) + assert.True(t, machine.NewCPUSet(2, 3).IsSubsetOf(pinned), "Should keep original CPUs") + }, + checkMetrics: func(t *testing.T, emitter *MockMetricsEmitter) { + vals := emitter.storedInt64[util.MetricNameResourcePackagePinnedCPUSetSize] + assert.NotEmpty(t, vals) + assert.Contains(t, vals, int64(4)) + }, + }, + { + name: "Shrink Pinned CPUSet with Shared Cores Constraint", + initialState: func(dp *DynamicPolicy) { + // Ensure deterministic reserved CPUs and clear existing pool + dp.reservedCPUs = machine.NewCPUSet(0, 1) + podEntries := dp.state.GetPodEntries() + delete(podEntries, commonstate.PoolNameReserve) + dp.state.SetPodEntries(podEntries, false) + + // Calculate valid CPUSet for pkg-b on NUMA 0 (need 4 CPUs) + cpus0 := dp.machineInfo.CPUDetails.CPUsInNUMANodes(0).Difference(dp.reservedCPUs) + pkgBCPUSet := machine.NewCPUSet(cpus0.ToSliceInt()[:4]...) 
+ + // Pod constraint: Shared pod requesting 1 CPUs + podID := "pod-shared" + alloc := &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + PodUid: podID, + PodName: podID, + ContainerName: "c1", + ContainerType: pluginapi.ContainerType_MAIN.String(), + QoSLevel: consts.PodAnnotationQoSLevelSharedCores, + Annotations: map[string]string{ + consts.PodAnnotationResourcePackageKey: "pkg-b", + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + cpuconsts.CPUStateAnnotationKeyNUMAHint: "0", + }, + }, + AllocationResult: pkgBCPUSet.Clone(), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: pkgBCPUSet.Clone(), + }, + RequestQuantity: 1.0, + } + dp.state.SetAllocationInfo(podID, "c1", alloc, false) + + mockPodInMetaServer(dp, alloc, "1") + + // Generate MS from Pods to ensure PodEntries are populated in NUMANodeState + podEntries = dp.state.GetPodEntries() + ms, err := state.GenerateMachineStateFromPodEntries(dp.machineInfo.CPUTopology, podEntries, dp.state.GetMachineState()) + require.NoError(t, err) + + // Initial: pkg-b pinned to pkgBCPUSet (size 4) + if ms[0].ResourcePackageStates == nil { + ms[0].ResourcePackageStates = make(map[string]*state.ResourcePackageState) + } + ms[0].ResourcePackageStates["pkg-b"] = &state.ResourcePackageState{PinnedCPUSet: pkgBCPUSet.Clone()} + dp.state.SetMachineState(ms, false) + }, + resourcePackages: createPkgItems(map[int]map[string]int{ + 0: {"pkg-b": 2}, // Request shrink to 2 + }), + verify: func(t *testing.T, ms state.NUMANodeMap, pe state.PodEntries) { + pinned := ms[0].ResourcePackageStates["pkg-b"].PinnedCPUSet + assert.Equal(t, 2, pinned.Size(), "Should be limited by shared request (1*2=2)") + + // Verify pod allocation updated + podAlloc := pe["pod-shared"]["c1"] + assert.Equal(t, pinned.Size(), podAlloc.AllocationResult.Size()) + assert.True(t, podAlloc.AllocationResult.Equals(pinned)) + }, + checkMetrics: func(t *testing.T, emitter 
*MockMetricsEmitter) { + vals := emitter.storedInt64[util.MetricNameResourcePackagePinnedCPUSetSize] + assert.NotEmpty(t, vals) + assert.Contains(t, vals, int64(2)) + }, + }, + { + name: "Preserve Dedicated Cores", + initialState: func(dp *DynamicPolicy) { + // Pod constraint: Dedicated pod using [2,3] + podID := "pod-dedicated" + alloc := &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + PodUid: podID, + PodName: podID, + ContainerName: "c1", + QoSLevel: consts.PodAnnotationQoSLevelDedicatedCores, + Annotations: map[string]string{ + consts.PodAnnotationResourcePackageKey: "pkg-c", + }, + }, + AllocationResult: machine.NewCPUSet(2, 3), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(2, 3), + }, + } + dp.state.SetAllocationInfo(podID, "c1", alloc, false) + + // Generate MS + podEntries := dp.state.GetPodEntries() + ms, err := state.GenerateMachineStateFromPodEntries(dp.machineInfo.CPUTopology, podEntries, dp.state.GetMachineState()) + require.NoError(t, err) + + // Initial: pkg-c pinned to [2,3,4,5] (size 4) + if ms[0].ResourcePackageStates == nil { + ms[0].ResourcePackageStates = make(map[string]*state.ResourcePackageState) + } + ms[0].ResourcePackageStates["pkg-c"] = &state.ResourcePackageState{PinnedCPUSet: machine.NewCPUSet(2, 3, 4, 5)} + dp.state.SetMachineState(ms, false) + }, + resourcePackages: createPkgItems(map[int]map[string]int{ + 0: {"pkg-c": 3}, // Request shrink to 3 + }), + verify: func(t *testing.T, ms state.NUMANodeMap, pe state.PodEntries) { + pinned := ms[0].ResourcePackageStates["pkg-c"].PinnedCPUSet + assert.Equal(t, 3, pinned.Size()) + assert.True(t, machine.NewCPUSet(2, 3).IsSubsetOf(pinned), "Must contain dedicated cores") + }, + }, + { + name: "Remove Package", + initialState: func(dp *DynamicPolicy) { + // Initial: pkg-d pinned + ms := dp.state.GetMachineState() + if ms[0].ResourcePackageStates == nil { + ms[0].ResourcePackageStates = make(map[string]*state.ResourcePackageState) + } + 
ms[0].ResourcePackageStates["pkg-d"] = &state.ResourcePackageState{PinnedCPUSet: machine.NewCPUSet(6, 7)} + dp.state.SetMachineState(ms, false) + }, + resourcePackages: createPkgItems(map[int]map[string]int{ + 0: {}, // Empty config + }), + verify: func(t *testing.T, ms state.NUMANodeMap, pe state.PodEntries) { + _, exists := ms[0].ResourcePackageStates["pkg-d"] + assert.False(t, exists, "Should be removed") + }, + }, + { + name: "Remove Package with Active Pods (Keep)", + initialState: func(dp *DynamicPolicy) { + // Pod exists + podID := "pod-e" + alloc := &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + PodUid: podID, + PodName: podID, + ContainerName: "c1", + ContainerType: pluginapi.ContainerType_MAIN.String(), + QoSLevel: consts.PodAnnotationQoSLevelSharedCores, + Annotations: map[string]string{ + consts.PodAnnotationResourcePackageKey: "pkg-e", + }, + }, + AllocationResult: machine.NewCPUSet(6, 7), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(6, 7), + }, + } + dp.state.SetAllocationInfo(podID, "c1", alloc, false) + + // Generate MS + podEntries := dp.state.GetPodEntries() + ms, err := state.GenerateMachineStateFromPodEntries(dp.machineInfo.CPUTopology, podEntries, dp.state.GetMachineState()) + require.NoError(t, err) + + // Initial: pkg-e pinned + if ms[0].ResourcePackageStates == nil { + ms[0].ResourcePackageStates = make(map[string]*state.ResourcePackageState) + } + ms[0].ResourcePackageStates["pkg-e"] = &state.ResourcePackageState{PinnedCPUSet: machine.NewCPUSet(6, 7)} + dp.state.SetMachineState(ms, false) + }, + resourcePackages: createPkgItems(map[int]map[string]int{ + 0: {}, // Empty config + }), + verify: func(t *testing.T, ms state.NUMANodeMap, pe state.PodEntries) { + _, exists := ms[0].ResourcePackageStates["pkg-e"] + assert.True(t, exists, "Should keep package because pods exist") + }, + }, + { + name: "Panic Prevention: Missing NUMA in Config", + initialState: func(dp *DynamicPolicy) { + // 
Initial: pkg-f pinned on NUMA 0 + ms := dp.state.GetMachineState() + if ms[0].ResourcePackageStates == nil { + ms[0].ResourcePackageStates = make(map[string]*state.ResourcePackageState) + } + ms[0].ResourcePackageStates["pkg-f"] = &state.ResourcePackageState{PinnedCPUSet: machine.NewCPUSet(0, 1)} + dp.state.SetMachineState(ms, false) + }, + resourcePackages: func() utilresourcepackage.NUMAResourcePackageItems { + // Create config ONLY for NUMA 1, missing NUMA 0 + items := createPkgItems(map[int]map[string]int{ + 1: {"pkg-g": 4}, + }) + // Explicitly ensure NUMA 0 is nil in the map + delete(items, 0) + return items + }(), + verify: func(t *testing.T, ms state.NUMANodeMap, pe state.PodEntries) { + // Should remove pkg-f from NUMA 0 because it's not in config (and no active pods) + _, exists := ms[0].ResourcePackageStates["pkg-f"] + assert.False(t, exists, "Should be removed safely without panic") + }, + }, + { + name: "Error Handling: Calculator Fail (Expand)", + initialState: func(dp *DynamicPolicy) { + // Initial: pkg-fail pinned to [0,1] + ms := dp.state.GetMachineState() + if ms[0].ResourcePackageStates == nil { + ms[0].ResourcePackageStates = make(map[string]*state.ResourcePackageState) + } + ms[0].ResourcePackageStates["pkg-fail"] = &state.ResourcePackageState{PinnedCPUSet: machine.NewCPUSet(0, 1)} + + // Occupy all other CPUs to force allocation failure + // We must use actual NUMA 0 CPUs from machineInfo because GenerateDummyCPUTopology might be interleaved. 
+ cpusInNuma0 := dp.machineInfo.CPUDetails.CPUsInNUMANodes(0) + // Exclude reserved (if any) and current pinned + available := cpusInNuma0.Difference(dp.reservedCPUs).Difference(machine.NewCPUSet(0, 1)) + + // Set AllocatedCPUSet to occupy ALL available CPUs + ms[0].AllocatedCPUSet = available + dp.state.SetMachineState(ms, false) + }, + resourcePackages: createPkgItems(map[int]map[string]int{ + 0: {"pkg-fail": 4}, // Want to expand to 4, but 0 available + }), + verify: func(t *testing.T, ms state.NUMANodeMap, pe state.PodEntries) { + pinned := ms[0].ResourcePackageStates["pkg-fail"].PinnedCPUSet + assert.Equal(t, 2, pinned.Size(), "Should stay at original size due to allocation failure") + }, + checkMetrics: func(t *testing.T, emitter *MockMetricsEmitter) { + vals := emitter.storedInt64[util.MetricNameSyncNumaResourcePackageFailed] + assert.NotEmpty(t, vals) + // Check for "expand_failed" reason + found := false + for _, tags := range emitter.storedTags[util.MetricNameSyncNumaResourcePackageFailed] { + for _, tag := range tags { + if tag.Key == "reason" && tag.Val == "expand_failed" { + found = true + break + } + } + } + assert.True(t, found, "Should emit expand_failed metric") + + // Top level failure also emitted? + // syncNumaResourcePackage returns error if failure? + // The code swallows calculator error and returns newPinned = currentPinned. + // So syncNumaResourcePackage returns nil error. + // Thus MetricNameSyncResourcePackageStatesFailed is NOT emitted. + // This is correct behavior based on my implementation (error is logged and metric emitted, but function continues). + }, + }, + { + name: "Capacity Check: Non-Pinned Shared Pods Fit", + initialState: func(dp *DynamicPolicy) { + // NUMA 0 has 8 CPUs. Use even numbers to be safe with interleaved topology. + // Pkg-h pinned to [0,2] (size 2). + // Remaining available: 6 CPUs. + // Non-pinned shared pod requests 4 CPUs. 
+ podID := "pod-non-pinned-fit" + alloc := &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + PodUid: podID, + PodName: podID, + ContainerName: "c1", + ContainerType: pluginapi.ContainerType_MAIN.String(), + QoSLevel: consts.PodAnnotationQoSLevelSharedCores, + Annotations: map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + // No package name => non-pinned + cpuconsts.CPUStateAnnotationKeyNUMAHint: "0", + }, + }, + RequestQuantity: 4.0, + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(4, 5, 6, 7), // Just some assignment on NUMA 0 + }, + } + dp.state.SetAllocationInfo(podID, "c1", alloc, false) + mockPodInMetaServer(dp, alloc, "3") + + // Generate MS from Pods to ensure PodEntries are populated in NUMANodeState + podEntries := dp.state.GetPodEntries() + ms, err := state.GenerateMachineStateFromPodEntries(dp.machineInfo.CPUTopology, podEntries, dp.state.GetMachineState()) + require.NoError(t, err) + + if ms[0].ResourcePackageStates == nil { + ms[0].ResourcePackageStates = make(map[string]*state.ResourcePackageState) + } + ms[0].ResourcePackageStates["pkg-h"] = &state.ResourcePackageState{PinnedCPUSet: machine.NewCPUSet(0, 2)} + dp.state.SetMachineState(ms, false) + }, + resourcePackages: createPkgItems(map[int]map[string]int{ + 0: {"pkg-h": 2}, // Keep size 2 + }), + verify: func(t *testing.T, ms state.NUMANodeMap, pe state.PodEntries) { + pinned := ms[0].ResourcePackageStates["pkg-h"].PinnedCPUSet + assert.Equal(t, 2, pinned.Size()) + }, + }, + { + name: "Capacity Check: Non-Pinned Shared Pods Exceed", + initialState: func(dp *DynamicPolicy) { + // NUMA 0 has 8 CPUs. + // Pkg-i pinned to [0,2] (size 2). + // Request expansion to 4. + // Non-pinned shared pod requests 10 CPUs (surely exceeds). 
+ podID := "pod-non-pinned-exceed" + alloc := &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + PodUid: podID, + PodName: podID, + ContainerName: "c1", + ContainerType: pluginapi.ContainerType_MAIN.String(), + QoSLevel: consts.PodAnnotationQoSLevelSharedCores, + Annotations: map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + // No package name => non-pinned + cpuconsts.CPUStateAnnotationKeyNUMAHint: "0", + }, + }, + RequestQuantity: 10.0, + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(4, 5, 6, 7), // Just some assignment on NUMA 0 + }, + } + dp.state.SetAllocationInfo(podID, "c1", alloc, false) + mockPodInMetaServer(dp, alloc, "7") + + // Generate MS from Pods + podEntries := dp.state.GetPodEntries() + ms, err := state.GenerateMachineStateFromPodEntries(dp.machineInfo.CPUTopology, podEntries, dp.state.GetMachineState()) + require.NoError(t, err) + + if ms[0].ResourcePackageStates == nil { + ms[0].ResourcePackageStates = make(map[string]*state.ResourcePackageState) + } + ms[0].ResourcePackageStates["pkg-i"] = &state.ResourcePackageState{PinnedCPUSet: machine.NewCPUSet(0, 2)} + dp.state.SetMachineState(ms, false) + }, + resourcePackages: createPkgItems(map[int]map[string]int{ + 0: {"pkg-i": 4}, // Request expansion to 4 + }), + verify: func(t *testing.T, ms state.NUMANodeMap, pe state.PodEntries) { + pinned := ms[0].ResourcePackageStates["pkg-i"].PinnedCPUSet + assert.Equal(t, 2, pinned.Size(), "Should stay at original size due to validation failure") + }, + }, + { + name: "Capacity Check: No Package Pods Exceed", + initialState: func(dp *DynamicPolicy) { + // NUMA 0 has 8 CPUs. + // Pkg-j pinned to [0,2] (size 2). + // Config requests 6. + // Pod requests 10. + // Validation should fail. 
+ podID := "pod-no-pkg-exceed" + alloc := &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + PodUid: podID, + PodName: podID, + ContainerName: "c1", + QoSLevel: consts.PodAnnotationQoSLevelSharedCores, + Annotations: map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationResourcePackageKey: "", // Explicit empty + cpuconsts.CPUStateAnnotationKeyNUMAHint: "0", + }, + }, + RequestQuantity: 10.0, + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(4, 5, 6, 7), // Just some assignment on NUMA 0 + }, + } + dp.state.SetAllocationInfo(podID, "c1", alloc, false) + mockPodInMetaServer(dp, alloc, "7") + + // Generate MS from Pods to ensure PodEntries are populated in NUMANodeState + podEntries := dp.state.GetPodEntries() + ms, err := state.GenerateMachineStateFromPodEntries(dp.machineInfo.CPUTopology, podEntries, dp.state.GetMachineState()) + require.NoError(t, err) + + if ms[0].ResourcePackageStates == nil { + ms[0].ResourcePackageStates = make(map[string]*state.ResourcePackageState) + } + ms[0].ResourcePackageStates["pkg-j"] = &state.ResourcePackageState{PinnedCPUSet: machine.NewCPUSet(0, 2)} + dp.state.SetMachineState(ms, false) + }, + resourcePackages: createPkgItems(map[int]map[string]int{ + 0: {"pkg-j": 6}, // Request expansion to 6 + }), + verify: func(t *testing.T, ms state.NUMANodeMap, pe state.PodEntries) { + pinned := ms[0].ResourcePackageStates["pkg-j"].PinnedCPUSet + assert.Equal(t, 2, pinned.Size(), "Should stay at original size due to validation failure") + }, + }, + { + name: "Non-Pinned Package stores Attributes", + initialState: func(dp *DynamicPolicy) { + // Initial: empty + }, + resourcePackages: func() utilresourcepackage.NUMAResourcePackageItems { + items := make(utilresourcepackage.NUMAResourcePackageItems) + items[0] = make(map[string]utilresourcepackage.ResourcePackageItem) + pinned := false + 
items[0]["pkg-unpinned"] = utilresourcepackage.ResourcePackageItem{ + ResourcePackage: nodev1alpha1.ResourcePackage{ + PackageName: "pkg-unpinned", + Allocatable: &v1.ResourceList{ + v1.ResourceCPU: *resource.NewQuantity(int64(4), resource.DecimalSI), + }, + Attributes: []nodev1alpha1.Attribute{ + {Name: "test-attr", Value: "test-val"}, + }, + }, + Config: &utilresourcepackage.ResourcePackageConfig{ + PinnedCPUSet: &pinned, + }, + } + return items + }(), + verify: func(t *testing.T, ms state.NUMANodeMap, pe state.PodEntries) { + pkgState, exists := ms[0].ResourcePackageStates["pkg-unpinned"] + assert.True(t, exists, "Package state should be stored") + assert.True(t, pkgState.PinnedCPUSet.IsEmpty(), "PinnedCPUSet should be empty") + assert.Equal(t, "test-val", pkgState.Attributes["test-attr"], "Attributes should be stored") + }, + }, + { + name: "Mixed Pinned and Non-Pinned Packages", + initialState: func(dp *DynamicPolicy) { + // Initial: pkg-pinned-1 has some allocated CPUSet, and some pods are running + ms := dp.state.GetMachineState() + if ms[0].ResourcePackageStates == nil { + ms[0].ResourcePackageStates = make(map[string]*state.ResourcePackageState) + } + ms[0].ResourcePackageStates["pkg-pinned-1"] = &state.ResourcePackageState{ + PinnedCPUSet: machine.NewCPUSet(0, 1), + Attributes: map[string]string{ + "type": "pinned", + }, + } + // Previous unpinned state should be updated + ms[0].ResourcePackageStates["pkg-unpinned-1"] = &state.ResourcePackageState{ + PinnedCPUSet: machine.NewCPUSet(), + Attributes: map[string]string{ + "old-attr": "old-val", + }, + } + dp.state.SetMachineState(ms, false) + }, + resourcePackages: func() utilresourcepackage.NUMAResourcePackageItems { + items := make(utilresourcepackage.NUMAResourcePackageItems) + items[0] = make(map[string]utilresourcepackage.ResourcePackageItem) + + pinnedTrue := true + items[0]["pkg-pinned-1"] = utilresourcepackage.ResourcePackageItem{ + ResourcePackage: nodev1alpha1.ResourcePackage{ + PackageName: 
"pkg-pinned-1", + Allocatable: &v1.ResourceList{ + v1.ResourceCPU: *resource.NewQuantity(int64(4), resource.DecimalSI), // Request expansion to 4 + }, + Attributes: []nodev1alpha1.Attribute{ + {Name: "type", Value: "pinned-expanded"}, + }, + }, + Config: &utilresourcepackage.ResourcePackageConfig{ + PinnedCPUSet: &pinnedTrue, + }, + } + + pinnedFalse := false + items[0]["pkg-unpinned-1"] = utilresourcepackage.ResourcePackageItem{ + ResourcePackage: nodev1alpha1.ResourcePackage{ + PackageName: "pkg-unpinned-1", + Allocatable: &v1.ResourceList{ + v1.ResourceCPU: *resource.NewQuantity(int64(2), resource.DecimalSI), + }, + Attributes: []nodev1alpha1.Attribute{ + {Name: "new-attr", Value: "new-val"}, // Update attributes + }, + }, + Config: &utilresourcepackage.ResourcePackageConfig{ + PinnedCPUSet: &pinnedFalse, + }, + } + + // Add a new package without PinnedCPUSet config (implicitly non-pinned) + items[0]["pkg-unpinned-implicit"] = utilresourcepackage.ResourcePackageItem{ + ResourcePackage: nodev1alpha1.ResourcePackage{ + PackageName: "pkg-unpinned-implicit", + Allocatable: &v1.ResourceList{ + v1.ResourceCPU: *resource.NewQuantity(int64(2), resource.DecimalSI), + }, + Attributes: []nodev1alpha1.Attribute{ + {Name: "implicit", Value: "true"}, + }, + }, + } + + return items + }(), + verify: func(t *testing.T, ms state.NUMANodeMap, pe state.PodEntries) { + // Verify pkg-pinned-1 expanded + pkgPinned1, exists := ms[0].ResourcePackageStates["pkg-pinned-1"] + assert.True(t, exists) + assert.Equal(t, 4, pkgPinned1.PinnedCPUSet.Size(), "PinnedCPUSet should be expanded to 4") + assert.Equal(t, "pinned-expanded", pkgPinned1.Attributes["type"], "Attributes should be updated") + + // Verify pkg-unpinned-1 attributes updated and cpuset is empty + pkgUnpinned1, exists := ms[0].ResourcePackageStates["pkg-unpinned-1"] + assert.True(t, exists) + assert.True(t, pkgUnpinned1.PinnedCPUSet.IsEmpty(), "PinnedCPUSet should be empty") + assert.Equal(t, "new-val", 
pkgUnpinned1.Attributes["new-attr"], "Attributes should be updated") + assert.NotContains(t, pkgUnpinned1.Attributes, "old-attr", "Old attributes should be removed if not present") + + // Verify pkg-unpinned-implicit stored attributes and cpuset is empty + pkgImplicit, exists := ms[0].ResourcePackageStates["pkg-unpinned-implicit"] + assert.True(t, exists) + assert.True(t, pkgImplicit.PinnedCPUSet.IsEmpty(), "PinnedCPUSet should be empty") + assert.Equal(t, "true", pkgImplicit.Attributes["implicit"]) + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + tmpDir, err := ioutil.TempDir("", "checkpoint-test") + require.NoError(t, err) + defer os.RemoveAll(tmpDir) + + dp, err := getTestDynamicPolicyWithInitialization(cpuTopology, tmpDir) + require.NoError(t, err) + + tt.initialState(dp) + + // Mock Metrics Emitter + mockEmitter := NewMockMetricsEmitter() + dp.emitter = mockEmitter + + // Mock Resource Package Manager + mockMgr := &mockResourcePackageManager{ + items: tt.resourcePackages, + } + dp.resourcePackageManager = metaresourcepackage.NewCachedResourcePackageManager(mockMgr) + stopCh := make(chan struct{}) + _ = dp.resourcePackageManager.Run(stopCh) + defer close(stopCh) + + // Run Sync + dp.syncResourcePackagePinnedCPUSet() + + // Verify + ms := dp.state.GetMachineState() + pe := dp.state.GetPodEntries() + tt.verify(t, ms, pe) + + if tt.checkMetrics != nil { + tt.checkMetrics(t, mockEmitter) + } + }) + } +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_resource_package_test_helper.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_resource_package_test_helper.go new file mode 100644 index 0000000000..553da2d2ac --- /dev/null +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_resource_package_test_helper.go @@ -0,0 +1,49 @@ +/* +Copyright 2022 The Katalyst Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package dynamicpolicy + +import ( + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" +) + +func mockPodInMetaServer(dp *DynamicPolicy, alloc *state.AllocationInfo, cpuReq string) { + mockPod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(alloc.PodUid), + Name: alloc.PodName, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: alloc.ContainerName, + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(cpuReq), + }, + }, + }, + }, + }, + } + dp.metaServer.PodFetcher = &pod.PodFetcherStub{PodList: []*v1.Pod{mockPod}} +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go index 2d700c2bea..1417ebc847 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go @@ -39,6 +39,7 @@ import ( pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" utilfs "k8s.io/kubernetes/pkg/util/filesystem" + nodev1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" "github.com/kubewharf/katalyst-api/pkg/consts" katalystbase "github.com/kubewharf/katalyst-core/cmd/base" 
componentagent "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent" @@ -59,11 +60,14 @@ import ( "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/statedirectory" "github.com/kubewharf/katalyst-core/pkg/config/generic" + coreconsts "github.com/kubewharf/katalyst-core/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/metaserver" "github.com/kubewharf/katalyst-core/pkg/metaserver/agent" "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric" "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" "github.com/kubewharf/katalyst-core/pkg/metaserver/kcc" + "github.com/kubewharf/katalyst-core/pkg/metaserver/npd" + "github.com/kubewharf/katalyst-core/pkg/metaserver/resourcepackage" "github.com/kubewharf/katalyst-core/pkg/metaserver/spd" "github.com/kubewharf/katalyst-core/pkg/metrics" metricspool "github.com/kubewharf/katalyst-core/pkg/metrics/metrics-pool" @@ -162,6 +166,7 @@ func getTestDynamicPolicyWithoutInitialization( podDebugAnnoKeys: []string{podDebugAnnoKey}, numaNumberAnnotationKey: consts.PodAnnotationCPUEnhancementNumaNumber, numaIDsAnnotationKey: consts.PodAnnotationCPUEnhancementNumaIDs, + resourcePackageManager: resourcepackage.NewCachedResourcePackageManager(resourcepackage.NewResourcePackageManager(&npd.DummyNPDFetcher{NPD: &nodev1alpha1.NodeProfileDescriptor{}})), } // register allocation behaviors for pods with different QoS level @@ -539,6 +544,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"cpu":"3"},"attributes":{"CpusetCpus":"1,8-9"}}}}`, + }, }, }, }, @@ -601,6 +609,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"cpu":"2"},"attributes":{"CpusetCpus":"1,9"}}}}`, + }, }, }, }, @@ -663,6 +674,9 @@ func TestAllocate(t *testing.T) 
{ }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"cpu":"3"},"attributes":{"CpusetCpus":"1,8-9"}}}}`, + }, }, }, }, @@ -728,6 +742,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"cpu":"2"},"attributes":{"CpusetCpus":"1,9"}}}}`, + }, }, }, }, @@ -789,6 +806,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"cpu":"2"}}}}`, + }, }, }, }, @@ -851,6 +871,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"cpu":"300m"}}}}`, + }, }, }, }, @@ -941,6 +964,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"cpu":"1"}}}}`, + }, }, }, }, @@ -1105,6 +1131,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"cpu":"300m"}}}}`, + }, }, }, }, @@ -1230,6 +1259,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"2":{"allocated":{"cpu":"2"},"attributes":{"CpusetCpus":"4,12"}},"3":{"allocated":{"cpu":"2"},"attributes":{"CpusetCpus":"6,14"}}}}`, + }, }, }, }, @@ -1295,6 +1327,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"2":{"allocated":{"cpu":"2"},"attributes":{"CpusetCpus":"4,12"}}}}`, + }, }, }, }, @@ -1388,6 +1423,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: 
`{"Numa":{"2":{"allocated":{"cpu":"3"},"attributes":{"CpusetCpus":"4-5,12"}},"3":{"allocated":{"cpu":"3"},"attributes":{"CpusetCpus":"6-7,14"}}}}`, + }, }, }, }, @@ -1454,6 +1492,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"2":{"allocated":{"cpu":"4"},"attributes":{"CpusetCpus":"4-5,12-13"}},"3":{"allocated":{"cpu":"2"},"attributes":{"CpusetCpus":"6,14"}}}}`, + }, }, }, }, @@ -1505,7 +1546,7 @@ func TestAllocate(t *testing.T) { as.Equalf(tc.expectedResp, resp, "failed in test case: %s", tc.name) if tc.allowSharedCoresOverlapReclaimedCores { - err := dynamicPolicy.adjustAllocationEntries(true) + err := dynamicPolicy.adjustAllocationEntries(dynamicPolicy.state.GetPodEntries(), dynamicPolicy.state.GetMachineState(), true) as.NotNil(err) } @@ -4906,6 +4947,269 @@ func TestGetTopologyHints(t *testing.T) { }, cpuTopology: cpuTopology, }, + { + name: "req with numa number makes sure that the hints only belong to those numa nodes", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 2, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "true"}`, + consts.PodAnnotationCPUEnhancementKey: `{"katalyst.kubewharf.io/numa_number": "2"}`, + }, + }, + expectedResp: &pluginapi.ResourceHintsResponse{ + PodName: testName, + PodNamespace: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: 
string(v1.ResourceCPU), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + string(v1.ResourceCPU): { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0, 1}, + Preferred: true, + }, + { + Nodes: []uint64{2, 3}, + Preferred: true, + }, + }, + }, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + consts.PodAnnotationCPUEnhancementNumaNumber: "2", + }, + }, + cpuTopology: cpuTopology, + }, + { + name: "req with numa ID for one numa node only", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 1, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "true"}`, + consts.PodAnnotationCPUEnhancementKey: `{"katalyst.kubewharf.io/numa_ids": "1"}`, + }, + }, + expectedResp: &pluginapi.ResourceHintsResponse{ + PodName: testName, + PodNamespace: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceCPU), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + string(v1.ResourceCPU): { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{1}, + 
Preferred: true, + }, + }, + }, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + consts.PodAnnotationCPUEnhancementNumaIDs: "1", + }, + }, + cpuTopology: cpuTopology, + }, + { + name: "req with numa ID for multiple numa nodes", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 4, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "true"}`, + consts.PodAnnotationCPUEnhancementKey: `{"katalyst.kubewharf.io/numa_ids": "1-3"}`, + }, + }, + expectedResp: &pluginapi.ResourceHintsResponse{ + PodName: testName, + PodNamespace: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceCPU), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + string(v1.ResourceCPU): { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{1, 2, 3}, + Preferred: true, + }, + }, + }, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: 
consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + consts.PodAnnotationCPUEnhancementNumaIDs: "1-3", + }, + }, + cpuTopology: cpuTopology, + }, + { + name: "numa IDs will override numa number", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 4, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "true"}`, + consts.PodAnnotationCPUEnhancementKey: `{"katalyst.kubewharf.io/numa_number": "2", "katalyst.kubewharf.io/numa_ids": "0-2"}`, + }, + }, + expectedResp: &pluginapi.ResourceHintsResponse{ + PodName: testName, + PodNamespace: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceCPU), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + string(v1.ResourceCPU): { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0, 1, 2}, + Preferred: true, + }, + }, + }, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + 
consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + consts.PodAnnotationCPUEnhancementNumaIDs: "0-2", + consts.PodAnnotationCPUEnhancementNumaNumber: "2", + }, + }, + cpuTopology: cpuTopology, + }, + { + name: "custom numa number and numa ids annotation are supported", + numaNumberAnnotationKey: "custom_numa_number_annotation", + numaIDsAnnotationKey: "custom_numa_ids_annotation", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 4, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "true"}`, + consts.PodAnnotationCPUEnhancementKey: `{"custom_numa_number_annotation": "2", "custom_numa_ids_annotation": "0-2"}`, + }, + }, + expectedResp: &pluginapi.ResourceHintsResponse{ + PodName: testName, + PodNamespace: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceCPU), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + string(v1.ResourceCPU): { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0, 1, 2}, + Preferred: true, + }, + }, + }, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + 
consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + "custom_numa_number_annotation": "2", + "custom_numa_ids_annotation": "0-2", + }, + }, + cpuTopology: cpuTopology, + }, } for _, tc := range testCases { @@ -7641,13 +7945,17 @@ func TestSwitchBetweenAPIs(t *testing.T) { as := require.New(t) lwEndedChan := make(chan time.Time) + lwStartedChan := make(chan struct{}) unimplementedGetAdviceCall := cpuAdvisorServer. On("GetAdvice", mock.Anything, mock.Anything). Once(). Return((*advisorapi.GetAdviceResponse)(nil), status.Error(codes.Unimplemented, "GetAdvice not implemented")) cpuAdvisorServer.On("ListAndWatch", mock.Anything, mock.Anything). Once(). - WaitUntil(lwEndedChan). + Run(func(args mock.Arguments) { + close(lwStartedChan) + <-lwEndedChan + }). Return(nil). NotBefore(unimplementedGetAdviceCall) @@ -7656,7 +7964,11 @@ func TestSwitchBetweenAPIs(t *testing.T) { defer dynamicPolicy.Stop() // Wait for the plugin to call advisor - time.Sleep(3 * time.Second) + select { + case <-lwStartedChan: + case <-time.After(10 * time.Second): + t.Fatalf("ListAndWatch not called") + } cpuAdvisorServer.AssertExpectations(t) // ListAndWatch in progress, simulate an upgrade diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/resize_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/resize_test.go index 4c12928042..5a96168e33 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/resize_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/resize_test.go @@ -34,6 +34,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" cpuutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/util" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" + coreconsts "github.com/kubewharf/katalyst-core/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/util/machine" ) @@ -1284,6 +1285,9 @@ func TestReclaimedCoresVPA(t *testing.T) { }, }, }, + Annotations: map[string]string{ + 
coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"cpu":"1"}}}}`, + }, }, }, }, @@ -1406,6 +1410,9 @@ func TestReclaimedCoresVPA(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"cpu":"3"}}}}`, + }, }, }, }, diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go index 73b7e8dc86..6aa7bc131e 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go @@ -60,11 +60,71 @@ type ( PodEntries map[string]ContainerEntries // Keyed by podUID. ) +type ResourcePackageState struct { + Attributes map[string]string `json:"attributes,omitempty"` + PinnedCPUSet machine.CPUSet `json:"pinned_cpuset,omitempty"` +} + +func (r *ResourcePackageState) GetAttributes() map[string]string { + if r == nil { + return nil + } + return r.Attributes +} + +func (r *ResourcePackageState) GetPinnedCPUSet() machine.CPUSet { + if r == nil { + return machine.NewCPUSet() + } + return r.PinnedCPUSet +} + +func (r *ResourcePackageState) Clone() *ResourcePackageState { + if r == nil { + return nil + } + clone := &ResourcePackageState{ + PinnedCPUSet: r.PinnedCPUSet.Clone(), + } + if r.Attributes != nil { + clone.Attributes = make(map[string]string, len(r.Attributes)) + for k, v := range r.Attributes { + clone.Attributes[k] = v + } + } + return clone +} + +// Equals checks if two ResourcePackageState are equal. 
+func (r *ResourcePackageState) Equals(other *ResourcePackageState) bool { + if r == other { + return true + } + if r == nil || other == nil { + return false + } + if !r.PinnedCPUSet.Equals(other.PinnedCPUSet) { + return false + } + if len(r.Attributes) != len(other.Attributes) { + return false + } + for k, v := range r.Attributes { + if otherV, ok := other.Attributes[k]; !ok || v != otherV { + return false + } + } + return true +} + type NUMANodeState struct { // equals to allocatable cpuset subtracting original allocation result of dedicated_cores with NUMA binding DefaultCPUSet machine.CPUSet `json:"default_cpuset,omitempty"` // equals to original allocation result of dedicated_cores with NUMA binding AllocatedCPUSet machine.CPUSet `json:"allocated_cpuset,omitempty"` + // equals to a map of resource package name to resource package state if the resource package's + // config has set `pinnedCPUSet` to `true`. + ResourcePackageStates map[string]*ResourcePackageState `json:"resource_package_states,omitempty"` PodEntries PodEntries `json:"pod_entries"` // pre-occupation pod entries which is for pod needs pre-occupation @@ -333,12 +393,23 @@ func (ns *NUMANodeState) Clone() *NUMANodeState { if ns == nil { return nil } - return &NUMANodeState{ + + clone := &NUMANodeState{ DefaultCPUSet: ns.DefaultCPUSet.Clone(), AllocatedCPUSet: ns.AllocatedCPUSet.Clone(), PodEntries: ns.PodEntries.Clone(), PreOccPodEntries: ns.PreOccPodEntries.Clone(), } + + if ns.ResourcePackageStates != nil { + clone.ResourcePackageStates = make(map[string]*ResourcePackageState) + + for pkgName, state := range ns.ResourcePackageStates { + clone.ResourcePackageStates[pkgName] = state.Clone() + } + } + + return clone } // GetAvailableCPUSet returns available cpuset in this numa @@ -537,6 +608,51 @@ func (nm NUMANodeMap) GetDefaultCPUSet() machine.CPUSet { return res } +// GetNUMAResourcePackagePinnedCPUSet returns a map of numa id to resource package name to pinned cpuset if the resource package's 
+// config has set `pinnedCPUSet` to `true`. +func (nm NUMANodeMap) GetNUMAResourcePackagePinnedCPUSet() map[int]map[string]machine.CPUSet { + numaResourcePackagePinnedCPUSet := make(map[int]map[string]machine.CPUSet) + for numaID, numaNodeState := range nm { + if _, ok := numaResourcePackagePinnedCPUSet[numaID]; !ok { + numaResourcePackagePinnedCPUSet[numaID] = make(map[string]machine.CPUSet) + } + for resourcePackage, rpState := range numaNodeState.ResourcePackageStates { + if rpState != nil && !rpState.PinnedCPUSet.IsEmpty() { + numaResourcePackagePinnedCPUSet[numaID][resourcePackage] = numaResourcePackagePinnedCPUSet[numaID][resourcePackage].Union(rpState.PinnedCPUSet) + } + } + } + return numaResourcePackagePinnedCPUSet +} + +// GetResourcePackagePinnedCPUSet returns a map of resource package name to pinned cpuset if the resource package's +// config has set `pinnedCPUSet` to `true`. +func (nm NUMANodeMap) GetResourcePackagePinnedCPUSet() map[string]machine.CPUSet { + rpPinnedCPUSet := make(map[string]machine.CPUSet) + for _, numaNodeState := range nm { + for resourcePackage, rpState := range numaNodeState.ResourcePackageStates { + if rpState != nil && !rpState.PinnedCPUSet.IsEmpty() { + rpPinnedCPUSet[resourcePackage] = rpPinnedCPUSet[resourcePackage].Union(rpState.PinnedCPUSet) + } + } + } + return rpPinnedCPUSet +} + +// GetNUMAResourcePackageStates returns a map of numa id to resource package name to resource package state +func (nm NUMANodeMap) GetNUMAResourcePackageStates() map[int]map[string]*ResourcePackageState { + numaResourcePackageStates := make(map[int]map[string]*ResourcePackageState) + for numaID, numaNodeState := range nm { + if _, ok := numaResourcePackageStates[numaID]; !ok { + numaResourcePackageStates[numaID] = make(map[string]*ResourcePackageState) + } + for resourcePackage, rpState := range numaNodeState.ResourcePackageStates { + numaResourcePackageStates[numaID][resourcePackage] = rpState.Clone() + } + } + return 
numaResourcePackageStates +} + // GetAvailableCPUSet returns available cpuset in this node func (nm NUMANodeMap) GetAvailableCPUSet(reservedCPUs machine.CPUSet) machine.CPUSet { return nm.GetDefaultCPUSet().Difference(reservedCPUs) diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state_test.go index ba93926ef5..3c28b06720 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state_test.go @@ -540,7 +540,7 @@ func TestNewCheckpointState(t *testing.T) { } } }, - "checksum": 219359606 + "checksum": 4188873110 }`, "", &cpuPluginState{ diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go index 745f07c80a..d261c0f76c 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go @@ -30,6 +30,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util/preoccupation" "github.com/kubewharf/katalyst-core/pkg/util/general" "github.com/kubewharf/katalyst-core/pkg/util/machine" + rputil "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) type GetContainerRequestedCoresFunc func(allocationInfo *AllocationInfo) float64 @@ -144,7 +145,12 @@ func GetIsolatedQuantityMapFromPodEntries(podEntries PodEntries, ignoreAllocatio // GetSharedQuantityMapFromPodEntries returns a map to indicates quantity info for each shared pool, // and the map is formatted as pool -> quantity -func GetSharedQuantityMapFromPodEntries(podEntries PodEntries, ignoreAllocationInfos []*AllocationInfo, getContainerRequestedCores GetContainerRequestedCoresFunc) (map[string]map[int]int, error) { +func GetSharedQuantityMapFromPodEntries( + numaResourcePackagePinnedCPUSet map[int]map[string]machine.CPUSet, + podEntries PodEntries, + ignoreAllocationInfos []*AllocationInfo, + getContainerRequestedCores 
GetContainerRequestedCoresFunc, +) (map[string]map[int]int, error) { poolsQuantityMap := make(map[string]map[int]int) allocationInfosToCount := make([]*AllocationInfo, 0, len(podEntries)) for _, entries := range podEntries { @@ -172,7 +178,7 @@ func GetSharedQuantityMapFromPodEntries(podEntries PodEntries, ignoreAllocationI } } - err := CountAllocationInfosToPoolsQuantityMap(allocationInfosToCount, poolsQuantityMap, getContainerRequestedCores) + err := CountAllocationInfosToPoolsQuantityMap(numaResourcePackagePinnedCPUSet, allocationInfosToCount, poolsQuantityMap, getContainerRequestedCores) if err != nil { return nil, fmt.Errorf("CountAllocationInfosToPoolsQuantityMap faild with error: %v", err) } @@ -252,10 +258,26 @@ func GenerateMachineStateFromPodEntries(topology *machine.CPUTopology, podEntrie return nil, err } + updateResourcePackageStates(currentMachineState, originMachineState) updateMachineStatePreOccPodEntries(currentMachineState, originMachineState) return currentMachineState, nil } +func updateResourcePackageStates(currentMachineState, originMachineState NUMANodeMap) { + for numaID, originState := range originMachineState { + if originState == nil { + continue + } + + if currentState, ok := currentMachineState[numaID]; ok && currentState != nil && originState.ResourcePackageStates != nil { + currentState.ResourcePackageStates = make(map[string]*ResourcePackageState, len(originState.ResourcePackageStates)) + for pkgName, state := range originState.ResourcePackageStates { + currentState.ResourcePackageStates[pkgName] = state.Clone() + } + } + } +} + // updateMachineStatePreOccPodEntries update the pre-occupation pod from pod entries and origin machine state func updateMachineStatePreOccPodEntries(currentMachineState, originMachineState NUMANodeMap) { // override pre-occupation pod from pod entries @@ -340,7 +362,9 @@ func GetSharedBindingNUMAsFromQuantityMap(poolsQuantityMap map[string]map[int]in return res } -func 
CountAllocationInfosToPoolsQuantityMap(allocationInfos []*AllocationInfo, +func CountAllocationInfosToPoolsQuantityMap( + numaResourcePackagePinnedCPUSet map[int]map[string]machine.CPUSet, + allocationInfos []*AllocationInfo, poolsQuantityMap map[string]map[int]int, getContainerRequestedCores GetContainerRequestedCoresFunc, ) error { @@ -399,9 +423,20 @@ func CountAllocationInfosToPoolsQuantityMap(allocationInfos []*AllocationInfo, return fmt.Errorf("numaHintStr: %s indicates invalid numaSet numa_binding shared_cores", allocationInfo.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint]) } + + pkgName := allocationInfo.GetResourcePackageName() + if pkgName != "" && poolName != commonstate.EmptyOwnerPoolName { + if pinnedSets, ok := numaResourcePackagePinnedCPUSet[targetNUMAID]; ok { + if cpuSet, exists := pinnedSets[pkgName]; exists && cpuSet.Size() > 0 { + poolName = rputil.WrapOwnerPoolName(poolName, pkgName) + } + } + } } else { targetNUMAID = commonstate.FakedNUMAID poolName = allocationInfo.GetPoolName() + + // only numa binding allocation support resource package } if poolName == commonstate.EmptyOwnerPoolName { diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util_test.go index ab3f286ea2..d92eaa48fa 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util_test.go @@ -542,8 +542,9 @@ func TestCountAllocationInfosToPoolsQuantityMap(t *testing.T) { testName := "test" type args struct { - allocationInfos []*AllocationInfo - poolsQuantityMap map[string]map[int]int + numaResourcePackagePinnedCPUSet map[int]map[string]machine.CPUSet + allocationInfos []*AllocationInfo + poolsQuantityMap map[string]map[int]int } tests := []struct { name string @@ -652,6 +653,55 @@ func TestCountAllocationInfosToPoolsQuantityMap(t *testing.T) { wantErr: false, }, + { + name: "count allocationInfos to pools quantity map with resource package", 
+ args: args{ + numaResourcePackagePinnedCPUSet: map[int]map[string]machine.CPUSet{ + 3: { + "package-a": machine.NewCPUSet(6, 7, 14), + }, + }, + allocationInfos: []*AllocationInfo{ + { + AllocationMeta: commonstate.AllocationMeta{ + PodUid: "373d08e4-7a6b-4293-aaaf-b135ff812ccc", + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + OwnerPoolName: "share-NUMA3", + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationResourcePackageKey: "package-a", + }, + QoSLevel: consts.PodAnnotationQoSLevelSharedCores, + }, + RampUp: false, + AllocationResult: machine.MustParse("6,7,14"), + OriginalAllocationResult: machine.MustParse("6,7,14"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 3: machine.NewCPUSet(6, 7, 14), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 3: machine.NewCPUSet(6, 7, 14), + }, + RequestQuantity: 1.1, + }, + }, + poolsQuantityMap: map[string]map[int]int{}, + }, + want: map[string]map[int]int{ + "package-a/share-NUMA3": { + 3: 3, + }, + }, + wantErr: false, + }, { name: "count allocationInfos to pools quantity map with invalid hint", args: args{ @@ -839,7 +889,7 @@ func TestCountAllocationInfosToPoolsQuantityMap(t *testing.T) { tt := tt t.Run(tt.name, func(t *testing.T) { t.Parallel() - if err := CountAllocationInfosToPoolsQuantityMap(tt.args.allocationInfos, tt.args.poolsQuantityMap, func(allocationInfo *AllocationInfo) float64 { + if err := CountAllocationInfosToPoolsQuantityMap(tt.args.numaResourcePackagePinnedCPUSet, tt.args.allocationInfos, tt.args.poolsQuantityMap, func(allocationInfo *AllocationInfo) float64 { return 
allocationInfo.RequestQuantity }); (err != nil) != tt.wantErr { t.Errorf("CountAllocationInfosToPoolsQuantityMap() error = %v, wantErr %v", err, tt.wantErr) diff --git a/pkg/agent/qrm-plugins/cpu/util/util.go b/pkg/agent/qrm-plugins/cpu/util/util.go index 3fcb888430..780e207ad2 100644 --- a/pkg/agent/qrm-plugins/cpu/util/util.go +++ b/pkg/agent/qrm-plugins/cpu/util/util.go @@ -20,15 +20,20 @@ import ( "context" "fmt" "math" + "strconv" pkgerrors "github.com/pkg/errors" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/labels" "k8s.io/klog/v2" pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" "github.com/kubewharf/katalyst-api/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/calculator" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" "github.com/kubewharf/katalyst-core/pkg/config" "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" "github.com/kubewharf/katalyst-core/pkg/config/generic" @@ -38,6 +43,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/util/machine" "github.com/kubewharf/katalyst-core/pkg/util/native" "github.com/kubewharf/katalyst-core/pkg/util/qos" + resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) const ( @@ -49,6 +55,9 @@ var ( ErrNoAvailableMemoryBandwidthHints = pkgerrors.New("no available memory bandwidth hints") ) +// GetCoresReservedForSystem calculates the CPU cores reserved for the system based on the configuration. +// It prioritizes the kubelet configuration if enabled, otherwise falls back to the static configuration. +// It returns a CPUSet representing the reserved cores. 
func GetCoresReservedForSystem(conf *config.Configuration, metaServer *metaserver.MetaServer, machineInfo *machine.KatalystMachineInfo, allCPUs machine.CPUSet) (machine.CPUSet, error) { if conf == nil { return machine.NewCPUSet(), fmt.Errorf("nil conf") @@ -157,7 +166,7 @@ func RegenerateHints(allocationInfo *state.AllocationInfo, regenerate bool) map[ // PackAllocationResponse fills pluginapi.ResourceAllocationResponse with information from AllocationInfo and pluginapi.ResourceRequest func PackAllocationResponse(allocationInfo *state.AllocationInfo, resourceName, ociPropertyName string, - isNodeResource, isScalarResource bool, req *pluginapi.ResourceRequest, + isNodeResource, isScalarResource bool, req *pluginapi.ResourceRequest, resourceAllocationAnnotations ...map[string]string, ) (*pluginapi.ResourceAllocationResponse, error) { if allocationInfo == nil { return nil, fmt.Errorf("packAllocationResponse got nil allocationInfo") @@ -188,6 +197,7 @@ func PackAllocationResponse(allocationInfo *state.AllocationInfo, resourceName, req.Hint, }, }, + Annotations: general.MergeAnnotations(resourceAllocationAnnotations...), }, }, }, @@ -197,6 +207,89 @@ func PackAllocationResponse(allocationInfo *state.AllocationInfo, resourceName, }, nil } +// GetCPUTopologyAllocationsAnnotations returns the topology-aware CPU allocation annotations for a given container. +// It handles different QoS levels: +// - For DedicatedCores: uses the actual assigned CPUSet size on each NUMA node. +// - For SharedCores/ReclaimedCores with NUMA binding: uses the pod's aggregated request quantity. +// This function only processes main containers and returns nil for sidecars or invalid allocation info. +func GetCPUTopologyAllocationsAnnotations(allocationInfo *state.AllocationInfo, + topologyAllocationAnnotationKey string, req *pluginapi.ResourceRequest, +) (map[string]string, error) { + // Skip processing if allocation info is invalid or it's not the main container. 
+ if allocationInfo == nil || !allocationInfo.CheckMainContainer() { + return nil, nil + } + + assignments := allocationInfo.TopologyAwareAssignments + // Determine if this is a shared or reclaimed QoS level with NUMA binding enabled. + // This affects whether we use the request quantity or the physical assignment size. + isSharedOrReclaimedNUMABinding := allocationInfo.CheckSharedNUMABinding() || allocationInfo.CheckReclaimedNUMABinding() + + // For shared or reclaimed cores with NUMA binding, they must be restricted to a single NUMA node. + var reqQty float64 + if isSharedOrReclaimedNUMABinding { + if len(assignments) != 1 { + return nil, fmt.Errorf("shared/reclaimed cores with NUMA binding should be pinned to exactly 1 NUMA node, got %d", len(assignments)) + } + + // Use the aggregated pod request quantity for shared/reclaimed cores to represent the logical allocation. + _, reqFloat64, err := util.GetPodAggregatedRequestResource(req) + if err != nil { + return nil, fmt.Errorf("failed to get aggregated request resource: %v", err) + } + reqQty = reqFloat64 + } + + // Initialize the internal TopologyAllocation structure. + topologyAllocation := v1alpha1.TopologyAllocation{ + v1alpha1.TopologyTypeNuma: make(map[string]v1alpha1.ZoneAllocation), + } + + for numaNode, assignment := range assignments { + var cpuQty float64 + var assignmentToReport machine.CPUSet + if isSharedOrReclaimedNUMABinding { + cpuQty = reqQty + // For shared/reclaimed cores, we don't report the CPUSet attribute in the annotation. + assignmentToReport = machine.NewCPUSet() + } else { + // For dedicated cores, the quantity is the physical count of CPUs assigned on this NUMA node. + cpuQty = float64(assignment.Size()) + assignmentToReport = assignment + } + + topologyAllocation[v1alpha1.TopologyTypeNuma][strconv.Itoa(numaNode)] = buildZoneAllocation(cpuQty, assignmentToReport) + } + + // Generate the final resource allocation annotations from the topology structure. 
+ return util.MakeTopologyAllocationResourceAllocationAnnotations( + topologyAllocation, + topologyAllocationAnnotationKey, + ), nil +} + +// buildZoneAllocation creates a ZoneAllocation for a specific NUMA node. +// It uses NewMilliQuantity to accurately represent decimal CPU quantities (up to 0.001 core precision). +func buildZoneAllocation(cpuQty float64, assignment machine.CPUSet) v1alpha1.ZoneAllocation { + za := v1alpha1.ZoneAllocation{ + Allocated: map[v1.ResourceName]resource.Quantity{ + // Convert cores to milli-cores and round to the nearest integer. + v1.ResourceCPU: *resource.NewMilliQuantity(int64(math.Round(cpuQty*1000)), resource.DecimalSI), + }, + } + + // Only report the CPUSet attribute if the assignment is non-empty. + if !assignment.IsEmpty() { + za.Attributes = map[string]string{ + util.OCIPropertyNameCPUSetCPUs: assignment.String(), + } + } + + return za +} + +// AdvisorDegradation checks if the advisor is in a degraded state. +// It returns true if the advisor is unhealthy and reclaim is disabled. func AdvisorDegradation(advisorHealth, enableReclaim bool) bool { advisorDegradation := !advisorHealth && !enableReclaim @@ -205,6 +298,8 @@ func AdvisorDegradation(advisorHealth, enableReclaim bool) bool { return advisorDegradation } +// CPUIsSufficient checks if the requested CPU is within the available limits, +// considering a small tolerance for floating point comparison. func CPUIsSufficient(request, available float64) bool { // the minimal CPU core is 0.001 (1core = 1000m) return request < available+0.0001 @@ -262,6 +357,7 @@ func GetContainerRequestedCores(metaServer *metaserver.MetaServer, allocationInf return allocationInfo.RequestQuantity } +// PopulateHintsByAvailableNUMANodes appends topology hints for the given NUMA nodes to the hints list. 
func PopulateHintsByAvailableNUMANodes( numaNodes []int, hints *pluginapi.ListOfTopologyHints, @@ -396,6 +492,7 @@ func getDefaultCPUBurstPercent(dynamicConfig *dynamic.DynamicAgentConfiguration) return defaultCPUBurstPercent } +// CalculateCPUBurstFromPercent calculates the CPU burst value based on the given percentage and CPU quota. func CalculateCPUBurstFromPercent(percent float64, cpuQuota int64) uint64 { return uint64(float64(cpuQuota) * percent / 100) } @@ -429,3 +526,15 @@ func IsSoleSharedCoresPod(conf *config.Configuration, podList []*v1.Pod, dynamic return false } + +// GetAggResourcePackagePinnedCPUSet aggregates pinned CPUSets from resource packages that match the given attribute selector. +// It iterates through each NUMA node's resource packages, filters them using the attribute selector, +// and unions the pinned CPUSets from the corresponding machine state. +func GetAggResourcePackagePinnedCPUSet(attributeSelector labels.Selector, machineState state.NUMANodeMap) machine.CPUSet { + res := machine.NewCPUSet() + numaStates := machineState.GetNUMAResourcePackageStates() + for _, cpuset := range resourcepackage.GetNUMAMatchedPinnedCPUSet(numaStates, attributeSelector) { + res = res.Union(cpuset) + } + return res +} diff --git a/pkg/agent/qrm-plugins/cpu/util/util_test.go b/pkg/agent/qrm-plugins/cpu/util/util_test.go index a3a47d6e85..a3fcc46853 100644 --- a/pkg/agent/qrm-plugins/cpu/util/util_test.go +++ b/pkg/agent/qrm-plugins/cpu/util/util_test.go @@ -17,15 +17,19 @@ limitations under the License. 
package util import ( + "encoding/json" "reflect" "testing" "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" "k8s.io/utils/ptr" + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" "github.com/kubewharf/katalyst-api/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" @@ -37,6 +41,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/finegrainedresource" "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm" "github.com/kubewharf/katalyst-core/pkg/config/generic" + coreconsts "github.com/kubewharf/katalyst-core/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/metaserver" "github.com/kubewharf/katalyst-core/pkg/util/machine" ) @@ -808,6 +813,346 @@ func TestGetPodCPUBurstPercent(t *testing.T) { } } +// helper to extract topology allocation from annotations JSON +func parseCPUTopologyAllocationFromAnno(t *testing.T, annos map[string]string) v1alpha1.TopologyAllocation { + t.Helper() + if annos == nil { + return nil + } + raw, ok := annos[coreconsts.QRMPodAnnotationTopologyAllocationKey] + if !ok { + return nil + } + var ta v1alpha1.TopologyAllocation + if err := json.Unmarshal([]byte(raw), &ta); err != nil { + t.Fatalf("failed to unmarshal topology allocation: %v", err) + } + return ta +} + +func TestGetCPUTopologyAllocationsAnnotations(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + ai *state.AllocationInfo + req *pluginapi.ResourceRequest + wantNilAnno bool + wantErr bool + wantTopology v1alpha1.TopologyAllocation + }{ + { + name: "nil allocation info returns nil", + ai: nil, + req: nil, + wantNilAnno: true, + }, + { + name: "empty assignments produce empty NUMA map", + ai: &state.AllocationInfo{ + 
AllocationMeta: commonstate.AllocationMeta{ + ContainerType: pluginapi.ContainerType_MAIN.String(), + }, + TopologyAwareAssignments: map[int]machine.CPUSet{}, + }, + req: &pluginapi.ResourceRequest{ + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 2, + }, + }, + wantTopology: v1alpha1.TopologyAllocation{ + v1alpha1.TopologyTypeNuma: map[string]v1alpha1.ZoneAllocation{}, + }, + }, + { + name: "non-empty assignments add quantities and attributes", + ai: &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + ContainerType: pluginapi.ContainerType_MAIN.String(), + }, + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1), + 2: machine.NewCPUSet(4), + }, + }, + req: &pluginapi.ResourceRequest{ + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 2, + }, + }, + wantTopology: v1alpha1.TopologyAllocation{ + v1alpha1.TopologyTypeNuma: map[string]v1alpha1.ZoneAllocation{ + "0": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("2"), + }, + Attributes: map[string]string{ + "CpusetCpus": "0-1", + }, + }, + "2": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("1"), + }, + Attributes: map[string]string{ + "CpusetCpus": "4", + }, + }, + }, + }, + }, + { + name: "non-empty assignments add quantities and attributes (multiple zones)", + ai: &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + ContainerType: pluginapi.ContainerType_MAIN.String(), + }, + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1), // size 2 => "2" + 1: machine.NewCPUSet(3, 5, 6, 7), // size 4 => "4" + }, + }, + req: &pluginapi.ResourceRequest{ + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 4, + }, + }, + wantTopology: v1alpha1.TopologyAllocation{ + v1alpha1.TopologyTypeNuma: 
map[string]v1alpha1.ZoneAllocation{ + "0": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("2"), + }, + Attributes: map[string]string{ + "CpusetCpus": "0-1", + }, + }, + "1": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + }, + Attributes: map[string]string{ + "CpusetCpus": "3,5-7", + }, + }, + }, + }, + }, + { + name: "shared/reclaimed with single NUMA and request quantity", + ai: &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + ContainerType: pluginapi.ContainerType_MAIN.String(), + QoSLevel: consts.PodAnnotationQoSLevelSharedCores, + Annotations: map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1, 2), + }, + }, + req: &pluginapi.ResourceRequest{ + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 2, + }, + }, + wantTopology: v1alpha1.TopologyAllocation{ + v1alpha1.TopologyTypeNuma: map[string]v1alpha1.ZoneAllocation{ + "0": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + }, + }, + }, + { + name: "shared/reclaimed with single NUMA and decimal request quantity", + ai: &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + ContainerType: pluginapi.ContainerType_MAIN.String(), + QoSLevel: consts.PodAnnotationQoSLevelSharedCores, + Annotations: map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1, 2), + }, + }, + req: &pluginapi.ResourceRequest{ + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 0.5, + }, + }, + wantTopology: 
v1alpha1.TopologyAllocation{ + v1alpha1.TopologyTypeNuma: map[string]v1alpha1.ZoneAllocation{ + "0": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("500m"), + }, + }, + }, + }, + }, + { + name: "shared/reclaimed with single NUMA and decimal request quantity (high precision)", + ai: &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + ContainerType: pluginapi.ContainerType_MAIN.String(), + QoSLevel: consts.PodAnnotationQoSLevelSharedCores, + Annotations: map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1, 2), + }, + }, + req: &pluginapi.ResourceRequest{ + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 0.125, + }, + }, + wantTopology: v1alpha1.TopologyAllocation{ + v1alpha1.TopologyTypeNuma: map[string]v1alpha1.ZoneAllocation{ + "0": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("125m"), + }, + }, + }, + }, + }, + { + name: "reclaimed with single NUMA and decimal request quantity", + ai: &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + ContainerType: pluginapi.ContainerType_MAIN.String(), + QoSLevel: consts.PodAnnotationQoSLevelReclaimedCores, + Annotations: map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + TopologyAwareAssignments: map[int]machine.CPUSet{ + 1: machine.NewCPUSet(4, 5), + }, + }, + req: &pluginapi.ResourceRequest{ + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 1.75, + }, + }, + wantTopology: v1alpha1.TopologyAllocation{ + v1alpha1.TopologyTypeNuma: map[string]v1alpha1.ZoneAllocation{ + "1": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: 
resource.MustParse("1750m"), + }, + }, + }, + }, + }, + { + name: "dedicated cores with multiple NUMAs (uses assignment size)", + ai: &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + ContainerType: pluginapi.ContainerType_MAIN.String(), + QoSLevel: consts.PodAnnotationQoSLevelDedicatedCores, + }, + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1), + 1: machine.NewCPUSet(8, 9, 10), + }, + }, + req: &pluginapi.ResourceRequest{ + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 5, + }, + }, + wantTopology: v1alpha1.TopologyAllocation{ + v1alpha1.TopologyTypeNuma: map[string]v1alpha1.ZoneAllocation{ + "0": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("2"), + }, + Attributes: map[string]string{ + "CpusetCpus": "0-1", + }, + }, + "1": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("3"), + }, + Attributes: map[string]string{ + "CpusetCpus": "8-10", + }, + }, + }, + }, + }, + { + name: "shared/reclaimed with multiple NUMAs returns error", + ai: &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + ContainerType: pluginapi.ContainerType_MAIN.String(), + QoSLevel: consts.PodAnnotationQoSLevelSharedCores, + Annotations: map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0), + 1: machine.NewCPUSet(1), + }, + }, + req: &pluginapi.ResourceRequest{ + ResourceName: string(v1.ResourceCPU), + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 2, + }, + }, + wantErr: true, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got, err := GetCPUTopologyAllocationsAnnotations(tt.ai, coreconsts.QRMPodAnnotationTopologyAllocationKey, tt.req) + if 
tt.wantErr { + if err == nil { + t.Fatalf("expected error, got nil") + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if tt.wantNilAnno { + if got != nil { + t.Fatalf("expected nil annotations, got: %#v", got) + } + return + } + ta := parseCPUTopologyAllocationFromAnno(t, got) + if !reflect.DeepEqual(ta, tt.wantTopology) { + t.Fatalf("unexpected topology allocation. got=%v, want=%v", ta, tt.wantTopology) + } + }) + } +} + // generatePod returns a pod with the given name and QoS level annotation. func generatePod(name, qosLevel string) *v1.Pod { return &v1.Pod{ @@ -1033,3 +1378,107 @@ func TestIsSoleSharedCoresPod(t *testing.T) { }) } } + +func TestGetAggResourcePackagePinnedCPUSet(t *testing.T) { + t.Parallel() + + type args struct { + attributeSelector labels.Selector + machineState state.NUMANodeMap + } + tests := []struct { + name string + args args + want machine.CPUSet + }{ + { + name: "empty resource package map", + args: args{ + attributeSelector: labels.Everything(), + machineState: state.NUMANodeMap{}, + }, + want: machine.NewCPUSet(), + }, + { + name: "resource package matches selector", + args: args{ + attributeSelector: labels.SelectorFromSet(labels.Set{"key1": "value1"}), + machineState: state.NUMANodeMap{ + 0: { + ResourcePackageStates: map[string]*state.ResourcePackageState{ + "pkg1": { + Attributes: map[string]string{"key1": "value1"}, + PinnedCPUSet: machine.NewCPUSet(1, 2), + }, + }, + }, + }, + }, + want: machine.NewCPUSet(1, 2), + }, + { + name: "resource package does not match selector", + args: args{ + attributeSelector: labels.SelectorFromSet(labels.Set{"key1": "value2"}), + machineState: state.NUMANodeMap{ + 0: { + ResourcePackageStates: map[string]*state.ResourcePackageState{ + "pkg1": { + Attributes: map[string]string{"key1": "value1"}, + PinnedCPUSet: machine.NewCPUSet(1, 2), + }, + }, + }, + }, + }, + want: machine.NewCPUSet(), + }, + { + name: "missing machine state", + args: args{ + attributeSelector: 
labels.SelectorFromSet(labels.Set{"key1": "value1"}), + machineState: state.NUMANodeMap{}, + }, + want: machine.NewCPUSet(), + }, + { + name: "multiple numa nodes and packages", + args: args{ + attributeSelector: labels.SelectorFromSet(labels.Set{"type": "A"}), + machineState: state.NUMANodeMap{ + 0: { + ResourcePackageStates: map[string]*state.ResourcePackageState{ + "pkg1": { + Attributes: map[string]string{"type": "A"}, + PinnedCPUSet: machine.NewCPUSet(0, 1), + }, + }, + }, + 1: { + ResourcePackageStates: map[string]*state.ResourcePackageState{ + "pkg2": { + Attributes: map[string]string{"type": "A"}, + PinnedCPUSet: machine.NewCPUSet(2, 3), + }, + "pkg3": { + Attributes: map[string]string{"type": "B"}, + PinnedCPUSet: machine.NewCPUSet(4, 5), + }, + }, + }, + }, + }, + want: machine.NewCPUSet(0, 1, 2, 3), + }, + } + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := GetAggResourcePackagePinnedCPUSet(tt.args.attributeSelector, tt.args.machineState) + if !got.Equals(tt.want) { + t.Errorf("GetAggResourcePackagePinnedCPUSet() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/agent/qrm-plugins/gpu/resourceplugin/gpumemory/gpu_mem.go b/pkg/agent/qrm-plugins/gpu/resourceplugin/gpumemory/gpu_mem.go index 458356770a..3e7a99e7d5 100644 --- a/pkg/agent/qrm-plugins/gpu/resourceplugin/gpumemory/gpu_mem.go +++ b/pkg/agent/qrm-plugins/gpu/resourceplugin/gpumemory/gpu_mem.go @@ -71,7 +71,7 @@ func (p *GPUMemPlugin) GetTopologyHints(ctx context.Context, req *pluginapi.Reso return nil, err } - _, gpuMemory, err := util.GetQuantityFromResourceRequests(req.ResourceRequests, p.ResourceName(), false) + _, gpuMemory, err := util.GetQuantityFromResourceRequests(req.ResourceRequests, p.ResourceName(), nil) if err != nil { return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err) } @@ -458,7 +458,7 @@ func (p *GPUMemPlugin) Allocate( return nil, err } - _, gpuMemory, err := 
util.GetQuantityFromResourceRequests(resourceReq.ResourceRequests, p.ResourceName(), false) + _, gpuMemory, err := util.GetQuantityFromResourceRequests(resourceReq.ResourceRequests, p.ResourceName(), nil) if err != nil { return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err) } diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/filter.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/filter.go index 56c65fcd71..5076aebc86 100644 --- a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/filter.go +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/filter.go @@ -34,7 +34,7 @@ func (s *GPUMemoryStrategy) Filter(ctx *allocate.AllocationContext, allAvailable return nil, fmt.Errorf("GPU topology is nil") } - _, gpuMemory, err := util.GetQuantityFromResourceRequests(ctx.ResourceReq.ResourceRequests, string(consts.ResourceGPUMemory), false) + _, gpuMemory, err := util.GetQuantityFromResourceRequests(ctx.ResourceReq.ResourceRequests, string(consts.ResourceGPUMemory), nil) if err != nil { general.Warningf("getReqQuantityFromResourceReq failed with error: %v, use default available devices", err) return allAvailableDevices, nil diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/sort.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/sort.go index e4cfaef234..6e2291aa63 100644 --- a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/sort.go +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/sort.go @@ -34,7 +34,7 @@ func (s *GPUMemoryStrategy) Sort(ctx *allocate.AllocationContext, filteredDevice return nil, fmt.Errorf("GPU topology is nil") } - _, gpuMemory, err := qrmutil.GetQuantityFromResourceRequests(ctx.ResourceReq.ResourceRequests, string(consts.ResourceGPUMemory), false) + _, gpuMemory, err := qrmutil.GetQuantityFromResourceRequests(ctx.ResourceReq.ResourceRequests, 
string(consts.ResourceGPUMemory), nil) if err != nil { general.Warningf("getReqQuantityFromResourceReq failed with error: %v, use default filtered devices", err) return filteredDevices, nil diff --git a/pkg/agent/qrm-plugins/gpu/util/util.go b/pkg/agent/qrm-plugins/gpu/util/util.go index c8c1331593..d31c3f5a0b 100644 --- a/pkg/agent/qrm-plugins/gpu/util/util.go +++ b/pkg/agent/qrm-plugins/gpu/util/util.go @@ -79,7 +79,7 @@ func GetGPUCount(req *pluginapi.ResourceRequest, deviceNames []string) (float64, gpuNames := sets.NewString() for _, resourceName := range deviceNames { - _, request, err := qrmutil.GetQuantityFromResourceRequests(req.ResourceRequests, resourceName, false) + _, request, err := qrmutil.GetQuantityFromResourceRequests(req.ResourceRequests, resourceName, nil) if err != nil && !errors.IsNotFound(err) { return 0, nil, err } diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go index 0840cf9125..224a4dfa8e 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go @@ -166,12 +166,15 @@ type DynamicPolicy struct { numaAllocationReactor reactor.AllocationReactor numaBindResultResourceAllocationAnnotationKey string + topologyAllocationAnnotationKey string + + extraResourceNames []string } func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration, _ interface{}, agentName string, ) (bool, agent.Component, error) { - reservedMemory, err := getReservedMemory(conf, agentCtx.MetaServer, agentCtx.MachineInfo) + resourcesReservedMemory, err := getResourcesReservedMemory(conf, agentCtx.MetaServer, agentCtx.MachineInfo, conf.ExtraMemoryResources) if err != nil { return false, agent.ComponentStub{}, fmt.Errorf("getReservedMemoryFromOptions failed with error: %v", err) } @@ -180,11 +183,11 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration Key: util.QRMPluginPolicyTagName, Val: 
memconsts.MemoryResourcePluginPolicyNameDynamic, }) - resourcesReservedMemory := map[v1.ResourceName]map[int]uint64{ - v1.ResourceMemory: reservedMemory, - } + stateImpl, err := state.NewCheckpointState(conf.StateDirectoryConfiguration, memoryPluginStateFileName, - memconsts.MemoryResourcePluginPolicyNameDynamic, agentCtx.CPUTopology, agentCtx.MachineInfo, resourcesReservedMemory, conf.SkipMemoryStateCorruption, wrappedEmitter) + memconsts.MemoryResourcePluginPolicyNameDynamic, agentCtx.CPUTopology, agentCtx.MachineInfo, agentCtx.MemoryTopology, resourcesReservedMemory, conf.SkipMemoryStateCorruption, + wrappedEmitter, conf.ExtraMemoryResources, + ) if err != nil { return false, agent.ComponentStub{}, fmt.Errorf("NewCheckpointState failed with error: %v", err) } @@ -238,6 +241,8 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration resctrlHinter: newResctrlHinter(&conf.ResctrlConfig, wrappedEmitter), enableNonBindingShareCoresMemoryResourceCheck: conf.EnableNonBindingShareCoresMemoryResourceCheck, numaBindResultResourceAllocationAnnotationKey: conf.NUMABindResultResourceAllocationAnnotationKey, + topologyAllocationAnnotationKey: conf.TopologyAllocationAnnotationKey, + extraResourceNames: conf.ExtraMemoryResources, } policyImplement.allocationHandlers = map[string]util.AllocationHandler{ @@ -594,7 +599,7 @@ func (p *DynamicPolicy) GetTopologyHints(ctx context.Context, return nil, err } - reqInt, _, err := util.GetQuantityFromResourceReq(req) + resourceReqInt, _, err := util.GetQuantityMapFromResourceReq(req) if err != nil { return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err) } @@ -607,7 +612,7 @@ func (p *DynamicPolicy) GetTopologyHints(ctx context.Context, "podRole", req.PodRole, "containerType", req.ContainerType, "qosLevel", qosLevel, - "memoryReq(bytes)", reqInt, + "memoryReq map(bytes)", resourceReqInt, "isDebugPod", isDebugPod) if req.ContainerType == pluginapi.ContainerType_INIT || isDebugPod { @@ 
-727,54 +732,59 @@ func (p *DynamicPolicy) GetResourcesAllocation(_ context.Context, defer p.RUnlock() podResources := make(map[string]*pluginapi.ContainerResources) - podEntries := p.state.GetPodResourceEntries()[v1.ResourceMemory] + podResourceEntries := p.state.GetPodResourceEntries() needUpdateMachineState := false - for podUID, containerEntries := range podEntries { - if podResources[podUID] == nil { - podResources[podUID] = &pluginapi.ContainerResources{} - } - mainContainerAllocationInfo, _ := podEntries.GetMainContainerAllocation(podUID) - for containerName, allocationInfo := range containerEntries { - if allocationInfo == nil { - continue + for resourceName, podEntries := range podResourceEntries { + for podUID, containerEntries := range podEntries { + if podResources[podUID] == nil { + podResources[podUID] = &pluginapi.ContainerResources{} } - if allocationInfo.CheckSideCar() && mainContainerAllocationInfo != nil { - if applySidecarAllocationInfoFromMainContainer(allocationInfo, mainContainerAllocationInfo) { - general.Infof("pod: %s/%s sidecar container: %s update its allocation", - allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName) - p.state.SetAllocationInfo(v1.ResourceMemory, podUID, containerName, allocationInfo, true) - needUpdateMachineState = true + mainContainerAllocationInfo, _ := podEntries.GetMainContainerAllocation(podUID) + for containerName, allocationInfo := range containerEntries { + if allocationInfo == nil { + continue } - } - if podResources[podUID].ContainerResources == nil { - podResources[podUID].ContainerResources = make(map[string]*pluginapi.ResourceAllocation) - } + if allocationInfo.CheckSideCar() && mainContainerAllocationInfo != nil { + if applySidecarAllocationInfoFromMainContainer(allocationInfo, mainContainerAllocationInfo) { + general.Infof("pod: %s/%s sidecar container: %s update its allocation", + allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName) + 
p.state.SetAllocationInfo(resourceName, podUID, containerName, allocationInfo, true) + needUpdateMachineState = true + } + } - resourceAllocation, err := allocationInfo.GetResourceAllocation() - if err != nil { - errMsg := "allocationInfo.GetResourceAllocation failed" - general.ErrorS(err, errMsg, - "podNamespace", allocationInfo.PodNamespace, - "podName", allocationInfo.PodName, - "containerName", allocationInfo.ContainerName) - return nil, fmt.Errorf(errMsg) - } + if podResources[podUID].ContainerResources == nil { + podResources[podUID].ContainerResources = make(map[string]*pluginapi.ResourceAllocation) + } - if p.resctrlHinter != nil { - p.resctrlHinter.HintResourceAllocation(allocationInfo.AllocationMeta, resourceAllocation) - } + resourceAllocation, err := podResourceEntries.GetResourceAllocation(podUID, containerName) + if err != nil { + errMsg := "allocationInfo.GetResourceAllocation failed" + general.ErrorS(err, errMsg, + "podNamespace", allocationInfo.PodNamespace, + "podName", allocationInfo.PodName, + "containerName", allocationInfo.ContainerName, + "resourceName", resourceName) + return nil, fmt.Errorf(errMsg) + } + + if p.resctrlHinter != nil { + p.resctrlHinter.HintResourceAllocation(allocationInfo.AllocationMeta, resourceAllocation) + } - podResources[podUID].ContainerResources[containerName] = resourceAllocation + podResources[podUID].ContainerResources[containerName] = resourceAllocation + } } } if needUpdateMachineState { general.Infof("GetResourcesAllocation update machine state") - podResourceEntries := p.state.GetPodResourceEntries() - resourcesState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, p.state.GetMachineState(), p.state.GetReservedMemory()) + podResourceEntries = p.state.GetPodResourceEntries() + resourcesState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podResourceEntries, + p.state.GetMachineState(), 
p.state.GetReservedMemory(), p.extraResourceNames) if err != nil { general.Infof("GetResourcesAllocation GenerateMachineStateFromPodEntries failed with error: %v", err) return nil, fmt.Errorf("calculate machineState by updated pod entries failed with error: %v", err) @@ -798,42 +808,45 @@ func (p *DynamicPolicy) GetTopologyAwareResources(_ context.Context, p.RLock() defer p.RUnlock() - allocationInfo := p.state.GetAllocationInfo(v1.ResourceMemory, req.PodUid, req.ContainerName) - if allocationInfo == nil { + resourceAllocationInfo := p.state.GetResourceAllocationInfo(req.PodUid, req.ContainerName) + if resourceAllocationInfo == nil { return nil, fmt.Errorf("pod: %s, container: %s is not show up in memory plugin state", req.PodUid, req.ContainerName) } - topologyAwareQuantityList := util.GetTopologyAwareQuantityFromAssignmentsSize(allocationInfo.TopologyAwareAllocations) - resp := &pluginapi.GetTopologyAwareResourcesResponse{ - PodUid: allocationInfo.PodUid, - PodName: allocationInfo.PodName, - PodNamespace: allocationInfo.PodNamespace, - ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ - ContainerName: allocationInfo.ContainerName, - }, - } + var resp *pluginapi.GetTopologyAwareResourcesResponse - if allocationInfo.CheckSideCar() { - resp.ContainerTopologyAwareResources.AllocatedResources = map[string]*pluginapi.TopologyAwareResource{ - string(v1.ResourceMemory): { + for resourceName, allocationInfo := range resourceAllocationInfo { + topologyAwareQuantityList := util.GetTopologyAwareQuantityFromAssignmentsSize(allocationInfo.TopologyAwareAllocations) + if resp == nil { + resp = &pluginapi.GetTopologyAwareResourcesResponse{ + PodUid: allocationInfo.PodUid, + PodName: allocationInfo.PodName, + PodNamespace: allocationInfo.PodNamespace, + ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ + ContainerName: allocationInfo.ContainerName, + AllocatedResources: make(map[string]*pluginapi.TopologyAwareResource), + }, + 
} + } + + if allocationInfo.CheckSideCar() { + resp.ContainerTopologyAwareResources.AllocatedResources[string(resourceName)] = &pluginapi.TopologyAwareResource{ IsNodeResource: false, IsScalarResource: true, AggregatedQuantity: 0, OriginalAggregatedQuantity: 0, TopologyAwareQuantityList: nil, OriginalTopologyAwareQuantityList: nil, - }, - } - } else { - resp.ContainerTopologyAwareResources.AllocatedResources = map[string]*pluginapi.TopologyAwareResource{ - string(v1.ResourceMemory): { + } + } else { + resp.ContainerTopologyAwareResources.AllocatedResources[string(resourceName)] = &pluginapi.TopologyAwareResource{ IsNodeResource: false, IsScalarResource: true, AggregatedQuantity: float64(allocationInfo.AggregatedQuantity), OriginalAggregatedQuantity: float64(allocationInfo.AggregatedQuantity), TopologyAwareQuantityList: topologyAwareQuantityList, OriginalTopologyAwareQuantityList: topologyAwareQuantityList, - }, + } } } @@ -847,42 +860,46 @@ func (p *DynamicPolicy) GetTopologyAwareAllocatableResources(context.Context, p.RLock() defer p.RUnlock() - machineState := p.state.GetMachineState()[v1.ResourceMemory] + allocatableResources := make(map[string]*pluginapi.AllocatableTopologyAwareResource) + resourceMachineState := p.state.GetMachineState() numaNodes := p.topology.CPUDetails.NUMANodes().ToSliceInt() - topologyAwareAllocatableQuantityList := make([]*pluginapi.TopologyAwareQuantity, 0, len(machineState)) - topologyAwareCapacityQuantityList := make([]*pluginapi.TopologyAwareQuantity, 0, len(machineState)) - - var aggregatedAllocatableQuantity, aggregatedCapacityQuantity uint64 = 0, 0 - for _, numaNode := range numaNodes { - numaNodeState := machineState[numaNode] - if numaNodeState == nil { - return nil, fmt.Errorf("nil numaNodeState for NUMA: %d", numaNode) + + for resourceName, machineState := range resourceMachineState { + topologyAwareAllocatableQuantityList := make([]*pluginapi.TopologyAwareQuantity, 0, len(machineState)) + topologyAwareCapacityQuantityList := 
make([]*pluginapi.TopologyAwareQuantity, 0, len(machineState)) + + var aggregatedAllocatableQuantity, aggregatedCapacityQuantity uint64 = 0, 0 + for _, numaNode := range numaNodes { + numaNodeState := machineState[numaNode] + if numaNodeState == nil { + return nil, fmt.Errorf("nil numaNodeState for NUMA: %d", numaNode) + } + + topologyAwareAllocatableQuantityList = append(topologyAwareAllocatableQuantityList, &pluginapi.TopologyAwareQuantity{ + ResourceValue: float64(numaNodeState.Allocatable), + Node: uint64(numaNode), + }) + topologyAwareCapacityQuantityList = append(topologyAwareCapacityQuantityList, &pluginapi.TopologyAwareQuantity{ + ResourceValue: float64(numaNodeState.TotalMemSize), + Node: uint64(numaNode), + }) + aggregatedAllocatableQuantity += numaNodeState.Allocatable + aggregatedCapacityQuantity += numaNodeState.TotalMemSize } - topologyAwareAllocatableQuantityList = append(topologyAwareAllocatableQuantityList, &pluginapi.TopologyAwareQuantity{ - ResourceValue: float64(numaNodeState.Allocatable), - Node: uint64(numaNode), - }) - topologyAwareCapacityQuantityList = append(topologyAwareCapacityQuantityList, &pluginapi.TopologyAwareQuantity{ - ResourceValue: float64(numaNodeState.TotalMemSize), - Node: uint64(numaNode), - }) - aggregatedAllocatableQuantity += numaNodeState.Allocatable - aggregatedCapacityQuantity += numaNodeState.TotalMemSize + allocatableResources[string(resourceName)] = &pluginapi.AllocatableTopologyAwareResource{ + IsNodeResource: false, + IsScalarResource: true, + AggregatedAllocatableQuantity: float64(aggregatedAllocatableQuantity), + TopologyAwareAllocatableQuantityList: topologyAwareAllocatableQuantityList, + AggregatedCapacityQuantity: float64(aggregatedCapacityQuantity), + TopologyAwareCapacityQuantityList: topologyAwareCapacityQuantityList, + } } return &pluginapi.GetTopologyAwareAllocatableResourcesResponse{ - AllocatableResources: map[string]*pluginapi.AllocatableTopologyAwareResource{ - string(v1.ResourceMemory): { - 
IsNodeResource: false, - IsScalarResource: true, - AggregatedAllocatableQuantity: float64(aggregatedAllocatableQuantity), - TopologyAwareAllocatableQuantityList: topologyAwareAllocatableQuantityList, - AggregatedCapacityQuantity: float64(aggregatedCapacityQuantity), - TopologyAwareCapacityQuantityList: topologyAwareCapacityQuantityList, - }, - }, + AllocatableResources: allocatableResources, }, nil } @@ -894,6 +911,7 @@ func (p *DynamicPolicy) GetResourcePluginOptions(context.Context, PreStartRequired: false, WithTopologyAlignment: true, NeedReconcile: true, + ExtraResources: p.extraResourceNames, }, nil } @@ -940,7 +958,7 @@ func (p *DynamicPolicy) Allocate(ctx context.Context, } }() - reqInt, _, err := util.GetQuantityFromResourceReq(req) + resourceReqInt, _, err := util.GetQuantityMapFromResourceReq(req) if err != nil { return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err) } @@ -952,7 +970,7 @@ func (p *DynamicPolicy) Allocate(ctx context.Context, "podType", req.PodType, "podRole", req.PodRole, "qosLevel", qosLevel, - "memoryReq(bytes)", reqInt, + "memoryReq map(bytes)", resourceReqInt, "hint", req.Hint) if req.ContainerType == pluginapi.ContainerType_INIT { @@ -999,6 +1017,7 @@ func (p *DynamicPolicy) Allocate(ctx context.Context, p.Lock() defer func() { // calls sys-advisor to inform the latest container + // currently, sys-advisor only supports v1.ResourceMemory, and hugepages is not supported if p.enableMemoryAdvisor && respErr == nil && req.ContainerType != pluginapi.ContainerType_INIT { _, err := p.advisorClient.AddContainer(ctx, &advisorsvc.ContainerMetadata{ PodUid: req.PodUid, @@ -1010,7 +1029,7 @@ func (p *DynamicPolicy) Allocate(ctx context.Context, Labels: maputil.CopySS(req.Labels), Annotations: maputil.CopySS(req.Annotations), QosLevel: qosLevel, - RequestQuantity: uint64(reqInt), + RequestQuantity: uint64(resourceReqInt[v1.ResourceMemory]), }) if err != nil { resp = nil @@ -1053,38 +1072,39 @@ func (p *DynamicPolicy) 
Allocate(ctx context.Context, return }() - allocationInfo := p.state.GetAllocationInfo(v1.ResourceMemory, req.PodUid, req.ContainerName) - if allocationInfo != nil && allocationInfo.AggregatedQuantity >= uint64(reqInt) && !util.PodInplaceUpdateResizing(req) { - general.InfoS("already allocated and meet requirement", + resourceAllocationInfo := p.state.GetResourceAllocationInfo(req.PodUid, req.ContainerName) + // The length of all current allocation for every resource should be the same as the length of requested resources. + if len(resourceAllocationInfo) > 0 && len(resourceAllocationInfo) != len(resourceReqInt) { + general.ErrorS(fmt.Errorf("number of existing allocated resources: %d does not match number of resource requests: %d", + len(resourceAllocationInfo), len(resourceReqInt)), + "allocation error", "podNamespace", req.PodNamespace, "podName", req.PodName, - "containerName", req.ContainerName, - "memoryReq(bytes)", reqInt, - "currentResult(bytes)", allocationInfo.AggregatedQuantity) - return &pluginapi.ResourceAllocationResponse{ - PodUid: req.PodUid, - PodNamespace: req.PodNamespace, - PodName: req.PodName, - ContainerName: req.ContainerName, - ContainerType: req.ContainerType, - ContainerIndex: req.ContainerIndex, - PodRole: req.PodRole, - PodType: req.PodType, - ResourceName: string(v1.ResourceMemory), - AllocationResult: &pluginapi.ResourceAllocation{ - ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{ - string(v1.ResourceMemory): { - OciPropertyName: util.OCIPropertyNameCPUSetMems, - IsNodeResource: false, - IsScalarResource: true, - AllocatedQuantity: float64(allocationInfo.AggregatedQuantity), - AllocationResult: allocationInfo.NumaAllocationResult.String(), - }, - }, - }, - Labels: general.DeepCopyMap(req.Labels), - Annotations: general.DeepCopyMap(req.Annotations), - }, nil + "containerName", req.ContainerName) + return nil, fmt.Errorf("number of existing allocated resources: %d does not match number of resource requests: %d", + 
len(resourceAllocationInfo), len(resourceReqInt)) + } + + for resName, allocationInfo := range resourceAllocationInfo { + reqInt, ok := resourceReqInt[resName] + if !ok { + general.ErrorS(fmt.Errorf("unable to find request quantity for resource that is already allocated"), + "allocation error", + "podNamespace", req.PodNamespace, + "podName", req.PodName, + "containerName", req.ContainerName, + "resourceName", resName) + return nil, fmt.Errorf("unable to find request quantity for resource that is already allocated") + } + + if allocationInfo != nil && allocationInfo.AggregatedQuantity >= uint64(reqInt) && !util.PodInplaceUpdateResizing(req) { + general.InfoS("already allocated and meet requirement", + "podNamespace", req.PodNamespace, + "podName", req.PodName, + "containerName", req.ContainerName, + "memoryReq(bytes)", resourceReqInt, + "currentResult(bytes)", allocationInfo.AggregatedQuantity) + } } if p.allocationHandlers[qosLevel] == nil { @@ -1115,7 +1135,8 @@ func (p *DynamicPolicy) removePod(podUID string, persistCheckpoint bool) error { delete(podEntries, podUID) } - resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, p.state.GetMachineState(), p.state.GetReservedMemory()) + resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podResourceEntries, + p.state.GetMachineState(), p.state.GetReservedMemory(), p.extraResourceNames) if err != nil { general.Errorf("pod: %s, GenerateMachineStateFromPodEntries failed with error: %v", podUID, err) return fmt.Errorf("calculate machineState by updated pod entries failed with error: %v", err) @@ -1145,7 +1166,8 @@ func (p *DynamicPolicy) removeContainer(podUID, containerName string, persistChe return nil } - resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, p.state.GetMachineState(), p.state.GetReservedMemory()) + 
resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podResourceEntries, + p.state.GetMachineState(), p.state.GetReservedMemory(), p.extraResourceNames) if err != nil { general.Errorf("pod: %s, container: %s GenerateMachineStateFromPodEntries failed with error: %v", podUID, containerName, err) return fmt.Errorf("calculate machineState by updated pod entries failed with error: %v", err) } @@ -1278,7 +1300,7 @@ func (p *DynamicPolicy) hasLastLevelEnhancementKey(lastLevelEnhancementKey strin func (p *DynamicPolicy) checkNonBindingShareCoresMemoryResource(req *pluginapi.ResourceRequest) (bool, error) { reqInt, _, err := util.GetPodAggregatedRequestResource(req) if err != nil { - return false, fmt.Errorf("GetQuantityFromResourceReq failed with error: %v", err) + return false, fmt.Errorf("GetPodAggregatedRequestResource failed with error: %v", err) } shareCoresAllocated := uint64(reqInt) diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_advisor_handler.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_advisor_handler.go index 021f017e44..18d8668e5f 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_advisor_handler.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_advisor_handler.go @@ -379,7 +379,8 @@ func (p *DynamicPolicy) handleAdvisorResp(advisorResp *advisorsvc.ListAndWatchRe } } - resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, p.state.GetMachineState(), p.state.GetReservedMemory()) + resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podResourceEntries, + p.state.GetMachineState(), p.state.GetReservedMemory(), p.extraResourceNames) if err != nil { return fmt.Errorf("calculate machineState by updated pod entries failed with error: %v", err) } diff --git
a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers.go index 5a372798e1..cade65c3ce 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers.go @@ -108,150 +108,156 @@ func (p *DynamicPolicy) numaBindingAllocationHandler(ctx context.Context, return p.numaBindingAllocationSidecarHandler(ctx, req, qosLevel, persistCheckpoint) } - // use the pod aggregated request to instead of main container. - podAggregatedRequest, _, err := util.GetPodAggregatedRequestResource(req) + podAggregatedResourceRequests, _, err := util.GetPodAggregatedRequestResourceMap(req) if err != nil { - return nil, fmt.Errorf("GetPodAggregatedRequestResource failed with error: %v", err) + return nil, fmt.Errorf("GetPodAggregatedRequestResourceMap failed with error: %v", err) } + // resourceAllocationInfo stores the final allocation info for each resource name.
+ resourceAllocationInfo := make(map[v1.ResourceName]*state.AllocationInfo) machineState := p.state.GetMachineState() - memoryState := machineState[v1.ResourceMemory] - podResourceEntries := p.state.GetPodResourceEntries() - podEntries := podResourceEntries[v1.ResourceMemory] - allocationInfo := p.state.GetAllocationInfo(v1.ResourceMemory, req.PodUid, req.ContainerName) - if allocationInfo != nil { - if allocationInfo.AggregatedQuantity >= uint64(podAggregatedRequest) && !util.PodInplaceUpdateResizing(req) { - general.InfoS("already allocated and meet requirement", + for resourceName, requestQuantity := range podAggregatedResourceRequests { + memoryState := machineState[resourceName] + + podEntries := podResourceEntries[resourceName] + + allocationInfo := p.state.GetAllocationInfo(resourceName, req.PodUid, req.ContainerName) + if allocationInfo != nil { + if allocationInfo.AggregatedQuantity >= uint64(requestQuantity) && !util.PodInplaceUpdateResizing(req) { + general.InfoS("already allocated and meet requirement", + "podNamespace", req.PodNamespace, + "podName", req.PodName, + "containerName", req.ContainerName, + "memoryReq(bytes)", requestQuantity, + "currentResult(bytes)", allocationInfo.AggregatedQuantity) + + resourceAllocationInfo[resourceName] = allocationInfo + continue + } + general.InfoS("not meet requirement, clear record and re-allocate", "podNamespace", req.PodNamespace, "podName", req.PodName, "containerName", req.ContainerName, - "memoryReq(bytes)", podAggregatedRequest, + "memoryReq(bytes)", requestQuantity, "currentResult(bytes)", allocationInfo.AggregatedQuantity) - resp, packErr := packAllocationResponse(allocationInfo, req, nil) - if packErr != nil { - general.Errorf("pod: %s/%s, container: %s packAllocationResponse failed with error: %v", - req.PodNamespace, req.PodName, req.ContainerName, packErr) - return nil, fmt.Errorf("packAllocationResponse failed with error: %v", packErr) + if !allocationInfo.CheckNUMABinding() { + general.Errorf("pod: 
%s/%s, container: %s, resource %s, request to memory inplace update resize allocation, but origin allocation info is not numa_binding, reject it", + req.PodNamespace, req.PodName, req.ContainerName, resourceName) + return nil, fmt.Errorf("cannot change from non-numa_binding to numa_binding during inplace update for resource: %s", resourceName) } - return resp, nil - } - general.InfoS("not meet requirement, clear record and re-allocate", - "podNamespace", req.PodNamespace, - "podName", req.PodName, - "containerName", req.ContainerName, - "memoryReq(bytes)", podAggregatedRequest, - "currentResult(bytes)", allocationInfo.AggregatedQuantity) - if !allocationInfo.CheckNUMABinding() { - general.Errorf("pod: %s/%s, container: %s request to memory inplace update resize allocation, but origin allocation info is not numa_binding, reject it", - req.PodNamespace, req.PodName, req.ContainerName) - return nil, fmt.Errorf("cannot change from non-numa_binding to numa_binding during inplace update") - } + // remove the main container of this pod (the main container involve the whole pod requests), and the + // sidecar container request in state is zero. + containerEntries := podEntries[req.PodUid] + delete(containerEntries, req.ContainerName) - // remove the main container of this pod (the main container involve the whole pod requests), and the - // sidecar container request in state is zero. 
- containerEntries := podEntries[req.PodUid] - delete(containerEntries, req.ContainerName) + var stateErr error + memoryState, stateErr = state.GenerateMemoryStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podEntries, p.state.GetReservedMemory(), resourceName) + if stateErr != nil { + general.ErrorS(stateErr, "generateMemoryMachineStateByPodEntries failed", + "podNamespace", req.PodNamespace, + "podName", req.PodName, + "containerName", req.ContainerName, + "memoryReq(bytes)", requestQuantity, + "currentResult(bytes)", allocationInfo.AggregatedQuantity) + return nil, fmt.Errorf("generateMemoryMachineStateByPodEntries failed with error: %v", stateErr) + } + } else if util.PodInplaceUpdateResizing(req) { + general.Errorf("pod %s/%s, container: %s, resource: %s, request to memory inplace update resize, but no origin allocation info", + req.PodNamespace, req.PodName, req.ContainerName, resourceName) + return nil, fmt.Errorf("no origin allocation info for resource %s", resourceName) + } - var stateErr error - memoryState, stateErr = state.GenerateMemoryStateFromPodEntries(p.state.GetMachineInfo(), podEntries, p.state.GetReservedMemory()) - if stateErr != nil { - general.ErrorS(stateErr, "generateMemoryMachineStateByPodEntries failed", + // call calculateMemoryAllocation to update memoryState in-place, + // and we can use this adjusted state to pack allocation results + err = p.calculateMemoryAllocation(req, memoryState, qosLevel, requestQuantity) + if err != nil { + general.ErrorS(err, "unable to allocate Memory", "podNamespace", req.PodNamespace, "podName", req.PodName, "containerName", req.ContainerName, - "memoryReq(bytes)", podAggregatedRequest, - "currentResult(bytes)", allocationInfo.AggregatedQuantity) - return nil, fmt.Errorf("generateMemoryMachineStateByPodEntries failed with error: %v", stateErr) + "memoryReq", requestQuantity) + return nil, err } - } else if util.PodInplaceUpdateResizing(req) { - general.Errorf("pod %s/%s, 
container: %s request to memory inplace update resize, but no origin allocation info", - req.PodNamespace, req.PodName, req.ContainerName) - return nil, fmt.Errorf("no origin allocation info") - } - // call calculateMemoryAllocation to update memoryState in-place, - // and we can use this adjusted state to pack allocation results - err = p.calculateMemoryAllocation(req, memoryState, qosLevel, podAggregatedRequest) - if err != nil { - general.ErrorS(err, "unable to allocate Memory", + topologyAwareAllocations := make(map[int]uint64) + result := machine.NewCPUSet() + var aggregatedQuantity uint64 = 0 + for numaNode, numaNodeState := range memoryState { + if numaNodeState.PodEntries[req.PodUid][req.ContainerName] != nil { + result = result.Union(machine.NewCPUSet(numaNode)) + aggregatedQuantity += numaNodeState.PodEntries[req.PodUid][req.ContainerName].AggregatedQuantity + topologyAwareAllocations[numaNode] = numaNodeState.PodEntries[req.PodUid][req.ContainerName].AggregatedQuantity + } + } + + general.InfoS("allocate memory successfully", "podNamespace", req.PodNamespace, "podName", req.PodName, "containerName", req.ContainerName, - "memoryReq", podAggregatedRequest) - return nil, err - } + "reqMemoryQuantity", requestQuantity, + "numaAllocationResult", result.String()) - topologyAwareAllocations := make(map[int]uint64) - result := machine.NewCPUSet() - var aggregatedQuantity uint64 = 0 - for numaNode, numaNodeState := range memoryState { - if numaNodeState.PodEntries[req.PodUid][req.ContainerName] != nil { - result = result.Union(machine.NewCPUSet(numaNode)) - aggregatedQuantity += numaNodeState.PodEntries[req.PodUid][req.ContainerName].AggregatedQuantity - topologyAwareAllocations[numaNode] = numaNodeState.PodEntries[req.PodUid][req.ContainerName].AggregatedQuantity + allocationInfo = &state.AllocationInfo{ + AllocationMeta: state.GenerateMemoryContainerAllocationMeta(req, qosLevel), + AggregatedQuantity: aggregatedQuantity, + NumaAllocationResult: result.Clone(), 
+ TopologyAwareAllocations: topologyAwareAllocations, } - } - general.InfoS("allocate memory successfully", - "podNamespace", req.PodNamespace, - "podName", req.PodName, - "containerName", req.ContainerName, - "reqMemoryQuantity", podAggregatedRequest, - "numaAllocationResult", result.String()) + if !qosutil.AnnotationsIndicateNUMAExclusive(req.Annotations) { + // shared cores with numa binding and non distribute evenly across numa pods cannot occupy multiple NUMA nodes + if (qosLevel == apiconsts.PodAnnotationQoSLevelSharedCores || !qosutil.AnnotationsIndicateDistributeEvenlyAcrossNuma(req.Annotations)) && len(req.Hint.Nodes) != 1 { + return nil, fmt.Errorf("numa binding without numa exclusive allocation result numa node size is %d, "+ + "not equal to 1", len(req.Hint.Nodes)) + } + allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes) + } - allocationInfo = &state.AllocationInfo{ - AllocationMeta: state.GenerateMemoryContainerAllocationMeta(req, qosLevel), - AggregatedQuantity: aggregatedQuantity, - NumaAllocationResult: result.Clone(), - TopologyAwareAllocations: topologyAwareAllocations, + // Set the final allocationInfo for the resource name + resourceAllocationInfo[resourceName] = allocationInfo } - if !qosutil.AnnotationsIndicateNUMAExclusive(req.Annotations) { - if len(req.Hint.Nodes) != 1 { - return nil, fmt.Errorf("numa binding without numa exclusive allocation result numa node size is %d, "+ - "not equal to 1", len(req.Hint.Nodes)) + for resName, allocationInfo := range resourceAllocationInfo { + p.state.SetAllocationInfo(resName, req.PodUid, req.ContainerName, allocationInfo, persistCheckpoint) + + // only v1.ResourceMemory can be adjusted + if resName == v1.ResourceMemory { + err = p.adjustAllocationEntries(persistCheckpoint) + if err != nil { + return nil, fmt.Errorf("adjustAllocationEntries failed with error: %v", err) + } + } + + // update the numa allocation result for numa binding pod + err = p.updateSpecifiedNUMAAllocation(ctx, 
allocationInfo) + if err != nil { + general.Errorf("pod: %s/%s, container: %s updateSpecifiedNUMAAllocation failed with error: %v", + req.PodNamespace, req.PodName, req.ContainerName, err) + return nil, fmt.Errorf("updateSpecifiedNUMAAllocation failed with error: %v", err) } - allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0]) } - p.state.SetAllocationInfo(v1.ResourceMemory, req.PodUid, req.ContainerName, allocationInfo, persistCheckpoint) + resourceTopologyAllocationsAnnotations := getMemoryTopologyAllocationsAnnotations(resourceAllocationInfo, p.topologyAllocationAnnotationKey) podResourceEntries = p.state.GetPodResourceEntries() - machineState, err = state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, p.state.GetMachineState(), p.state.GetReservedMemory()) + machineState, err = state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podResourceEntries, + p.state.GetMachineState(), p.state.GetReservedMemory(), p.extraResourceNames) if err != nil { - general.Errorf("pod: %s/%s, container: %s GenerateMachineStateFromPodEntries failed with error: %v", + general.Errorf("pod: %s/%s, container: %s, GenerateMachineStateFromPodEntries failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) return nil, fmt.Errorf("calculate memoryState by updated pod entries failed with error: %v", err) } p.state.SetMachineState(machineState, persistCheckpoint) - err = p.adjustAllocationEntries(persistCheckpoint) - if err != nil { - return nil, fmt.Errorf("adjustAllocationEntries failed with error: %v", err) - } - - resp, err := packAllocationResponse(allocationInfo, req, nil) - if err != nil { - general.Errorf("pod: %s/%s, container: %s packAllocationResponse failed with error: %v", - req.PodNamespace, req.PodName, req.ContainerName, err) - return nil, fmt.Errorf("packAllocationResponse failed with error: %v", err) - } - - // update the numa allocation result for numa binding 
pod - err = p.updateSpecifiedNUMAAllocation(ctx, allocationInfo) - if err != nil { - general.Errorf("pod: %s/%s, container: %s updateSpecifiedNUMAAllocation failed with error: %v", - req.PodNamespace, req.PodName, req.ContainerName, err) - return nil, fmt.Errorf("updateSpecifiedNUMAAllocation failed with error: %v", err) - } - - return resp, nil + return packAllocationResponse(resourceAllocationInfo, req, resourceTopologyAllocationsAnnotations) } +// reclaimedCoresBestEffortNUMABindingAllocationHandler allocates reclaimed cores with numa binding pods in best-effort manner. +// Note that this only supports v1.ResourceMemory, hugepages is not supported. func (p *DynamicPolicy) reclaimedCoresBestEffortNUMABindingAllocationHandler(ctx context.Context, req *pluginapi.ResourceRequest, persistCheckpoint bool, ) (*pluginapi.ResourceAllocationResponse, error) { @@ -302,7 +308,9 @@ func (p *DynamicPolicy) reclaimedCoresBestEffortNUMABindingAllocationHandler(ctx "containerName", req.ContainerName, "memoryReq(bytes)", allocationInfo.AggregatedQuantity, "currentResult(bytes)", allocationInfo.AggregatedQuantity) - return packAllocationResponse(allocationInfo, req, nil) + return packAllocationResponse(map[v1.ResourceName]*state.AllocationInfo{ + v1.ResourceMemory: allocationInfo, + }, req, nil) } general.InfoS("not meet requirement, clear record and re-allocate", @@ -318,7 +326,7 @@ func (p *DynamicPolicy) reclaimedCoresBestEffortNUMABindingAllocationHandler(ctx delete(containerEntries, req.ContainerName) var stateErr error - memoryState, stateErr = state.GenerateMemoryStateFromPodEntries(p.state.GetMachineInfo(), podEntries, p.state.GetReservedMemory()) + memoryState, stateErr = state.GenerateMemoryStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podEntries, p.state.GetReservedMemory(), v1.ResourceMemory) if stateErr != nil { general.ErrorS(stateErr, "generateMemoryMachineStateByPodEntries failed", "podNamespace", req.PodNamespace, @@ -342,7 +350,7 @@ 
func (p *DynamicPolicy) reclaimedCoresBestEffortNUMABindingAllocationHandler(ctx if req.Hint != nil && len(req.Hint.Nodes) == 1 && (reclaimActualBindingNUMAs.Contains(int(req.Hint.Nodes[0])) || !nonReclaimActualBindingNUMAs.Equals(machine.NewCPUSet(int(req.Hint.Nodes[0])))) { - allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0]) + allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes) numaAllocationResult = machine.NewCPUSet(int(req.Hint.Nodes[0])) } else { numaAllocationResult = nonReclaimActualBindingNUMAs @@ -356,7 +364,8 @@ func (p *DynamicPolicy) reclaimedCoresBestEffortNUMABindingAllocationHandler(ctx p.state.SetAllocationInfo(v1.ResourceMemory, req.PodUid, req.ContainerName, allocationInfo, persistCheckpoint) - machineState, err = state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetPodResourceEntries(), p.state.GetMachineState(), p.state.GetReservedMemory()) + machineState, err = state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), p.state.GetPodResourceEntries(), + p.state.GetMachineState(), p.state.GetReservedMemory(), p.extraResourceNames) if err != nil { general.Errorf("pod: %s/%s, container: %s GenerateMachineStateFromPodEntries failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) @@ -370,12 +379,7 @@ func (p *DynamicPolicy) reclaimedCoresBestEffortNUMABindingAllocationHandler(ctx } } - resp, err := packAllocationResponse(allocationInfo, req, p.getReclaimedResourceAllocationAnnotations(allocationInfo)) - if err != nil { - general.Errorf("pod: %s/%s, container: %s packAllocationResponse failed with error: %v", - req.PodNamespace, req.PodName, req.ContainerName, err) - return nil, fmt.Errorf("packAllocationResponse failed with error: %v", err) - } + var resourceTopologyAllocationsAnnotations map[v1.ResourceName]map[string]string // we only support updating the NUMA allocation results for pods with explicit NUMA binding annotation if 
qosutil.AnnotationsIndicateNUMABinding(req.Annotations) { @@ -385,6 +389,21 @@ func (p *DynamicPolicy) reclaimedCoresBestEffortNUMABindingAllocationHandler(ctx req.PodNamespace, req.PodName, req.ContainerName, err) return nil, fmt.Errorf("updateSpecifiedNUMAAllocation failed with error: %v", err) } + resourceTopologyAllocationsAnnotations = getMemoryTopologyAllocationsAnnotations(map[v1.ResourceName]*state.AllocationInfo{ + v1.ResourceMemory: allocationInfo, + }, p.topologyAllocationAnnotationKey) + } + + resp, err := packAllocationResponse(map[v1.ResourceName]*state.AllocationInfo{ + v1.ResourceMemory: allocationInfo, + }, req, + map[v1.ResourceName]map[string]string{v1.ResourceMemory: p.getReclaimedResourceAllocationAnnotations(allocationInfo)}, + resourceTopologyAllocationsAnnotations, + ) + if err != nil { + general.Errorf("pod: %s/%s, container: %s packAllocationResponse failed with error: %v", + req.PodNamespace, req.PodName, req.ContainerName, err) + return nil, fmt.Errorf("packAllocationResponse failed with error: %v", err) } general.InfoS("allocate memory successfully", @@ -409,36 +428,43 @@ func (p *DynamicPolicy) dedicatedCoresWithoutNUMABindingAllocationHandler(_ cont func (p *DynamicPolicy) numaBindingAllocationSidecarHandler(_ context.Context, req *pluginapi.ResourceRequest, qosLevel string, persistCheckpoint bool, ) (*pluginapi.ResourceAllocationResponse, error) { + resourceAllocationInfo := make(map[v1.ResourceName]*state.AllocationInfo) podResourceEntries := p.state.GetPodResourceEntries() - podEntries := podResourceEntries[v1.ResourceMemory] - if podEntries[req.PodUid] == nil { - general.Infof("there is no pod entry, pod: %s/%s, sidecar: %s, waiting next reconcile", - req.PodNamespace, req.PodName, req.ContainerName) - return &pluginapi.ResourceAllocationResponse{}, nil - } + for resourceName := range req.ResourceRequests { + podEntries := podResourceEntries[v1.ResourceName(resourceName)] + if podEntries[req.PodUid] == nil { + general.Infof("there 
is no pod entry, pod: %s/%s, sidecar: %s, waiting next reconcile", + req.PodNamespace, req.PodName, req.ContainerName) + return &pluginapi.ResourceAllocationResponse{}, nil + } - // todo: consider sidecar without reconcile in vpa - mainContainerAllocationInfo, ok := podEntries.GetMainContainerAllocation(req.PodUid) - if !ok { - general.Infof("main container is not found for pod: %s/%s, sidecar: %s, waiting next reconcile", - req.PodNamespace, req.PodName, req.ContainerName) - return &pluginapi.ResourceAllocationResponse{}, nil - } + // todo: consider sidecar without reconcile in vpa + mainContainerAllocationInfo, ok := podEntries.GetMainContainerAllocation(req.PodUid) + if !ok { + general.Infof("main container is not found for pod: %s/%s, sidecar: %s, waiting next reconcile", + req.PodNamespace, req.PodName, req.ContainerName) + return &pluginapi.ResourceAllocationResponse{}, nil + } + + allocationInfo := &state.AllocationInfo{ + AllocationMeta: state.GenerateMemoryContainerAllocationMeta(req, qosLevel), + AggregatedQuantity: 0, // not count sidecar quantity + TopologyAwareAllocations: nil, // not count sidecar quantity + } - allocationInfo := &state.AllocationInfo{ - AllocationMeta: state.GenerateMemoryContainerAllocationMeta(req, qosLevel), - AggregatedQuantity: 0, // not count sidecar quantity - TopologyAwareAllocations: nil, // not count sidecar quantity + applySidecarAllocationInfoFromMainContainer(allocationInfo, mainContainerAllocationInfo) + + resourceAllocationInfo[v1.ResourceName(resourceName)] = allocationInfo } - applySidecarAllocationInfoFromMainContainer(allocationInfo, mainContainerAllocationInfo) + for resourceName, allocationInfo := range resourceAllocationInfo { + p.state.SetAllocationInfo(resourceName, req.PodUid, req.ContainerName, allocationInfo, persistCheckpoint) + } - // update pod entries directly. if one of subsequent steps is failed, - // we will delete current allocationInfo from podEntries in defer function of allocation function. 
- p.state.SetAllocationInfo(v1.ResourceMemory, req.PodUid, req.ContainerName, allocationInfo, persistCheckpoint) podResourceEntries = p.state.GetPodResourceEntries() - resourcesState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, p.state.GetMachineState(), p.state.GetReservedMemory()) + resourcesState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podResourceEntries, + p.state.GetMachineState(), p.state.GetReservedMemory(), p.extraResourceNames) if err != nil { general.Infof("pod: %s/%s, container: %s GenerateMachineStateFromPodEntries failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) @@ -446,7 +472,7 @@ func (p *DynamicPolicy) numaBindingAllocationSidecarHandler(_ context.Context, } p.state.SetMachineState(resourcesState, persistCheckpoint) - resp, err := packAllocationResponse(allocationInfo, req, nil) + resp, err := packAllocationResponse(resourceAllocationInfo, req, nil) if err != nil { general.Errorf("pod: %s/%s, container: %s packAllocationResponse failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) @@ -465,38 +491,47 @@ func (p *DynamicPolicy) allocateNUMAsWithoutNUMABindingPods(_ context.Context, } machineState := p.state.GetMachineState() - resourceState := machineState[v1.ResourceMemory] - numaWithoutNUMABindingPods := resourceState.GetNUMANodesWithoutSharedOrDedicatedNUMABindingPods() + resourceAllocationInfo := make(map[v1.ResourceName]*state.AllocationInfo) - allocationInfo := p.state.GetAllocationInfo(v1.ResourceMemory, req.PodUid, req.ContainerName) - if allocationInfo != nil { - general.Infof("pod: %s/%s, container: %s change cpuset.mems from: %s to %s", - req.PodNamespace, req.PodName, req.ContainerName, allocationInfo.NumaAllocationResult.String(), numaWithoutNUMABindingPods.String()) - } + for resourceName := range req.ResourceRequests { + resourceState := 
machineState[v1.ResourceName(resourceName)] + numaWithoutNUMABindingPods := resourceState.GetNUMANodesWithoutSharedOrDedicatedNUMABindingPods() - // use real container request size here - reqInt, _, err := util.GetQuantityFromResourceReq(req) - if err != nil { - return nil, fmt.Errorf("GetQuantityFromResourceReq failed with error: %v", err) + allocationInfo := p.state.GetAllocationInfo(v1.ResourceName(resourceName), req.PodUid, req.ContainerName) + if allocationInfo != nil { + general.Infof("pod: %s/%s, container: %s change cpuset.mems from: %s to %s", + req.PodNamespace, req.PodName, req.ContainerName, allocationInfo.NumaAllocationResult.String(), numaWithoutNUMABindingPods.String()) + } + + reqInt, _, err := util.GetQuantityFromResourceRequests(req.ResourceRequests, resourceName, req.Annotations) + if err != nil { + return nil, fmt.Errorf("GetQuantityFromResourceReq failed with error: %v", err) + } + + allocationInfo = &state.AllocationInfo{ + AllocationMeta: state.GenerateMemoryContainerAllocationMeta(req, qosLevel), + NumaAllocationResult: numaWithoutNUMABindingPods.Clone(), + AggregatedQuantity: uint64(reqInt), + } + + resourceAllocationInfo[v1.ResourceName(resourceName)] = allocationInfo } - allocationInfo = &state.AllocationInfo{ - AllocationMeta: state.GenerateMemoryContainerAllocationMeta(req, qosLevel), - NumaAllocationResult: numaWithoutNUMABindingPods.Clone(), - AggregatedQuantity: uint64(reqInt), + for resourceName, allocationInfo := range resourceAllocationInfo { + p.state.SetAllocationInfo(resourceName, req.PodUid, req.ContainerName, allocationInfo, persistCheckpoint) } - p.state.SetAllocationInfo(v1.ResourceMemory, allocationInfo.PodUid, allocationInfo.ContainerName, allocationInfo, persistCheckpoint) podResourceEntries := p.state.GetPodResourceEntries() - machineState, err = state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, machineState, p.state.GetReservedMemory()) + machineState, err := 
state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podResourceEntries, machineState, p.state.GetReservedMemory(), + p.extraResourceNames) if err != nil { general.Errorf("pod: %s/%s, container: %s GenerateMachineStateFromPodEntries failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) return nil, fmt.Errorf("calculate resourceState by updated pod entries failed with error: %v", err) } - resp, err := packAllocationResponse(allocationInfo, req, nil) + resp, err := packAllocationResponse(resourceAllocationInfo, req, nil) if err != nil { general.Errorf("pod: %s/%s, container: %s packAllocationResponse failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) @@ -509,6 +544,7 @@ func (p *DynamicPolicy) allocateNUMAsWithoutNUMABindingPods(_ context.Context, // allocateTargetNUMAs returns target numa nodes as allocation results, // and it will store the allocation in states. +// Note that allocateTargetNUMAs only allocates v1.ResourceMemory. 
func (p *DynamicPolicy) allocateTargetNUMAs(req *pluginapi.ResourceRequest, qosLevel string, targetNUMAs machine.CPUSet, persistCheckpoint bool, ) (*pluginapi.ResourceAllocationResponse, error) { @@ -530,14 +566,17 @@ func (p *DynamicPolicy) allocateTargetNUMAs(req *pluginapi.ResourceRequest, p.state.SetAllocationInfo(v1.ResourceMemory, allocationInfo.PodUid, allocationInfo.ContainerName, allocationInfo, persistCheckpoint) podResourceEntries := p.state.GetPodResourceEntries() - machineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, p.state.GetMachineState(), p.state.GetReservedMemory()) + machineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podResourceEntries, + p.state.GetMachineState(), p.state.GetReservedMemory(), p.extraResourceNames) if err != nil { general.Errorf("pod: %s/%s, container: %s GenerateMachineStateFromPodEntries failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) return nil, fmt.Errorf("calculate machineState by updated pod entries failed with error: %v", err) } - resp, err := packAllocationResponse(allocationInfo, req, nil) + resp, err := packAllocationResponse(map[v1.ResourceName]*state.AllocationInfo{ + v1.ResourceMemory: allocationInfo, + }, req, nil) if err != nil { general.Errorf("pod: %s/%s, container: %s packAllocationResponse failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) @@ -572,7 +611,8 @@ func (p *DynamicPolicy) adjustAllocationEntries(persistCheckpoint bool) error { p.adjustAllocationEntriesForSystemCores(numaSetChangedContainers, podEntries, machineState) p.adjustAllocationEntriesForReclaimedCores(numaSetChangedContainers, podEntries, machineState) - resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, p.state.GetMachineState(), p.state.GetReservedMemory()) + resourcesMachineState, err := 
state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podResourceEntries, + p.state.GetMachineState(), p.state.GetReservedMemory(), p.extraResourceNames) if err != nil { return fmt.Errorf("calculate machineState by updated pod entries failed with error: %v", err) } @@ -597,14 +637,21 @@ func (p *DynamicPolicy) adjustAllocationEntries(persistCheckpoint bool) error { // it will update the passed by machineState in-place; so the function will be // called `calculateXXX` rather than `allocateXXX` func (p *DynamicPolicy) calculateMemoryAllocation(req *pluginapi.ResourceRequest, machineState state.NUMANodeMap, qosLevel string, podAggregatedRequest int) error { + numaBinding := qosutil.AnnotationsIndicateNUMABinding(req.Annotations) + numaExclusive := qosutil.AnnotationsIndicateNUMAExclusive(req.Annotations) + distributeEvenlyAcrossNUMANodes := qosutil.AnnotationsIndicateDistributeEvenlyAcrossNuma(req.Annotations) + if req.Hint == nil { return fmt.Errorf("hint is nil") } else if len(req.Hint.Nodes) == 0 { return fmt.Errorf("hint is empty") - } else if qosutil.AnnotationsIndicateNUMABinding(req.Annotations) && - !qosutil.AnnotationsIndicateNUMAExclusive(req.Annotations) && + } else if numaBinding && !numaExclusive && !distributeEvenlyAcrossNUMANodes && len(req.Hint.Nodes) > 1 { return fmt.Errorf("NUMA not exclusive binding container has request larger than 1 NUMA") + } else if qosLevel == apiconsts.PodAnnotationQoSLevelSharedCores && numaBinding && distributeEvenlyAcrossNUMANodes { + return fmt.Errorf("shared cores with numa binding and distribute evenly across numa is not supported at the same time") + } else if numaExclusive && distributeEvenlyAcrossNUMANodes { + return fmt.Errorf("NUMA exclusive and distribute evenly across numa is not supported at the same time") } hintNumaNodes := machine.NewCPUSet(util.HintToIntArray(req.Hint)...) 
@@ -624,7 +671,8 @@ func (p *DynamicPolicy) calculateMemoryAllocation(req *pluginapi.ResourceRequest return fmt.Errorf("calculateExclusiveMemory failed with error: %v", err) } } else { - leftQuantity, err = calculateMemoryInNumaNodes(req, machineState, hintNumaNodes.ToSliceInt(), uint64(podAggregatedRequest), qosLevel) + leftQuantity, err = calculateMemoryInNumaNodes(req, machineState, hintNumaNodes.ToSliceInt(), + uint64(podAggregatedRequest), qosLevel, distributeEvenlyAcrossNUMANodes) if err != nil { return fmt.Errorf("calculateMemoryInNumaNodes failed with error: %v", err) } @@ -695,8 +743,20 @@ func calculateExclusiveMemory(req *pluginapi.ResourceRequest, // the given container, and returns the remaining un-satisfied quantity. func calculateMemoryInNumaNodes(req *pluginapi.ResourceRequest, machineState state.NUMANodeMap, numaNodes []int, - reqQuantity uint64, qosLevel string, + reqQuantity uint64, qosLevel string, distributeEvenlyAcrossNuma bool, ) (leftQuantity uint64, err error) { + var requestPerNuma uint64 + if distributeEvenlyAcrossNuma { + if len(numaNodes) == 0 { + return reqQuantity, fmt.Errorf("NUMA nodes is empty and need to distribute evenly across numa nodes") + } + + if int(reqQuantity)%len(numaNodes) != 0 { + return reqQuantity, fmt.Errorf("request quantity %d is not divisible by numa nodes number %d", reqQuantity, len(numaNodes)) + } + + requestPerNuma = reqQuantity / uint64(len(numaNodes)) + } for _, numaNode := range numaNodes { var curNumaNodeAllocated uint64 @@ -705,13 +765,27 @@ func calculateMemoryInNumaNodes(req *pluginapi.ResourceRequest, return reqQuantity, fmt.Errorf("NUMA: %d has nil state", numaNode) } - if reqQuantity < numaNodeState.Free { - curNumaNodeAllocated = reqQuantity - reqQuantity = 0 + if distributeEvenlyAcrossNuma { + // allocate exactly requestPerNuma from each NUMA node + if numaNodeState.Free < requestPerNuma { + return reqQuantity, fmt.Errorf( + "NUMA %d does not have enough free memory to distribute evenly across 
numa: need %d, have %d", + numaNode, requestPerNuma, numaNodeState.Free, + ) + } + + curNumaNodeAllocated = requestPerNuma + reqQuantity -= requestPerNuma } else { - curNumaNodeAllocated = numaNodeState.Free - reqQuantity -= numaNodeState.Free + if reqQuantity < numaNodeState.Free { + curNumaNodeAllocated = reqQuantity + reqQuantity = 0 + } else { + curNumaNodeAllocated = numaNodeState.Free + reqQuantity -= numaNodeState.Free + } } + numaNodeState.Free -= curNumaNodeAllocated numaNodeState.Allocated += curNumaNodeAllocated @@ -736,15 +810,17 @@ func calculateMemoryInNumaNodes(req *pluginapi.ResourceRequest, return reqQuantity, nil } -// packAllocationResponse fills pluginapi.ResourceAllocationResponse with information from AllocationInfo and pluginapi.ResourceRequest -func packAllocationResponse(allocationInfo *state.AllocationInfo, req *pluginapi.ResourceRequest, resourceAllocationAnnotations map[string]string) (*pluginapi.ResourceAllocationResponse, error) { - if allocationInfo == nil { - return nil, fmt.Errorf("packAllocationResponse got nil allocationInfo") +// packAllocationResponse fills pluginapi.ResourceAllocationResponse with information from map of resource to AllocationInfo and pluginapi.ResourceRequest +func packAllocationResponse(resourceAllocationInfo map[v1.ResourceName]*state.AllocationInfo, req *pluginapi.ResourceRequest, + resourceAllocationAnnotations ...map[v1.ResourceName]map[string]string, +) (*pluginapi.ResourceAllocationResponse, error) { + if resourceAllocationInfo == nil { + return nil, fmt.Errorf("packAllocationResponse got nil resourceAllocationInfo") } else if req == nil { return nil, fmt.Errorf("packAllocationResponse got nil request") } - return &pluginapi.ResourceAllocationResponse{ + resp := &pluginapi.ResourceAllocationResponse{ PodUid: req.PodUid, PodNamespace: req.PodNamespace, PodName: req.PodName, @@ -755,25 +831,43 @@ func packAllocationResponse(allocationInfo *state.AllocationInfo, req *pluginapi PodType: req.PodType, 
ResourceName: string(v1.ResourceMemory), AllocationResult: &pluginapi.ResourceAllocation{ - ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{ - string(v1.ResourceMemory): { - OciPropertyName: util.OCIPropertyNameCPUSetMems, - IsNodeResource: false, - IsScalarResource: true, - Annotations: resourceAllocationAnnotations, - AllocatedQuantity: float64(allocationInfo.AggregatedQuantity), - AllocationResult: allocationInfo.NumaAllocationResult.String(), - ResourceHints: &pluginapi.ListOfTopologyHints{ - Hints: []*pluginapi.TopologyHint{ - req.Hint, - }, - }, - }, - }, + ResourceAllocation: make(map[string]*pluginapi.ResourceAllocationInfo), }, Labels: general.DeepCopyMap(req.Labels), Annotations: general.DeepCopyMap(req.Annotations), - }, nil + } + + for resourceName, allocationInfo := range resourceAllocationInfo { + if allocationInfo == nil { + continue + } + + mergedResourceAnnotations := make([]map[string]string, 0, len(resourceAllocationAnnotations)) + for _, ra := range resourceAllocationAnnotations { + if ra == nil { + continue + } + if annos, ok := ra[resourceName]; ok { + mergedResourceAnnotations = append(mergedResourceAnnotations, annos) + } + } + + resp.AllocationResult.ResourceAllocation[string(resourceName)] = &pluginapi.ResourceAllocationInfo{ + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + Annotations: general.MergeAnnotations(mergedResourceAnnotations...), + AllocatedQuantity: float64(allocationInfo.AggregatedQuantity), + AllocationResult: allocationInfo.NumaAllocationResult.String(), + ResourceHints: &pluginapi.ListOfTopologyHints{ + Hints: []*pluginapi.TopologyHint{ + req.Hint, + }, + }, + } + } + + return resp, nil } func (p *DynamicPolicy) adjustAllocationEntriesForSharedCores(numaSetChangedContainers map[string]map[string]*state.AllocationInfo, diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers_test.go 
b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers_test.go index da6cc55a60..dd495390b6 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers_test.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_allocation_handlers_test.go @@ -18,6 +18,7 @@ package dynamicpolicy import ( "context" + "fmt" "io/ioutil" "os" "testing" @@ -122,7 +123,7 @@ func TestSharedCoresAllocationHandler(t *testing.T) { as.Nil(err) defer os.RemoveAll(tmpDir) - policy, err := getTestDynamicPolicyWithInitialization(cpuTopology, machineInfo, tmpDir) + policy, err := getTestDynamicPolicyWithExtraResourcesWithInitialization(cpuTopology, machineInfo, tmpDir) as.Nil(err) as.NotNil(policy) @@ -416,11 +417,17 @@ func TestNumaBindingAllocationHandler(t *testing.T) { Topology: []info.Node{ { Memory: 100 * 1024 * 1024 * 1024, + HugePages: []info.HugePagesInfo{ + { + PageSize: 2 * 1024, + NumPages: 1024, + }, + }, }, }, } - policy, err := getTestDynamicPolicyWithInitialization(cpuTopology, machineInfo, tmpDir) + policy, err := getTestDynamicPolicyWithExtraResourcesWithInitialization(cpuTopology, machineInfo, tmpDir) as.Nil(err) // Pre-populate state for some tests @@ -516,6 +523,29 @@ func TestNumaBindingAllocationHandler(t *testing.T) { qosLevel: apiconsts.PodAnnotationQoSLevelSharedCores, expectErr: true, // Should fail because no origin allocation info (simulated by not allocating first) }, + { + name: "allocate memory and hugepages resources", + req: &pluginapi.ResourceRequest{ + PodUid: "pod-new-hugepages", + PodNamespace: "default", + PodName: "pod-new-hugepages", + ContainerName: "container-1", + ContainerType: pluginapi.ContainerType_MAIN, + Annotations: map[string]string{ + apiconsts.PodAnnotationMemoryEnhancementNumaBinding: apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 1024 * 1024, + "hugepages-2Mi": 2 * 1024, + 
}, + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0}, + }, + }, + qosLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + expectErr: false, + }, } // For the "inplace update resize: non-binding to binding" test, we need to first allocate it WITHOUT binding, then try to update WITH binding. @@ -819,3 +849,477 @@ func TestCalculateMemoryAllocation(t *testing.T) { }) } } + +func TestCalculateMemoryInNumaNodes(t *testing.T) { + t.Parallel() + + type args struct { + req *pluginapi.ResourceRequest + machineState state.NUMANodeMap + numaNodes []int + reqQuantity uint64 + qosLevel string + distributeEvenlyAcrossNuma bool + } + tests := []struct { + name string + args args + want uint64 + wantErr bool + wantMachine state.NUMANodeMap + }{ + { + name: "distributeEvenlyAcrossNuma=true, empty numaNodes", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + machineState: state.NUMANodeMap{}, + numaNodes: []int{}, + reqQuantity: 100, + qosLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + distributeEvenlyAcrossNuma: true, + }, + want: 100, + wantErr: true, + wantMachine: state.NUMANodeMap{}, + }, + { + name: "distributeEvenlyAcrossNuma=true, reqQuantity not divisible", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{Free: 200, Allocated: 0}, + 1: &state.NUMANodeState{Free: 200, Allocated: 0}, + }, + numaNodes: []int{0, 1}, + reqQuantity: 101, + qosLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + distributeEvenlyAcrossNuma: true, + }, + want: 101, + wantErr: true, + wantMachine: state.NUMANodeMap{0: {Free: 200, Allocated: 0}, 1: {Free: 200, Allocated: 0}}, + }, + { + name: "distributeEvenlyAcrossNuma=true, numaNodeState nil", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + machineState: state.NUMANodeMap{}, + numaNodes: []int{0}, + reqQuantity: 
100, + qosLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + distributeEvenlyAcrossNuma: true, + }, + want: 100, + wantErr: true, + wantMachine: state.NUMANodeMap{}, + }, + { + name: "distributeEvenlyAcrossNuma=true, insufficient free memory", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{Free: 40, Allocated: 0}, + 1: &state.NUMANodeState{Free: 200, Allocated: 0}, + }, + numaNodes: []int{0, 1}, + reqQuantity: 100, // 50 per numa + qosLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + distributeEvenlyAcrossNuma: true, + }, + want: 100, + wantErr: true, + wantMachine: state.NUMANodeMap{0: {Free: 40, Allocated: 0}, 1: {Free: 200, Allocated: 0}}, + }, + { + name: "distributeEvenlyAcrossNuma=true, success", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + PodNamespace: "default", + PodName: "test-pod", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + }, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{Free: 200, Allocated: 0}, + 1: &state.NUMANodeState{Free: 200, Allocated: 0}, + }, + numaNodes: []int{0, 1}, + reqQuantity: 100, // 50 per numa + qosLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + distributeEvenlyAcrossNuma: true, + }, + want: 0, + wantErr: false, + wantMachine: state.NUMANodeMap{ + 0: { + Free: 150, + Allocated: 50, + PodEntries: state.PodEntries{ + "pod-1": state.ContainerEntries{ + "container-1": &state.AllocationInfo{ + AllocationMeta: state.GenerateMemoryContainerAllocationMeta(&pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + PodNamespace: "default", + PodName: "test-pod", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + }, apiconsts.PodAnnotationQoSLevelSharedCores), + AggregatedQuantity: 50, + NumaAllocationResult: machine.NewCPUSet(0), + 
TopologyAwareAllocations: map[int]uint64{ + 0: 50, + }, + }, + }, + }, + }, + 1: { + Free: 150, + Allocated: 50, + PodEntries: state.PodEntries{ + "pod-1": state.ContainerEntries{ + "container-1": &state.AllocationInfo{ + AllocationMeta: state.GenerateMemoryContainerAllocationMeta(&pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + PodNamespace: "default", + PodName: "test-pod", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + }, apiconsts.PodAnnotationQoSLevelSharedCores), + AggregatedQuantity: 50, + NumaAllocationResult: machine.NewCPUSet(1), + TopologyAwareAllocations: map[int]uint64{ + 1: 50, + }, + }, + }, + }, + }, + }, + }, + { + name: "distributeEvenlyAcrossNuma=false, numaNodeState nil", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + machineState: state.NUMANodeMap{}, + numaNodes: []int{0}, + reqQuantity: 100, + qosLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + distributeEvenlyAcrossNuma: false, + }, + want: 100, + wantErr: true, + wantMachine: state.NUMANodeMap{}, + }, + { + name: "distributeEvenlyAcrossNuma=false, reqQuantity fully satisfied by first NUMA", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + PodNamespace: "default", + PodName: "test-pod", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + }, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{Free: 200, Allocated: 0}, + 1: &state.NUMANodeState{Free: 200, Allocated: 0}, + }, + numaNodes: []int{0, 1}, + reqQuantity: 100, + qosLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + distributeEvenlyAcrossNuma: false, + }, + want: 0, + wantErr: false, + wantMachine: state.NUMANodeMap{ + 0: { + Free: 100, + Allocated: 100, + PodEntries: state.PodEntries{ + "pod-1": state.ContainerEntries{ + "container-1": &state.AllocationInfo{ + AllocationMeta: 
state.GenerateMemoryContainerAllocationMeta(&pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + PodNamespace: "default", + PodName: "test-pod", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + }, apiconsts.PodAnnotationQoSLevelSharedCores), + AggregatedQuantity: 100, + NumaAllocationResult: machine.NewCPUSet(0), + TopologyAwareAllocations: map[int]uint64{ + 0: 100, + }, + }, + }, + }, + }, + 1: {Free: 200, Allocated: 0}, + }, + }, + { + name: "distributeEvenlyAcrossNuma=false, reqQuantity partially satisfied", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + PodNamespace: "default", + PodName: "test-pod", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + }, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{Free: 100, Allocated: 0}, + 1: &state.NUMANodeState{Free: 150, Allocated: 0}, + }, + numaNodes: []int{0, 1}, + reqQuantity: 300, + qosLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + distributeEvenlyAcrossNuma: false, + }, + want: 50, // 300 - 100 - 150 = 50 + wantErr: false, + wantMachine: state.NUMANodeMap{ + 0: { + Free: 0, + Allocated: 100, + PodEntries: state.PodEntries{ + "pod-1": state.ContainerEntries{ + "container-1": &state.AllocationInfo{ + AllocationMeta: state.GenerateMemoryContainerAllocationMeta(&pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + PodNamespace: "default", + PodName: "test-pod", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + }, apiconsts.PodAnnotationQoSLevelSharedCores), + AggregatedQuantity: 100, + NumaAllocationResult: machine.NewCPUSet(0), + TopologyAwareAllocations: map[int]uint64{ + 0: 100, + }, + }, + }, + }, + }, + 1: { + Free: 0, + Allocated: 150, + PodEntries: state.PodEntries{ + "pod-1": state.ContainerEntries{ + "container-1": &state.AllocationInfo{ + AllocationMeta: 
state.GenerateMemoryContainerAllocationMeta(&pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + PodNamespace: "default", + PodName: "test-pod", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + }, apiconsts.PodAnnotationQoSLevelSharedCores), + AggregatedQuantity: 150, + NumaAllocationResult: machine.NewCPUSet(1), + TopologyAwareAllocations: map[int]uint64{ + 1: 150, + }, + }, + }, + }, + }, + }, + }, + { + name: "distributeEvenlyAcrossNuma=false, reqQuantity exactly satisfied", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + PodNamespace: "default", + PodName: "test-pod", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + }, + machineState: state.NUMANodeMap{ + 0: &state.NUMANodeState{Free: 100, Allocated: 0}, + 1: &state.NUMANodeState{Free: 150, Allocated: 0}, + }, + numaNodes: []int{0, 1}, + reqQuantity: 250, + qosLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + distributeEvenlyAcrossNuma: false, + }, + want: 0, + wantErr: false, + wantMachine: state.NUMANodeMap{ + 0: { + Free: 0, + Allocated: 100, + PodEntries: state.PodEntries{ + "pod-1": state.ContainerEntries{ + "container-1": &state.AllocationInfo{ + AllocationMeta: state.GenerateMemoryContainerAllocationMeta(&pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + PodNamespace: "default", + PodName: "test-pod", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + }, apiconsts.PodAnnotationQoSLevelSharedCores), + AggregatedQuantity: 100, + NumaAllocationResult: machine.NewCPUSet(0), + TopologyAwareAllocations: map[int]uint64{ + 0: 100, + }, + }, + }, + }, + }, + 1: { + Free: 0, + Allocated: 150, + PodEntries: state.PodEntries{ + "pod-1": state.ContainerEntries{ + "container-1": &state.AllocationInfo{ + AllocationMeta: 
state.GenerateMemoryContainerAllocationMeta(&pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + PodNamespace: "default", + PodName: "test-pod", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + }, apiconsts.PodAnnotationQoSLevelSharedCores), + AggregatedQuantity: 150, + NumaAllocationResult: machine.NewCPUSet(1), + TopologyAwareAllocations: map[int]uint64{ + 1: 150, + }, + }, + }, + }, + }, + }, + }, + { + name: "distributeEvenlyAcrossNuma=false, existing pod entries", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-2", + PodNamespace: "default", + PodName: "test-pod", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + }, + machineState: state.NUMANodeMap{ + 0: { + Free: 100, + Allocated: 100, + PodEntries: state.PodEntries{ + "pod-1": state.ContainerEntries{ + "container-1": &state.AllocationInfo{ + AggregatedQuantity: 100, + }, + }, + }, + }, + }, + numaNodes: []int{0}, + reqQuantity: 50, + qosLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + distributeEvenlyAcrossNuma: false, + }, + want: 0, + wantErr: false, + wantMachine: state.NUMANodeMap{ + 0: { + Free: 50, + Allocated: 150, + PodEntries: state.PodEntries{ + "pod-1": state.ContainerEntries{ + "container-1": &state.AllocationInfo{ + AggregatedQuantity: 100, + }, + "container-2": &state.AllocationInfo{ + AllocationMeta: state.GenerateMemoryContainerAllocationMeta(&pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-2", + PodNamespace: "default", + PodName: "test-pod", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + }, apiconsts.PodAnnotationQoSLevelSharedCores), + AggregatedQuantity: 50, + NumaAllocationResult: machine.NewCPUSet(0), + TopologyAwareAllocations: map[int]uint64{ + 0: 50, + }, + }, + }, + }, + }, + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, 
func(t *testing.T) { + t.Parallel() + got, err := calculateMemoryInNumaNodes(tt.args.req, tt.args.machineState, tt.args.numaNodes, tt.args.reqQuantity, tt.args.qosLevel, tt.args.distributeEvenlyAcrossNuma) + if (err != nil) != tt.wantErr { + t.Errorf("calculateMemoryInNumaNodes() error = %v, wantErr %v", err, tt.wantErr) + return + } + assert.Equal(t, tt.want, got) + + // Deep compare machineState, ignoring NumaAllocationResult and TopologyAwareAllocations in AllocationInfo + // because machine.NewCPUSet(numaNode) creates a new object each time, which will fail deep equality. + // Instead, we compare the string representation of NumaAllocationResult and the content of TopologyAwareAllocations. + if !tt.wantErr { + for numaID, wantNumaState := range tt.wantMachine { + gotNumaState := tt.args.machineState[numaID] + assert.NotNil(t, gotNumaState, fmt.Sprintf("NUMA %d state is nil in actual machine state", numaID)) + assert.Equal(t, wantNumaState.Free, gotNumaState.Free, fmt.Sprintf("NUMA %d Free mismatch", numaID)) + assert.Equal(t, wantNumaState.Allocated, gotNumaState.Allocated, fmt.Sprintf("NUMA %d Allocated mismatch", numaID)) + + for podUID, wantContainerEntries := range wantNumaState.PodEntries { + gotContainerEntries, ok := gotNumaState.PodEntries[podUID] + assert.True(t, ok, fmt.Sprintf("Pod %s not found in NUMA %d", podUID, numaID)) + + for containerName, wantAllocInfo := range wantContainerEntries { + gotAllocInfo, ok := gotContainerEntries[containerName] + assert.True(t, ok, fmt.Sprintf("Container %s not found for Pod %s in NUMA %d", containerName, podUID, numaID)) + + assert.Equal(t, wantAllocInfo.AggregatedQuantity, gotAllocInfo.AggregatedQuantity, fmt.Sprintf("AggregatedQuantity mismatch for %s/%s in NUMA %d", podUID, containerName, numaID)) + assert.Equal(t, wantAllocInfo.NumaAllocationResult.String(), gotAllocInfo.NumaAllocationResult.String(), fmt.Sprintf("NumaAllocationResult mismatch for %s/%s in NUMA %d", podUID, containerName, numaID)) + 
assert.Equal(t, wantAllocInfo.TopologyAwareAllocations, gotAllocInfo.TopologyAwareAllocations, fmt.Sprintf("TopologyAwareAllocations mismatch for %s/%s in NUMA %d", podUID, containerName, numaID)) + assert.Equal(t, wantAllocInfo.AllocationMeta, gotAllocInfo.AllocationMeta, fmt.Sprintf("AllocationMeta mismatch for %s/%s in NUMA %d", podUID, containerName, numaID)) + } + } + } + } + }) + } +} diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_async_handler.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_async_handler.go index dff75d8617..c2693a0ce1 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_async_handler.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_async_handler.go @@ -161,7 +161,8 @@ func (p *DynamicPolicy) setExtraControlKnobByConfigs(_ *coreconfig.Configuration } var resourcesMachineState state.NUMANodeResourcesMap - resourcesMachineState, err = state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, p.state.GetMachineState(), p.state.GetReservedMemory()) + resourcesMachineState, err = state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podResourceEntries, + p.state.GetMachineState(), p.state.GetReservedMemory(), p.extraResourceNames) if err != nil { general.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err) return @@ -492,7 +493,8 @@ func (p *DynamicPolicy) clearResidualState(_ *coreconfig.Configuration, } } - resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, p.state.GetMachineState(), p.state.GetReservedMemory()) + resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podResourceEntries, + p.state.GetMachineState(), p.state.GetReservedMemory(), p.extraResourceNames) if err != nil { general.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err) return @@ -817,7 
+819,8 @@ func (p *DynamicPolicy) syncOOMPriority(conf *coreconfig.Configuration, } var resourcesMachineState state.NUMANodeResourcesMap - resourcesMachineState, err = state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, p.state.GetMachineState(), p.state.GetReservedMemory()) + resourcesMachineState, err = state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podResourceEntries, + p.state.GetMachineState(), p.state.GetReservedMemory(), p.extraResourceNames) if err != nil { general.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err) return diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go index 15b7cc2fc5..23d76ed1ad 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go @@ -24,6 +24,7 @@ import ( v1 "k8s.io/api/core/v1" pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + "k8s.io/kubernetes/pkg/apis/core/v1/helper" apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" @@ -138,22 +139,35 @@ func (p *DynamicPolicy) numaBindingHintHandler(_ context.Context, }) } - podAggregatedRequest, _, err := util.GetPodAggregatedRequestResource(req) + requestedResources, _, err := util.GetPodAggregatedRequestResourceMap(req) if err != nil { - return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err) + return nil, fmt.Errorf("get pod aggregated request map failed with error %v", err) } resourcesMachineState := p.state.GetMachineState() var hints map[string]*pluginapi.ListOfTopologyHints - allocationInfo := p.state.GetAllocationInfo(v1.ResourceMemory, req.PodUid, req.ContainerName) - if allocationInfo != nil { - if allocationInfo.NumaAllocationResult.Size() != 1 { - general.Errorf("pod: 
%s/%s, container: %s is share cores with numa binding, but its numa set length is %d", - req.PodNamespace, req.PodName, req.ContainerName, allocationInfo.NumaAllocationResult.Size()) - return nil, fmt.Errorf("invalid numa set size") + resourceAllocationInfo := p.state.GetResourceAllocationInfo(req.PodUid, req.ContainerName) + if len(resourceAllocationInfo) != 0 { + // Check if there is scale up for hugepages + for resName, allocationInfo := range resourceAllocationInfo { + if allocationInfo == nil { + continue + } + + if _, ok := requestedResources[resName]; !ok || !helper.IsHugePageResourceName(resName) { + continue + } + + // Scale up for hugepages is not supported + if allocationInfo.AggregatedQuantity < uint64(requestedResources[resName]) { + general.Errorf("memory's already allocated with smaller quantity(%d) than requested(%d) for pod(%s_%s(%s))", + allocationInfo.AggregatedQuantity, requestedResources[resName], allocationInfo.PodName, allocationInfo.PodNamespace, allocationInfo.PodUid) + return nil, fmt.Errorf("resource %v already allocated with smaller quantity(%v < %v)", req.ResourceName, allocationInfo.AggregatedQuantity, + requestedResources[resName]) + } } - hints = regenerateHints(allocationInfo, util.PodInplaceUpdateResizing(req)) + hints = regenerateHints(resourceAllocationInfo, util.PodInplaceUpdateResizing(req), req, requestedResources) // clear the current container and regenerate machine state in follow cases: // 1. regenerateHints failed. 
@@ -168,38 +182,22 @@ func (p *DynamicPolicy) numaBindingHintHandler(_ context.Context, return nil, fmt.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err) } } - - if allocationInfo.NumaAllocationResult.Size() != 1 { - general.Errorf("pod: %s/%s, container: %s is snb, but its numa size is %d", - req.PodNamespace, req.PodName, req.ContainerName, allocationInfo.NumaAllocationResult.Size()) - return nil, fmt.Errorf("invalid hints for inplace update pod") + } else { + // if hints exists in extra state-file, prefer to use them + totalAvailableNUMAs := p.topology.CPUDetails.NUMANodes() + for resource := range requestedResources { + availableNUMA := resourcesMachineState[resource].GetNUMANodesWithoutSharedOrDedicatedNUMABindingPods() + totalAvailableNUMAs = totalAvailableNUMAs.Intersection(availableNUMA) } - machineMemoryState := resourcesMachineState[v1.ResourceMemory] - nodeID := allocationInfo.NumaAllocationResult.ToSliceInt()[0] - nodeMemoryState := machineMemoryState[nodeID] - - // the main container aggregated quantity involve all container requests of the pod in memory admit. 
- originPodAggregatedRequest := allocationInfo.AggregatedQuantity - general.Infof("pod: %s/%s, main container: %s request to memory inplace update resize (%d->%d)", - req.PodNamespace, req.PodName, req.ContainerName, originPodAggregatedRequest, podAggregatedRequest) - - if uint64(podAggregatedRequest) > nodeMemoryState.Free && uint64(podAggregatedRequest) > originPodAggregatedRequest { // scaling up and no left resource to scale out - general.Infof("pod: %s/%s, container: %s request to memory inplace update resize (%d->%d, diff: %d), but no enough memory(%d)", - req.PodNamespace, req.PodName, req.ContainerName, originPodAggregatedRequest, podAggregatedRequest, uint64(podAggregatedRequest)-originPodAggregatedRequest, nodeMemoryState.Free) - return nil, fmt.Errorf("memory inplace update resize scale out failed with no enough resource") + requestedResourcesList := make([]v1.ResourceName, 0, len(requestedResources)) + for resName := range requestedResources { + requestedResourcesList = append(requestedResourcesList, resName) } - general.Infof("pod: %s/%s, container: %s request inplace update resize, there is enough resource for it in current NUMA", - req.PodNamespace, req.PodName, req.ContainerName) - hints = regenerateHints(allocationInfo, false) - } else { - // if hints exists in extra state-file, prefer to use them - availableNUMAs := resourcesMachineState[v1.ResourceMemory].GetNUMANodesWithoutSharedOrDedicatedNUMABindingPods() - var extraErr error - hints, extraErr = util.GetHintsFromExtraStateFile(req.PodName, string(v1.ResourceMemory), - p.extraStateFileAbsPath, availableNUMAs) + hints, extraErr = util.GetHintsFromExtraStateFile(req.PodName, p.extraStateFileAbsPath, + totalAvailableNUMAs, requestedResourcesList) if extraErr != nil { general.Infof("pod: %s/%s, container: %s GetHintsFromExtraStateFile failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, extraErr) @@ -210,7 +208,7 @@ func (p *DynamicPolicy) numaBindingHintHandler(_ 
context.Context, // otherwise, calculate hint for container without allocated memory var calculateErr error // calculate hint for container without allocated memory - hints, calculateErr = p.calculateHints(uint64(podAggregatedRequest), resourcesMachineState, req) + hints, calculateErr = p.calculateHints(resourcesMachineState, req, requestedResources) if calculateErr != nil { general.Errorf("failed to calculate hints for pod: %s/%s, container: %s, error: %v", req.PodNamespace, req.PodName, req.ContainerName, calculateErr) @@ -249,7 +247,7 @@ func (p *DynamicPolicy) reclaimedCoresWithNUMABindingHintHandler(_ context.Conte allocationInfo := p.state.GetAllocationInfo(v1.ResourceMemory, req.PodUid, req.ContainerName) if allocationInfo != nil { - hints = regenerateHints(allocationInfo, util.PodInplaceUpdateResizing(req)) + hints = regenerateSingleResourceHints(allocationInfo, util.PodInplaceUpdateResizing(req), v1.ResourceMemory) if hints == nil { if uint64(podAggregatedRequest) > allocationInfo.AggregatedQuantity { resourcesMachineState, err = p.clearContainerAndRegenerateMachineState(req) @@ -278,7 +276,7 @@ func (p *DynamicPolicy) reclaimedCoresWithNUMABindingHintHandler(_ context.Conte general.Infof("pod: %s/%s, container: %s request memory inplace update resize, there is enough resource for it in current NUMA", req.PodNamespace, req.PodName, req.ContainerName) - hints = regenerateHints(allocationInfo, false) + hints = regenerateSingleResourceHints(allocationInfo, false, v1.ResourceMemory) } } @@ -312,7 +310,8 @@ func (p *DynamicPolicy) clearContainerAndRegenerateMachineState(req *pluginapi.R } var err error - resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), podResourceEntries, p.state.GetMachineState(), p.state.GetReservedMemory()) + resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(p.state.GetMachineInfo(), p.state.GetMemoryTopology(), podResourceEntries, + p.state.GetMachineState(), 
p.state.GetReservedMemory(), p.extraResourceNames) if err != nil { general.Errorf("pod: %s/%s, container: %s GenerateMachineStateFromPodEntries failed with error: %v", req.PodNamespace, req.PodName, req.ContainerName, err) @@ -330,18 +329,19 @@ func (p *DynamicPolicy) dedicatedCoresWithoutNUMABindingHintHandler(_ context.Co // calculateHints is a helper function to calculate the topology hints // with the given container requests. -func (p *DynamicPolicy) calculateHints(reqInt uint64, +func (p *DynamicPolicy) calculateHints( resourcesMachineState state.NUMANodeResourcesMap, req *pluginapi.ResourceRequest, + requestedResources map[v1.ResourceName]int, ) (map[string]*pluginapi.ListOfTopologyHints, error) { - machineState := resourcesMachineState[v1.ResourceMemory] + mainMachineState := resourcesMachineState[v1.ResourceMemory] - if len(machineState) == 0 { - return nil, fmt.Errorf("calculateHints with empty machineState") + if len(mainMachineState) == 0 { + return nil, fmt.Errorf("calculateHints with empty memory machine state") } - numaNodes := make([]int, 0, len(machineState)) - for numaNode := range machineState { + numaNodes := make([]int, 0, len(mainMachineState)) + for numaNode := range mainMachineState { numaNodes = append(numaNodes, numaNode) } @@ -354,50 +354,85 @@ func (p *DynamicPolicy) calculateHints(reqInt uint64, sort.Ints(numaNodes) } - bytesPerNUMA, err := machineState.BytesPerNUMA() - if err != nil { - return nil, fmt.Errorf("getBytesPerNUMAFromMachineState failed with error: %v", err) - } - - minNUMAsCountNeeded, _, err := util.GetNUMANodesCountToFitMemoryReq(reqInt, bytesPerNUMA, len(machineState)) - if err != nil { - return nil, fmt.Errorf("GetNUMANodesCountToFitMemoryReq failed with error: %v", err) - } reqAnnotations := req.Annotations + sharedCores := qosutil.AnnotationsIndicateSharedCores(reqAnnotations) numaBinding := qosutil.AnnotationsIndicateNUMABinding(reqAnnotations) numaExclusive := 
qosutil.AnnotationsIndicateNUMAExclusive(reqAnnotations) - - // because it's hard to control memory allocation accurately, - // we only support numa_binding but not exclusive container with request smaller than 1 NUMA - if numaBinding && !numaExclusive && minNUMAsCountNeeded > 1 { - return nil, fmt.Errorf("NUMA not exclusive binding container has request larger than 1 NUMA") - } + distributeEvenlyAcrossNuma := qosutil.AnnotationsIndicateDistributeEvenlyAcrossNuma(reqAnnotations) numaPerSocket, err := p.topology.NUMAsPerSocket() if err != nil { return nil, fmt.Errorf("NUMAsPerSocket failed with error: %v", err) } - numaToFreeMemoryBytes := make(map[int]uint64, len(numaNodes)) + // minAffinitySize is the smallest number of NUMA nodes needed for a hint. + // Initialize minAffinitySize to include all NUMA cells. + minAffinitySize := len(numaNodes) - for _, nodeID := range numaNodes { - if machineState[nodeID] == nil { - general.Warningf("NUMA: %d has nil state", nodeID) - numaToFreeMemoryBytes[nodeID] = 0 - continue + // minNUMAsCountNeeded is the minimum number of NUMA nodes needed to fulfill the resource requests given the requested quantity + // and the number of bytes in one NUMA node. + // Initialize minNUMAsCountNeeded to include all NUMA cells. 
+ minNUMAsCountNeeded := len(numaNodes) + + freeMemoryByResourceAndNUMA := make(map[v1.ResourceName]map[int]uint64, len(requestedResources)) + + for resourceName, requestedSize := range requestedResources { + // calculate all the free memory for each resource and each numa node + machineState := resourcesMachineState[resourceName] + if len(machineState) == 0 { + return nil, fmt.Errorf("calculateHints with empty mainMachineState for resource %s", resourceName) } - if numaExclusive && machineState[nodeID].Allocated > 0 { - numaToFreeMemoryBytes[nodeID] = 0 - general.Warningf("numa_exclusive container skip NUMA: %d allocated: %d", - nodeID, machineState[nodeID].Allocated) - } else { - numaToFreeMemoryBytes[nodeID] = machineState[nodeID].Free + bytesPerNUMA, err := machineState.BytesPerNUMA() + if err != nil { + return nil, fmt.Errorf("BytesPerNUMA from machine state failed with error: %v", err) + } + + currMinNUMAsCount, _, err := util.GetNUMANodesCountToFitMemoryReq(uint64(requestedSize), bytesPerNUMA, len(machineState)) + if err != nil { + return nil, fmt.Errorf("GetNUMANodesCountToFitMemoryReq failed with error: %v", err) + } + + if currMinNUMAsCount < minNUMAsCountNeeded { + minNUMAsCountNeeded = currMinNUMAsCount + } + + // because it's hard to control memory allocation accurately, + // we only support numa_binding but not exclusive container with request smaller than 1 NUMA + // pods with distribute evenly across numa annotation can occupy more than 1 NUMA + if numaBinding && !numaExclusive && !distributeEvenlyAcrossNuma && minNUMAsCountNeeded > 1 { + return nil, fmt.Errorf("NUMA not exclusive binding container with no distribute_evenly_across_numa" + + " has request larger than 1 NUMA") + } + + if numaExclusive && distributeEvenlyAcrossNuma { + return nil, fmt.Errorf("NUMA exclusive and distribute_evenly_across_numa is not supported at the same time") + } + + if sharedCores && numaBinding && distributeEvenlyAcrossNuma { + return nil, fmt.Errorf("shared cores 
with numa binding and distribute_evenly_across_numa is not supported at the same time") + } + + freeMemoryByResourceAndNUMA[resourceName] = make(map[int]uint64, len(numaNodes)) + for _, nodeID := range numaNodes { + if machineState[nodeID] == nil { + general.Warningf("NUMA: %d has nil state for resource %s", nodeID, resourceName) + freeMemoryByResourceAndNUMA[resourceName][nodeID] = 0 + continue + } + + if numaExclusive && machineState[nodeID].Allocated > 0 { + freeMemoryByResourceAndNUMA[resourceName][nodeID] = 0 + general.Warningf("numa_exclusive container skip NUMA: %d allocated: %d for resource: %s", + nodeID, machineState[nodeID].Allocated, resourceName) + } else { + freeMemoryByResourceAndNUMA[resourceName][nodeID] = machineState[nodeID].Free + } } } - general.Infof("calculate hints with req: %d, numaToFreeMemoryBytes: %+v", - reqInt, numaToFreeMemoryBytes) + general.Infof("calculate hints with requested resources: %+v, freeMemoryByResourceAndNUMA: %+v", + requestedResources, freeMemoryByResourceAndNUMA) numaBound := len(numaNodes) if numaBound > machine.LargeNUMAsPoint { @@ -405,64 +440,145 @@ func (p *DynamicPolicy) calculateHints(reqInt uint64, numaBound = minNUMAsCountNeeded + 1 } - var availableNumaHints []*pluginapi.TopologyHint + availableNumaHints := make(map[string]*pluginapi.ListOfTopologyHints) machine.IterateBitMasks(numaNodes, numaBound, func(mask machine.BitMask) { maskCount := mask.Count() if maskCount < minNUMAsCountNeeded { return - } else if numaBinding && !numaExclusive && maskCount > 1 { + } else if numaBinding && !numaExclusive && !distributeEvenlyAcrossNuma && maskCount > 1 { // because it's hard to control memory allocation accurately, // we only support numa_binding but not exclusive container with request smaller than 1 NUMA + // pods with distribute evenly across numa annotation can occupy more than 1 NUMA return } maskBits := mask.GetBits() numaCountNeeded := mask.Count() - var freeBytesInMask uint64 = 0 + totalFreeSize := 
map[v1.ResourceName]uint64{} + totalAllocatableSize := map[v1.ResourceName]uint64{} + for _, nodeID := range maskBits { - freeBytesInMask += numaToFreeMemoryBytes[nodeID] + for resourceName := range requestedResources { + machineState := resourcesMachineState[resourceName] + + if _, ok := totalFreeSize[resourceName]; !ok { + totalFreeSize[resourceName] = 0 + } + totalFreeSize[resourceName] += machineState[nodeID].Free + + if _, ok := totalAllocatableSize[resourceName]; !ok { + totalAllocatableSize[resourceName] = 0 + } + totalAllocatableSize[resourceName] += machineState[nodeID].Allocatable + } } - if freeBytesInMask < reqInt { - return + for resourceName, requestedSize := range requestedResources { + // verify that for all memory types the node mask has enough allocatable resources + if totalAllocatableSize[resourceName] < uint64(requestedSize) { + return + } } - crossSockets, err := machine.CheckNUMACrossSockets(maskBits, p.topology) - if err != nil { - return - } else if numaCountNeeded <= numaPerSocket && crossSockets { - return + // set the minimum amount of NUMA nodes that can satisfy the container resources requests + if mask.Count() < minAffinitySize { + minAffinitySize = mask.Count() } - availableNumaHints = append(availableNumaHints, &pluginapi.TopologyHint{ - Nodes: machine.MaskToUInt64Array(mask), - Preferred: len(maskBits) == minNUMAsCountNeeded, - }) + // Start generating hints for each memory resource type + for resourceName, requestedSize := range requestedResources { + var freeBytesInMask uint64 = 0 + for _, nodeID := range maskBits { + freeBytesInMask += freeMemoryByResourceAndNUMA[resourceName][nodeID] + } + + if freeBytesInMask < uint64(requestedSize) { + return + } + + crossSockets, err := machine.CheckNUMACrossSockets(maskBits, p.topology) + if err != nil { + return + } else if numaCountNeeded <= numaPerSocket && crossSockets { + return + } + + // check if the mask can be evenly allocated + if distributeEvenlyAcrossNuma && maskCount > 1 { + if 
requestedSize%maskCount != 0 { + return + } + + requestedSizePerNode := requestedSize / maskCount + machineState := resourcesMachineState[resourceName] + + // Check if each node has enough free memory + for _, nodeID := range maskBits { + if machineState[nodeID] == nil { + return + } + + if machineState[nodeID].Free < uint64(requestedSizePerNode) { + return + } + } + } + + if _, ok := availableNumaHints[string(resourceName)]; !ok { + availableNumaHints[string(resourceName)] = &pluginapi.ListOfTopologyHints{ + Hints: make([]*pluginapi.TopologyHint, 0), + } + } + + // Append to the slice and assign the result back to the map + availableNumaHints[string(resourceName)].Hints = append(availableNumaHints[string(resourceName)].Hints, &pluginapi.TopologyHint{ + Nodes: machine.MaskToUInt64Array(mask), + Preferred: false, + }) + } }) - // todo support numa_binding without numa_exclusive in the future - if numaBinding && numaExclusive { - err = p.preferAvailableNumaHintsByPreOccupation(req, machineState, availableNumaHints) - if err != nil { - return nil, fmt.Errorf("preferAvailableNumaHintsByPreOccupation failed with error: %v", err) + for resourceName := range requestedResources { + // update hints preferred according to whether the minimal amount of NUMA nodes are used. + topologyHintsList, ok := availableNumaHints[string(resourceName)] + if !ok { + general.Warningf("calculateHints got no available memory hints for resource: %s, pod: %s/%s, container: %s", + resourceName, req.PodNamespace, req.PodName, req.ContainerName) + return nil, errNoAvailableMemoryHints } - } - // NOTE: because grpc is inability to distinguish between an empty array and nil, - // we return an error instead of an empty array. - // we should resolve this issue if we need manage multi resource in one plugin. 
- if len(availableNumaHints) == 0 { - general.Warningf("calculateHints got no available memory hints for pod: %s/%s, container: %s", - req.PodNamespace, req.PodName, req.ContainerName) - return nil, errNoAvailableMemoryHints + hints := topologyHintsList.Hints + + // NOTE: because grpc is inability to distinguish between an empty array and nil, + // we return an error instead of an empty array. + // we should resolve this issue if we need manage multi resource in one plugin. + if len(hints) == 0 { + general.Warningf("calculateHints got no available memory hints for resource: %s, pod: %s/%s, container: %s", + resourceName, req.PodNamespace, req.PodName, req.ContainerName) + return nil, errNoAvailableMemoryHints + } + + for _, hint := range hints { + hint.Preferred = p.isHintPreferred(hint.Nodes, minAffinitySize) + } + + machineState := resourcesMachineState[resourceName] + + // todo support numa_binding without numa_exclusive in the future + if numaBinding && numaExclusive { + err = p.preferAvailableNumaHintsByPreOccupation(req, machineState, hints) + if err != nil { + return nil, fmt.Errorf("preferAvailableNumaHintsByPreOccupation failed with error: %v", err) + } + } } - return map[string]*pluginapi.ListOfTopologyHints{ - string(v1.ResourceMemory): { - Hints: availableNumaHints, - }, - }, nil + return availableNumaHints, nil +} + +func (p *DynamicPolicy) isHintPreferred(maskBits []uint64, minAffinitySize int) bool { + return len(maskBits) == minAffinitySize } // calculateHints is a helper function to calculate the topology hints @@ -653,10 +769,50 @@ func (p *DynamicPolicy) filterNUMANodesByNonBindingReclaimedRequestedQuantity(no return filteredNUMANodes } -// regenerateHints regenerates hints for container that'd already been allocated memory, +// regenerateHints regenerates hints for all resource requests for a container that'd already been allocated memory, // and regenerateHints will assemble hints based on already-existed AllocationInfo, // without any 
calculation logics at all -func regenerateHints(allocationInfo *state.AllocationInfo, regenerate bool) map[string]*pluginapi.ListOfTopologyHints { +func regenerateHints(allAllocationInfo map[v1.ResourceName]*state.AllocationInfo, regenerate bool, + req *pluginapi.ResourceRequest, requestedResources map[v1.ResourceName]int, +) map[string]*pluginapi.ListOfTopologyHints { + hints := map[string]*pluginapi.ListOfTopologyHints{} + + if regenerate { + general.ErrorS(nil, "need to regenerate hints", + "podNamespace", req.PodNamespace, + "podName", req.PodName, + "podUID", req.PodUid, "containerName", req.ContainerName) + return nil + } + + if len(allAllocationInfo) != len(requestedResources) { + general.Errorf("number of requested resources by the container differs from state resources, podName: %v, containerName: %v", + req.PodName, req.ContainerName) + return nil + } + + for resourceName, allocInfo := range allAllocationInfo { + if allocInfo == nil { + continue + } + + singleResourceHints := regenerateSingleResourceHints(allocInfo, false, resourceName) + if singleResourceHints == nil { + continue + } + + hints[string(resourceName)] = singleResourceHints[string(resourceName)] + } + + return hints +} + +// regenerateSingleResourceHints regenerates hints for a single resource for a container that has already been allocated memory, +// and regenerateSingleResourceHints will assemble hints based on already-existed AllocationInfo, +// without any calculation logics at all +func regenerateSingleResourceHints(allocationInfo *state.AllocationInfo, regenerate bool, + resourceName v1.ResourceName, +) map[string]*pluginapi.ListOfTopologyHints { hints := map[string]*pluginapi.ListOfTopologyHints{} if regenerate { @@ -673,8 +829,10 @@ func regenerateHints(allocationInfo *state.AllocationInfo, regenerate bool) map[ "podNamespace", allocationInfo.PodNamespace, "podName", allocationInfo.PodName, "containerName", allocationInfo.ContainerName, - "hint", allocatedNumaNodes) - 
hints[string(v1.ResourceMemory)] = &pluginapi.ListOfTopologyHints{ + "hint", allocatedNumaNodes, + "resourceName", resourceName) + + hints[string(resourceName)] = &pluginapi.ListOfTopologyHints{ Hints: []*pluginapi.TopologyHint{ { Nodes: allocatedNumaNodes, @@ -682,6 +840,7 @@ func regenerateHints(allocationInfo *state.AllocationInfo, regenerate bool) map[ }, }, } + return hints } diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers_test.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers_test.go new file mode 100644 index 0000000000..8c1f857483 --- /dev/null +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers_test.go @@ -0,0 +1,630 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package dynamicpolicy + +import ( + "context" + "os" + "testing" + + info "github.com/google/cadvisor/info/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + v1 "k8s.io/api/core/v1" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + "github.com/kubewharf/katalyst-api/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +func TestDynamicPolicy_numaBindingHintHandler(t *testing.T) { + t.Parallel() + + type args struct { + req *pluginapi.ResourceRequest + } + tests := []struct { + name string + args args + wantErr bool + want *pluginapi.ResourceHintsResponse + }{ + { + name: "test for sidecar container", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_SIDECAR, + }, + }, + wantErr: false, + want: &pluginapi.ResourceHintsResponse{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_SIDECAR, + ResourceName: string(v1.ResourceMemory), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + string(v1.ResourceMemory): nil, + }, + }, + }, + { + name: "test for dedicated cores with numa binding without numa exclusive main container", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 1024 * 1024 * 1024, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + }, + wantErr: false, + want: &pluginapi.ResourceHintsResponse{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: 
pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + string(v1.ResourceMemory): { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0}, + Preferred: true, + }, + { + Nodes: []uint64{1}, + Preferred: true, + }, + { + Nodes: []uint64{2}, + Preferred: true, + }, + { + Nodes: []uint64{3}, + Preferred: true, + }, + }, + }, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + }, + { + name: "test for dedicated cores with numa binding with distribute evenly across numa", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 160 * 1024 * 1024 * 1024, // 160Gi + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma: consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable, + }, + }, + }, + wantErr: false, + want: &pluginapi.ResourceHintsResponse{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + string(v1.ResourceMemory): { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0, 1}, + Preferred: true, + }, + { + Nodes: []uint64{2, 3}, + Preferred: true, + }, + { + Nodes: []uint64{0, 1, 2, 3}, + Preferred: false, + }, + }, + }, + }, + Annotations: map[string]string{ + 
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma: consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable, + }, + }, + }, + { + name: "shared cores with numa binding and distribute evenly across numa will return error", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 160 * 1024 * 1024 * 1024, // 160Gi + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma: consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable, + }, + }, + }, + wantErr: true, + want: nil, + }, + { + name: "test for hugepages-2Mi dedicated cores with numa binding without numa exclusive main container", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + "hugepages-2Mi": 2 * 1024 * 1024 * 1024, // 2Gi + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + }, + wantErr: false, + want: &pluginapi.ResourceHintsResponse{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: 
string(v1.ResourceMemory), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + "hugepages-2Mi": { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0}, + Preferred: true, + }, + { + Nodes: []uint64{1}, + Preferred: true, + }, + { + Nodes: []uint64{2}, + Preferred: true, + }, + { + Nodes: []uint64{3}, + Preferred: true, + }, + }, + }, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + }, + { + name: "test for hugepages-2Mi dedicated cores with numa binding with numa exclusive main container", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + "hugepages-2Mi": 4 * 1024 * 1024 * 1024, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + }, + }, + }, + wantErr: false, + want: &pluginapi.ResourceHintsResponse{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + "hugepages-2Mi": { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0, 1}, + Preferred: true, + }, + { + Nodes: []uint64{2, 3}, + Preferred: true, + }, + { + Nodes: []uint64{0, 1, 2}, + Preferred: false, + }, + { + Nodes: []uint64{0, 1, 3}, + Preferred: false, + }, + { + Nodes: []uint64{0, 2, 3}, + Preferred: false, + }, + { + Nodes: []uint64{1, 2, 3}, + 
Preferred: false, + }, + { + Nodes: []uint64{0, 1, 2, 3}, + Preferred: false, + }, + }, + }, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + }, + }, + }, + { + name: "test for hugepages-2Mi dedicated cores without numa exclusive with distribute evenly across numa", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + "hugepages-2Mi": 4 * 1024 * 1024 * 1024, // 6Gi can be split into 2 or 4 numa nodes + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma: consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable, + }, + }, + }, + wantErr: false, + want: &pluginapi.ResourceHintsResponse{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + "hugepages-2Mi": { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0, 1}, + Preferred: true, + }, + { + Nodes: []uint64{2, 3}, + Preferred: true, + }, + { + Nodes: []uint64{0, 1, 2, 3}, + Preferred: false, + }, + }, + }, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: 
consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma: consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable, + }, + }, + }, + { + name: "not enough memory for hugepages-2Mi returns error", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + "hugepages-2Mi": 1000 * 1024 * 1024 * 1024, // 1000Gi + }, + }, + }, + wantErr: true, + }, + { + name: "test for hugepages-1Gi dedicated cores without numa exclusive with distribute evenly across numa", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + "hugepages-1Gi": 16 * 1024 * 1024 * 1024, // 16Gi can fit onto 2 or 4 NUMA nodes + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma: consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable, + }, + }, + }, + wantErr: false, + want: &pluginapi.ResourceHintsResponse{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + "hugepages-1Gi": { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0, 1}, + Preferred: true, + }, + { + Nodes: []uint64{2, 3}, + Preferred: true, + }, + { + Nodes: []uint64{0, 1, 2, 3}, + Preferred: false, + }, + }, + }, + }, + Annotations: map[string]string{ + 
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma: consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable, + }, + }, + }, + { + name: "distribute evenly across numa and numa exclusive not supported", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 1024 * 1024 * 1024, + "hugepages-2Mi": 2 * 1024 * 1024 * 1024, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma: consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable, + }, + }, + }, + wantErr: true, + }, + { + name: "get topology hints for both memory and hugepages-2Mi", + args: args{ + req: &pluginapi.ResourceRequest{ + PodUid: "pod1_uid", + PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 1024 * 1024 * 1024, + "hugepages-2Mi": 2 * 1024 * 1024 * 1024, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + }, + wantErr: false, + want: &pluginapi.ResourceHintsResponse{ + PodUid: "pod1_uid", + 
PodName: "pod1", + ContainerName: "container1", + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + string(v1.ResourceMemory): { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0}, + Preferred: true, + }, + { + Nodes: []uint64{1}, + Preferred: true, + }, + { + Nodes: []uint64{2}, + Preferred: true, + }, + { + Nodes: []uint64{3}, + Preferred: true, + }, + }, + }, + "hugepages-2Mi": { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0}, + Preferred: true, + }, + { + Nodes: []uint64{1}, + Preferred: true, + }, + { + Nodes: []uint64{2}, + Preferred: true, + }, + { + Nodes: []uint64{3}, + Preferred: true, + }, + }, + }, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + }, + } + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + tmpDir, err := os.MkdirTemp("", "checkpoint-TestNumaBindingHintHandler") + require.NoError(t, err) + defer os.RemoveAll(tmpDir) + + cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4) + assert.NoError(t, err) + machineInfo := &info.MachineInfo{ + Topology: []info.Node{ + { + Id: 0, + Memory: 100 * 1024 * 1024 * 1024, // 100 GB + HugePages: []info.HugePagesInfo{ + { + PageSize: 2 * 1024, // 2Mi + NumPages: 1024, + }, + { + PageSize: 1 * 1024 * 1024, // 1Gi + NumPages: 8, + }, + }, + }, + { + Id: 1, + Memory: 100 * 1024 * 1024 * 1024, + HugePages: []info.HugePagesInfo{ + { + PageSize: 2 * 1024, // 2Mi + NumPages: 1024, + }, + { + PageSize: 1 * 1024 * 1024, // 1Gi + NumPages: 8, + }, + }, + }, + { + Id: 2, + Memory: 100 * 1024 * 1024 * 1024, + HugePages: []info.HugePagesInfo{ + { + PageSize: 2 * 1024, // 2Mi + NumPages: 1024, + }, + { + PageSize: 1 * 1024 * 1024, // 1Gi + NumPages: 8, + }, + }, 
+ }, + { + Id: 3, + Memory: 100 * 1024 * 1024 * 1024, + HugePages: []info.HugePagesInfo{ + { + PageSize: 2 * 1024, // 2Mi + NumPages: 1024, + }, + { + PageSize: 1 * 1024 * 1024, // 1Gi + NumPages: 8, + }, + }, + }, + }, + } + + policy, err := getTestDynamicPolicyWithExtraResourcesWithInitialization(cpuTopology, machineInfo, tmpDir) + assert.NoError(t, err) + + got, err := policy.numaBindingHintHandler(context.Background(), tt.args.req) + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.want, got) + } + }) + } +} diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_test.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_test.go index a0cd7c136a..0cdfb1aa15 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_test.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_test.go @@ -111,16 +111,104 @@ var fakeConf = &config.Configuration{ }, } +var fakeConfWithExtraResources = &config.Configuration{ + AgentConfiguration: &configagent.AgentConfiguration{ + GenericAgentConfiguration: &configagent.GenericAgentConfiguration{ + GenericQRMPluginConfiguration: &qrmconfig.GenericQRMPluginConfiguration{ + UseKubeletReservedConfig: false, + }, + }, + StaticAgentConfiguration: &configagent.StaticAgentConfiguration{ + QRMPluginsConfiguration: &qrmconfig.QRMPluginsConfiguration{ + MemoryQRMPluginConfig: &qrmconfig.MemoryQRMPluginConfig{ + ReservedMemoryGB: 4, + ExtraMemoryResources: []string{"hugepages-2Mi", "hugepages-1Gi"}, + }, + }, + }, + }, +} + func getTestDynamicPolicyWithInitialization( topology *machine.CPUTopology, machineInfo *info.MachineInfo, stateFileDirectory string, ) (*DynamicPolicy, error) { - reservedMemory, err := getReservedMemory(fakeConf, &metaserver.MetaServer{}, machineInfo) + resourcesReservedMemory, err := getResourcesReservedMemory(fakeConf, &metaserver.MetaServer{}, machineInfo, + []string{string(v1.ResourceMemory)}) if err != nil { return nil, err } - resourcesReservedMemory := 
map[v1.ResourceName]map[int]uint64{ - v1.ResourceMemory: reservedMemory, + qosConfig := generic.NewQoSConfiguration() + qosConfig.SetExpandQoSLevelSelector(consts.PodAnnotationQoSLevelSharedCores, map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }) + qosConfig.SetExpandQoSLevelSelector(consts.PodAnnotationQoSLevelDedicatedCores, map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }) + qosConfig.SetExpandQoSLevelSelector(consts.PodAnnotationQoSLevelReclaimedCores, map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelReclaimedCores, + }) + + stateDirectoryConfig := &statedirectory.StateDirectoryConfiguration{ + StateFileDirectory: stateFileDirectory, + } + stateImpl, err := state.NewCheckpointState(stateDirectoryConfig, memoryPluginStateFileName, + memconsts.MemoryResourcePluginPolicyNameDynamic, topology, machineInfo, nil, resourcesReservedMemory, false, + metrics.DummyMetrics{}, fakeConf.ExtraMemoryResources) + if err != nil { + return nil, fmt.Errorf("NewCheckpointState failed with error: %v", err) + } + + policyImplement := &DynamicPolicy{ + topology: topology, + dynamicConf: dynamic.NewDynamicAgentConfiguration(), + featureGateManager: featuregatenegotiation.NewFeatureGateManager(config.NewConfiguration()), + qosConfig: qosConfig, + state: stateImpl, + emitter: metrics.DummyMetrics{}, + migratingMemory: make(map[string]map[string]bool), + stopCh: make(chan struct{}), + podDebugAnnoKeys: []string{podDebugAnnoKey}, + enableReclaimNUMABinding: true, + enableNonBindingShareCoresMemoryResourceCheck: true, + topologyAllocationAnnotationKey: coreconsts.QRMPodAnnotationTopologyAllocationKey, + numaBindResultResourceAllocationAnnotationKey: coreconsts.QRMResourceAnnotationKeyNUMABindResult, + extraResourceNames: fakeConf.ExtraMemoryResources, + } + + policyImplement.allocationHandlers = map[string]util.AllocationHandler{ + 
consts.PodAnnotationQoSLevelSharedCores: policyImplement.sharedCoresAllocationHandler, + consts.PodAnnotationQoSLevelDedicatedCores: policyImplement.dedicatedCoresAllocationHandler, + consts.PodAnnotationQoSLevelReclaimedCores: policyImplement.reclaimedCoresAllocationHandler, + consts.PodAnnotationQoSLevelSystemCores: policyImplement.systemCoresAllocationHandler, + } + + policyImplement.hintHandlers = map[string]util.HintHandler{ + consts.PodAnnotationQoSLevelSharedCores: policyImplement.sharedCoresHintHandler, + consts.PodAnnotationQoSLevelDedicatedCores: policyImplement.dedicatedCoresHintHandler, + consts.PodAnnotationQoSLevelReclaimedCores: policyImplement.reclaimedCoresHintHandler, + consts.PodAnnotationQoSLevelSystemCores: policyImplement.systemCoresHintHandler, + } + + policyImplement.asyncWorkers = asyncworker.NewAsyncWorkers(memoryPluginAsyncWorkersName, policyImplement.emitter) + + policyImplement.defaultAsyncLimitedWorkers = asyncworker.NewAsyncLimitedWorkers(memoryPluginAsyncWorkersName, defaultAsyncWorkLimit, policyImplement.emitter) + policyImplement.asyncLimitedWorkersMap = map[string]*asyncworker.AsyncLimitedWorkers{ + memoryPluginAsyncWorkTopicMovePage: asyncworker.NewAsyncLimitedWorkers(memoryPluginAsyncWorkTopicMovePage, movePagesWorkLimit, policyImplement.emitter), + } + + policyImplement.numaAllocationReactor = reactor.DummyAllocationReactor{} + + return policyImplement, nil +} + +func getTestDynamicPolicyWithExtraResourcesWithInitialization( + topology *machine.CPUTopology, machineInfo *info.MachineInfo, stateFileDirectory string, +) (*DynamicPolicy, error) { + resourcesReservedMemory, err := getResourcesReservedMemory(fakeConfWithExtraResources, &metaserver.MetaServer{}, machineInfo, + []string{string(v1.ResourceMemory)}) + if err != nil { + return nil, err } qosConfig := generic.NewQoSConfiguration() @@ -138,7 +226,8 @@ func getTestDynamicPolicyWithInitialization( StateFileDirectory: stateFileDirectory, } stateImpl, err := 
state.NewCheckpointState(stateDirectoryConfig, memoryPluginStateFileName, - memconsts.MemoryResourcePluginPolicyNameDynamic, topology, machineInfo, resourcesReservedMemory, false, metrics.DummyMetrics{}) + memconsts.MemoryResourcePluginPolicyNameDynamic, topology, machineInfo, nil, resourcesReservedMemory, false, + metrics.DummyMetrics{}, fakeConfWithExtraResources.ExtraMemoryResources) if err != nil { return nil, fmt.Errorf("NewCheckpointState failed with error: %v", err) } @@ -155,7 +244,9 @@ func getTestDynamicPolicyWithInitialization( podDebugAnnoKeys: []string{podDebugAnnoKey}, enableReclaimNUMABinding: true, enableNonBindingShareCoresMemoryResourceCheck: true, + topologyAllocationAnnotationKey: coreconsts.QRMPodAnnotationTopologyAllocationKey, numaBindResultResourceAllocationAnnotationKey: coreconsts.QRMResourceAnnotationKeyNUMABindResult, + extraResourceNames: fakeConfWithExtraResources.ExtraMemoryResources, } policyImplement.allocationHandlers = map[string]util.AllocationHandler{ @@ -471,6 +562,7 @@ func TestAllocate(t *testing.T) { req *pluginapi.ResourceRequest expectedResp *pluginapi.ResourceAllocationResponse enhancementDefaultValues map[string]string + expectedAllocationInfos map[v1.ResourceName]*state.AllocationInfo }{ { name: "req for init container", @@ -682,6 +774,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"memory":"7Gi"}}}}`, + }, }, }, }, @@ -735,6 +830,9 @@ func TestAllocate(t *testing.T) { IsScalarResource: true, AllocatedQuantity: 2147483648, AllocationResult: machine.NewCPUSet(0).String(), + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"memory":"2Gi"}}}}`, + }, ResourceHints: &pluginapi.ListOfTopologyHints{ Hints: []*pluginapi.TopologyHint{ { @@ -804,6 +902,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + 
coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"memory":"7Gi"}}}}`, + }, }, }, }, @@ -868,6 +969,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"memory":"2Gi"}}}}`, + }, }, }, }, @@ -928,6 +1032,9 @@ func TestAllocate(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"memory":"2Gi"}}}}`, + }, }, }, }, @@ -940,6 +1047,66 @@ func TestAllocate(t *testing.T) { }, }, }, + { + name: "req for shared cores main container for memory and hugepages", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 2147483648, + "hugepages-2Mi": 2147483648, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }, + }, + expectedResp: &pluginapi.ResourceAllocationResponse{ + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + AllocationResult: &pluginapi.ResourceAllocation{ + ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{ + string(v1.ResourceMemory): { + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 2147483648, + AllocationResult: machine.NewCPUSet(0, 1, 2, 3).String(), + ResourceHints: &pluginapi.ListOfTopologyHints{ + Hints: []*pluginapi.TopologyHint{nil}, + }, + }, + "hugepages-2Mi": { + 
OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 2147483648, + AllocationResult: machine.NewCPUSet(0, 1, 2, 3).String(), + ResourceHints: &pluginapi.ListOfTopologyHints{ + Hints: []*pluginapi.TopologyHint{nil}, + }, + }, + }, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }, + }, + }, { name: "req for reclaim_cores with actual numa_binding main container", req: &pluginapi.ResourceRequest{ @@ -981,6 +1148,7 @@ func TestAllocate(t *testing.T) { AllocatedQuantity: 2147483648, Annotations: map[string]string{ coreconsts.QRMResourceAnnotationKeyNUMABindResult: "0", + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{}}}`, }, AllocationResult: machine.NewCPUSet(0).String(), ResourceHints: &pluginapi.ListOfTopologyHints{ @@ -1055,116 +1223,375 @@ func TestAllocate(t *testing.T) { }, }, }, - } - - for _, tc := range testCases { - tc := tc - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - - as := require.New(t) - tmpDir, err := ioutil.TempDir("", "checkpoint-TestAllocate") - as.Nil(err) - - dynamicPolicy, err := getTestDynamicPolicyWithInitialization(cpuTopology, machineInfo, tmpDir) - as.Nil(err) - - if tc.enhancementDefaultValues != nil { - dynamicPolicy.qosConfig.QoSEnhancementDefaultValues = tc.enhancementDefaultValues - } - - dynamicPolicy.enableMemoryAdvisor = true - dynamicPolicy.advisorClient = advisorsvc.NewStubAdvisorServiceClient() - - resp, err := dynamicPolicy.Allocate(context.Background(), tc.req) - as.Nil(err) - - tc.expectedResp.PodUid = tc.req.PodUid - as.Equalf(tc.expectedResp, resp, "failed in test case: %s", tc.name) - - os.RemoveAll(tmpDir) - }) - } -} - -func TestAllocateForPod(t *testing.T) { - t.Parallel() - - as := require.New(t) - cpuTopology, err := 
machine.GenerateDummyCPUTopology(16, 2, 4) - as.Nil(err) - - machineInfo, err := machine.GenerateDummyMachineInfo(4, 32) - as.Nil(err) - - testName := "test" - - tmpDir, err := ioutil.TempDir("", "checkpoint-TestAllocateForPod") - as.Nil(err) - - dynamicPolicy, err := getTestDynamicPolicyWithInitialization(cpuTopology, machineInfo, tmpDir) - as.Nil(err) - - req := &pluginapi.PodResourceRequest{ - PodUid: string(uuid.NewUUID()), - PodNamespace: testName, - PodName: testName, - ResourceName: string(v1.ResourceMemory), - ResourceRequests: map[string]float64{ - string(v1.ResourceMemory): 1073741824, - }, - } - - _, err = dynamicPolicy.AllocateForPod(context.Background(), req) - as.NotNil(err) - os.RemoveAll(tmpDir) -} - -func TestGetPodTopologyHints(t *testing.T) { - t.Parallel() - - as := require.New(t) - cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4) - as.Nil(err) - - machineInfo, err := machine.GenerateDummyMachineInfo(4, 32) - as.Nil(err) - - testName := "test" - - tmpDir, err := ioutil.TempDir("", "checkpoint-TestGetPodTopologyHints") - as.Nil(err) - - dynamicPolicy, err := getTestDynamicPolicyWithInitialization(cpuTopology, machineInfo, tmpDir) - as.Nil(err) - - req := &pluginapi.PodResourceRequest{ - PodUid: string(uuid.NewUUID()), - PodNamespace: testName, - PodName: testName, - ResourceName: string(v1.ResourceMemory), - ResourceRequests: map[string]float64{ - string(v1.ResourceMemory): 1073741824, - }, - } - - _, err = dynamicPolicy.GetPodTopologyHints(context.Background(), req) - as.NotNil(err) - os.RemoveAll(tmpDir) -} - -func TestGetTopologyHints(t *testing.T) { - t.Parallel() - - as := require.New(t) - cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4) - as.Nil(err) - - machineInfo, err := machine.GenerateDummyMachineInfo(4, 32) - as.Nil(err) - - testName := "test" - + { + name: "req for memory and hugepages for shared cores with numa binding main container", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + 
PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0}, + Preferred: true, + }, + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 2147483648, + "hugepages-2Mi": 2147483648, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true"}`, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }, + }, + expectedResp: &pluginapi.ResourceAllocationResponse{ + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + AllocationResult: &pluginapi.ResourceAllocation{ + ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{ + string(v1.ResourceMemory): { + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 2147483648, + AllocationResult: machine.NewCPUSet(0).String(), + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"memory":"2Gi"}}}}`, + }, + ResourceHints: &pluginapi.ListOfTopologyHints{ + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0}, + Preferred: true, + }, + }, + }, + }, + "hugepages-2Mi": { + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 2147483648, + AllocationResult: machine.NewCPUSet(0).String(), + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"hugepages-2Mi":"2Gi"}}}}`, + }, + ResourceHints: &pluginapi.ListOfTopologyHints{ + Hints: []*pluginapi.TopologyHint{ + { + Nodes: 
[]uint64{0}, + Preferred: true, + }, + }, + }, + }, + }, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + }, + { + name: "req for memory and hugepages for dedicated core with numa binding without numa exclusive with distribute evenly across numa", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0, 1}, + Preferred: true, + }, + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 2147483648, + "hugepages-2Mi": 2147483648, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false", "distribute_evenly_across_numa": "true"}`, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + }, + expectedResp: &pluginapi.ResourceAllocationResponse{ + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + AllocationResult: &pluginapi.ResourceAllocation{ + ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{ + string(v1.ResourceMemory): { + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 2147483648, + AllocationResult: machine.NewCPUSet(0, 1).String(), + Annotations: map[string]string{ + 
coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"memory":"1Gi"}},"1":{"allocated":{"memory":"1Gi"}}}}`, + }, + ResourceHints: &pluginapi.ListOfTopologyHints{ + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0, 1}, + Preferred: true, + }, + }, + }, + }, + "hugepages-2Mi": { + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 2147483648, + AllocationResult: machine.NewCPUSet(0, 1).String(), + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"hugepages-2Mi":"1Gi"}},"1":{"allocated":{"hugepages-2Mi":"1Gi"}}}}`, + }, + ResourceHints: &pluginapi.ListOfTopologyHints{ + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0, 1}, + Preferred: true, + }, + }, + }, + }, + }, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma: consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable, + consts.PodAnnotationMemoryEnhancementNumaExclusive: "false", + }, + }, + expectedAllocationInfos: map[v1.ResourceName]*state.AllocationInfo{ + v1.ResourceMemory: { + AggregatedQuantity: 2147483648, + NumaAllocationResult: machine.NewCPUSet(0, 1), + TopologyAwareAllocations: map[int]uint64{ + 0: 1073741824, // should be distributed evenly across 2 numa nodes + 1: 1073741824, + }, + }, + "hugepages-2Mi": { + AggregatedQuantity: 2147483648, + NumaAllocationResult: machine.NewCPUSet(0, 1), + TopologyAwareAllocations: map[int]uint64{ + 0: 1073741824, // should be distributed evenly across 2 numa nodes + 1: 1073741824, + }, + }, + }, + }, + { + name: "test for dedicated 
cores with numa binding without numa exclusive for only hugepages", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0}, + Preferred: true, + }, + ResourceRequests: map[string]float64{ + "hugepages-2Mi": 2147483648, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true"}`, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + }, + expectedResp: &pluginapi.ResourceAllocationResponse{ + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + AllocationResult: &pluginapi.ResourceAllocation{ + ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{ + "hugepages-2Mi": { + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 2147483648, + AllocationResult: machine.NewCPUSet(0).String(), + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{"allocated":{"hugepages-2Mi":"2Gi"}}}}`, + }, + ResourceHints: &pluginapi.ListOfTopologyHints{ + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0}, + Preferred: true, + }, + }, + }, + }, + }, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + 
}, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + as := require.New(t) + tmpDir, err := ioutil.TempDir("", "checkpoint-TestAllocate") + as.Nil(err) + + dynamicPolicy, err := getTestDynamicPolicyWithExtraResourcesWithInitialization(cpuTopology, machineInfo, tmpDir) + as.Nil(err) + + if tc.enhancementDefaultValues != nil { + dynamicPolicy.qosConfig.QoSEnhancementDefaultValues = tc.enhancementDefaultValues + } + + dynamicPolicy.enableMemoryAdvisor = true + dynamicPolicy.advisorClient = advisorsvc.NewStubAdvisorServiceClient() + + resp, err := dynamicPolicy.Allocate(context.Background(), tc.req) + as.Nil(err) + + tc.expectedResp.PodUid = tc.req.PodUid + as.Equalf(tc.expectedResp, resp, "failed in test case: %s", tc.name) + + if tc.expectedAllocationInfos != nil { + for resourceName, expectedAllocationInfo := range tc.expectedAllocationInfos { + actualAllocationInfo := dynamicPolicy.state.GetAllocationInfo(resourceName, tc.req.PodUid, tc.req.ContainerName) + as.NotNilf(actualAllocationInfo, "failed in test case: %s", tc.name) + + as.Equalf(expectedAllocationInfo.AggregatedQuantity, actualAllocationInfo.AggregatedQuantity, "failed in test case: %s", tc.name) + as.Equalf(expectedAllocationInfo.NumaAllocationResult, actualAllocationInfo.NumaAllocationResult, "failed in test case: %s", tc.name) + as.Equalf(expectedAllocationInfo.TopologyAwareAllocations, actualAllocationInfo.TopologyAwareAllocations, "failed in test case: %s", tc.name) + + } + } + + os.RemoveAll(tmpDir) + }) + } +} + +func TestAllocateForPod(t *testing.T) { + t.Parallel() + + as := require.New(t) + cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4) + as.Nil(err) + + machineInfo, err := machine.GenerateDummyMachineInfo(4, 32) + as.Nil(err) + + testName := "test" + + tmpDir, err := ioutil.TempDir("", "checkpoint-TestAllocateForPod") + as.Nil(err) + + dynamicPolicy, err := getTestDynamicPolicyWithInitialization(cpuTopology, 
machineInfo, tmpDir) + as.Nil(err) + + req := &pluginapi.PodResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 1073741824, + }, + } + + _, err = dynamicPolicy.AllocateForPod(context.Background(), req) + as.NotNil(err) + os.RemoveAll(tmpDir) +} + +func TestGetPodTopologyHints(t *testing.T) { + t.Parallel() + + as := require.New(t) + cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4) + as.Nil(err) + + machineInfo, err := machine.GenerateDummyMachineInfo(4, 32) + as.Nil(err) + + testName := "test" + + tmpDir, err := ioutil.TempDir("", "checkpoint-TestGetPodTopologyHints") + as.Nil(err) + + dynamicPolicy, err := getTestDynamicPolicyWithInitialization(cpuTopology, machineInfo, tmpDir) + as.Nil(err) + + req := &pluginapi.PodResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 1073741824, + }, + } + + _, err = dynamicPolicy.GetPodTopologyHints(context.Background(), req) + as.NotNil(err) + os.RemoveAll(tmpDir) +} + +func TestGetTopologyHints(t *testing.T) { + t.Parallel() + + as := require.New(t) + cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4) + as.Nil(err) + + machineInfo, err := machine.GenerateDummyMachineInfo(4, 32) + as.Nil(err) + + testName := "test" + testCases := []struct { name string req *pluginapi.ResourceRequest @@ -1273,12 +1700,122 @@ func TestGetTopologyHints(t *testing.T) { consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelReclaimedCores, }, Annotations: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelReclaimedCores, + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelReclaimedCores, + }, + }, + }, + { + name: "req for system_cores main container", + req: 
&pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 1073741824, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + }, + }, + expectedResp: &pluginapi.ResourceHintsResponse{ + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + string(v1.ResourceMemory): nil, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + }, + }, + }, + { + name: "req for dedicated_cores with numa_binding & numa_exclusive main container", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 10737418240, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "true"}`, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + }, + expectedResp: &pluginapi.ResourceHintsResponse{ + PodNamespace: testName, + PodName: testName, + 
ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ + string(v1.ResourceMemory): { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0, 1}, + Preferred: true, + }, + { + Nodes: []uint64{2, 3}, + Preferred: true, + }, + { + Nodes: []uint64{0, 1, 2}, + Preferred: false, + }, + { + Nodes: []uint64{0, 1, 3}, + Preferred: false, + }, + { + Nodes: []uint64{0, 2, 3}, + Preferred: false, + }, + { + Nodes: []uint64{1, 2, 3}, + Preferred: false, + }, + { + Nodes: []uint64{0, 1, 2, 3}, + Preferred: false, + }, + }, + }, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, }, }, }, { - name: "req for system_cores main container", + name: "req for dedicated_cores with numa_binding & not numa_exclusive main container", req: &pluginapi.ResourceRequest{ PodUid: string(uuid.NewUUID()), PodNamespace: testName, @@ -1290,11 +1827,12 @@ func TestGetTopologyHints(t *testing.T) { ResourceRequests: map[string]float64{ string(v1.ResourceMemory): 1073741824, }, - Labels: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, - }, Annotations: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: 
consts.PodAnnotationQoSLevelDedicatedCores, }, }, expectedResp: &pluginapi.ResourceHintsResponse{ @@ -1305,18 +1843,39 @@ func TestGetTopologyHints(t *testing.T) { ContainerIndex: 0, ResourceName: string(v1.ResourceMemory), ResourceHints: map[string]*pluginapi.ListOfTopologyHints{ - string(v1.ResourceMemory): nil, + string(v1.ResourceMemory): { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0}, + Preferred: true, + }, + { + Nodes: []uint64{1}, + Preferred: true, + }, + { + Nodes: []uint64{2}, + Preferred: true, + }, + { + Nodes: []uint64{3}, + Preferred: true, + }, + }, + }, }, Labels: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, }, Annotations: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + consts.PodAnnotationMemoryEnhancementNumaExclusive: "false", }, }, }, { - name: "req for dedicated_cores with numa_binding & numa_exclusive main container", + name: "req for dedicated_cores with numa_binding & default numa_exclusive true main container", req: &pluginapi.ResourceRequest{ PodUid: string(uuid.NewUUID()), PodNamespace: testName, @@ -1330,7 +1889,7 @@ func TestGetTopologyHints(t *testing.T) { }, Annotations: map[string]string{ consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, - consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "true"}`, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true"}`, }, Labels: map[string]string{ consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, @@ -1386,9 +1945,12 @@ func TestGetTopologyHints(t *testing.T) { consts.PodAnnotationMemoryEnhancementNumaExclusive: 
consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, }, }, + enhancementDefaultValues: map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + }, }, { - name: "req for dedicated_cores with numa_binding & not numa_exclusive main container", + name: "req for dedicated_cores with numa_binding & without numa_exclusive main container", req: &pluginapi.ResourceRequest{ PodUid: string(uuid.NewUUID()), PodNamespace: testName, @@ -1402,7 +1964,7 @@ func TestGetTopologyHints(t *testing.T) { }, Annotations: map[string]string{ consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, - consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true"}`, }, Labels: map[string]string{ consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, @@ -1441,14 +2003,13 @@ func TestGetTopologyHints(t *testing.T) { consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, }, Annotations: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, - consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, - consts.PodAnnotationMemoryEnhancementNumaExclusive: "false", + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, }, }, }, { - name: "req for dedicated_cores with numa_binding & default numa_exclusive true main container", + name: "req for hugepages resource and memory for dedicated cores with numa_binding & without numa exclusive main container", req: &pluginapi.ResourceRequest{ PodUid: string(uuid.NewUUID()), PodNamespace: testName, @@ -1458,7 +2019,8 @@ func TestGetTopologyHints(t *testing.T) { ContainerIndex: 0, ResourceName: 
string(v1.ResourceMemory), ResourceRequests: map[string]float64{ - string(v1.ResourceMemory): 10737418240, + string(v1.ResourceMemory): 1073741824, + "hugepages-2Mi": 1073741824, }, Annotations: map[string]string{ consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, @@ -1479,32 +2041,40 @@ func TestGetTopologyHints(t *testing.T) { string(v1.ResourceMemory): { Hints: []*pluginapi.TopologyHint{ { - Nodes: []uint64{0, 1}, + Nodes: []uint64{0}, Preferred: true, }, { - Nodes: []uint64{2, 3}, + Nodes: []uint64{1}, Preferred: true, }, { - Nodes: []uint64{0, 1, 2}, - Preferred: false, + Nodes: []uint64{2}, + Preferred: true, }, { - Nodes: []uint64{0, 1, 3}, - Preferred: false, + Nodes: []uint64{3}, + Preferred: true, }, + }, + }, + "hugepages-2Mi": { + Hints: []*pluginapi.TopologyHint{ { - Nodes: []uint64{0, 2, 3}, - Preferred: false, + Nodes: []uint64{0}, + Preferred: true, }, { - Nodes: []uint64{1, 2, 3}, - Preferred: false, + Nodes: []uint64{1}, + Preferred: true, }, { - Nodes: []uint64{0, 1, 2, 3}, - Preferred: false, + Nodes: []uint64{2}, + Preferred: true, + }, + { + Nodes: []uint64{3}, + Preferred: true, }, }, }, @@ -1513,17 +2083,13 @@ func TestGetTopologyHints(t *testing.T) { consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, }, Annotations: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, - consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, - consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, }, }, - enhancementDefaultValues: map[string]string{ - consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, - }, }, { - 
name: "req for dedicated_cores with numa_binding & without numa_exclusive main container", + name: "req for shared_cores with numa_binding main container", req: &pluginapi.ResourceRequest{ PodUid: string(uuid.NewUUID()), PodNamespace: testName, @@ -1536,11 +2102,11 @@ func TestGetTopologyHints(t *testing.T) { string(v1.ResourceMemory): 1073741824, }, Annotations: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true"}`, }, Labels: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, }, }, expectedResp: &pluginapi.ResourceHintsResponse{ @@ -1573,16 +2139,16 @@ func TestGetTopologyHints(t *testing.T) { }, }, Labels: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, }, Annotations: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, }, }, }, { - name: "req for shared_cores with numa_binding main container", + name: "req for hugepages and memory for shared cores with numa binding main container", req: &pluginapi.ResourceRequest{ PodUid: string(uuid.NewUUID()), PodNamespace: testName, @@ -1593,6 +2159,7 @@ func TestGetTopologyHints(t *testing.T) { ResourceName: string(v1.ResourceMemory), ResourceRequests: map[string]float64{ string(v1.ResourceMemory): 1073741824, + "hugepages-2Mi": 1073741824, }, Annotations: map[string]string{ consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, @@ -1630,13 +2197,33 
@@ func TestGetTopologyHints(t *testing.T) { }, }, }, + "hugepages-2Mi": { + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0}, + Preferred: true, + }, + { + Nodes: []uint64{1}, + Preferred: true, + }, + { + Nodes: []uint64{2}, + Preferred: true, + }, + { + Nodes: []uint64{3}, + Preferred: true, + }, + }, + }, }, Labels: map[string]string{ consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, }, Annotations: map[string]string{ consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, - consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, }, }, }, @@ -1715,7 +2302,7 @@ func TestGetTopologyHints(t *testing.T) { tmpDir, err := ioutil.TempDir("", "checkpoint-TestGetTopologyHints") as.Nil(err) - dynamicPolicy, err := getTestDynamicPolicyWithInitialization(cpuTopology, machineInfo, tmpDir) + dynamicPolicy, err := getTestDynamicPolicyWithExtraResourcesWithInitialization(cpuTopology, machineInfo, tmpDir) as.Nil(err) if tc.enhancementDefaultValues != nil { @@ -1752,7 +2339,7 @@ func TestGetTopologyAwareAllocatableResources(t *testing.T) { machineInfo, err := machine.GenerateDummyMachineInfo(4, 32) as.Nil(err) - dynamicPolicy, err := getTestDynamicPolicyWithInitialization(cpuTopology, machineInfo, tmpDir) + dynamicPolicy, err := getTestDynamicPolicyWithExtraResourcesWithInitialization(cpuTopology, machineInfo, tmpDir) as.Nil(err) resp, err := dynamicPolicy.GetTopologyAwareAllocatableResources(context.Background(), &pluginapi.GetTopologyAwareAllocatableResourcesRequest{}) @@ -1778,6 +2365,42 @@ func TestGetTopologyAwareAllocatableResources(t *testing.T) { AggregatedAllocatableQuantity: 30064771072, AggregatedCapacityQuantity: 34359738368, }, + "hugepages-2Mi": { + IsNodeResource: false, + IsScalarResource: true, + TopologyAwareCapacityQuantityList: 
[]*pluginapi.TopologyAwareQuantity{ + {ResourceValue: 2147483648, Node: 0}, + {ResourceValue: 2147483648, Node: 1}, + {ResourceValue: 2147483648, Node: 2}, + {ResourceValue: 2147483648, Node: 3}, + }, + TopologyAwareAllocatableQuantityList: []*pluginapi.TopologyAwareQuantity{ + {ResourceValue: 2147483648, Node: 0}, + {ResourceValue: 2147483648, Node: 1}, + {ResourceValue: 2147483648, Node: 2}, + {ResourceValue: 2147483648, Node: 3}, + }, + AggregatedCapacityQuantity: 8589934592, + AggregatedAllocatableQuantity: 8589934592, + }, + "hugepages-1Gi": { + IsNodeResource: false, + IsScalarResource: true, + TopologyAwareCapacityQuantityList: []*pluginapi.TopologyAwareQuantity{ + {ResourceValue: 8589934592, Node: 0}, + {ResourceValue: 8589934592, Node: 1}, + {ResourceValue: 8589934592, Node: 2}, + {ResourceValue: 8589934592, Node: 3}, + }, + TopologyAwareAllocatableQuantityList: []*pluginapi.TopologyAwareQuantity{ + {ResourceValue: 8589934592, Node: 0}, + {ResourceValue: 8589934592, Node: 1}, + {ResourceValue: 8589934592, Node: 2}, + {ResourceValue: 8589934592, Node: 3}, + }, + AggregatedCapacityQuantity: 34359738368, + AggregatedAllocatableQuantity: 34359738368, + }, }, }, resp) } @@ -1893,7 +2516,116 @@ func TestGetTopologyAwareResources(t *testing.T) { }, }, { - description: "req for dedicated_cores with numa_binding main container", + description: "req for dedicated_cores with numa_binding main container", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0, 1}, + Preferred: true, + }, + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 10737418240, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + 
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "true"}`, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + }, + expectedResp: &pluginapi.GetTopologyAwareResourcesResponse{ + PodNamespace: testName, + PodName: testName, + ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ + ContainerName: testName, + AllocatedResources: map[string]*pluginapi.TopologyAwareResource{ + string(v1.ResourceMemory): { + IsNodeResource: false, + IsScalarResource: true, + AggregatedQuantity: 15032385536, + OriginalAggregatedQuantity: 15032385536, + TopologyAwareQuantityList: []*pluginapi.TopologyAwareQuantity{ + {ResourceValue: 7516192768, Node: 0}, + {ResourceValue: 7516192768, Node: 1}, + }, + OriginalTopologyAwareQuantityList: []*pluginapi.TopologyAwareQuantity{ + {ResourceValue: 7516192768, Node: 0}, + {ResourceValue: 7516192768, Node: 1}, + }, + }, + }, + }, + }, + }, + { + description: "req for dedicated_cores with numa_binding main container for memory and hugepages", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: string(v1.ResourceMemory), + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0}, + Preferred: true, + }, + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 1073741824, + "hugepages-2Mi": 1073741824, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true"}`, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + }, + expectedResp: &pluginapi.GetTopologyAwareResourcesResponse{ + PodNamespace: testName, + PodName: testName, + ContainerTopologyAwareResources: 
&pluginapi.ContainerTopologyAwareResources{ + ContainerName: testName, + AllocatedResources: map[string]*pluginapi.TopologyAwareResource{ + string(v1.ResourceMemory): { + IsNodeResource: false, + IsScalarResource: true, + AggregatedQuantity: 1073741824, + OriginalAggregatedQuantity: 1073741824, + TopologyAwareQuantityList: []*pluginapi.TopologyAwareQuantity{ + {ResourceValue: 1073741824, Node: 0}, + }, + OriginalTopologyAwareQuantityList: []*pluginapi.TopologyAwareQuantity{ + {ResourceValue: 1073741824, Node: 0}, + }, + }, + "hugepages-2Mi": { + IsNodeResource: false, + IsScalarResource: true, + AggregatedQuantity: 1073741824, + OriginalAggregatedQuantity: 1073741824, + TopologyAwareQuantityList: []*pluginapi.TopologyAwareQuantity{ + {ResourceValue: 1073741824, Node: 0}, + }, + OriginalTopologyAwareQuantityList: []*pluginapi.TopologyAwareQuantity{ + {ResourceValue: 1073741824, Node: 0}, + }, + }, + }, + }, + }, + }, + { + description: "req for dedicated_cores with numa_binding main container with distribute evenly across numa for memory and hugepages", req: &pluginapi.ResourceRequest{ PodUid: string(uuid.NewUUID()), PodNamespace: testName, @@ -1907,11 +2639,12 @@ func TestGetTopologyAwareResources(t *testing.T) { Preferred: true, }, ResourceRequests: map[string]float64{ - string(v1.ResourceMemory): 10737418240, + string(v1.ResourceMemory): 1073741824, + "hugepages-2Mi": 1073741824, }, Annotations: map[string]string{ consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, - consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "true"}`, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "distribute_evenly_across_numa": "true"}`, }, Labels: map[string]string{ consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, @@ -1926,15 +2659,29 @@ func TestGetTopologyAwareResources(t *testing.T) { string(v1.ResourceMemory): { IsNodeResource: false, IsScalarResource: true, - 
AggregatedQuantity: 15032385536, - OriginalAggregatedQuantity: 15032385536, + AggregatedQuantity: 1073741824, + OriginalAggregatedQuantity: 1073741824, TopologyAwareQuantityList: []*pluginapi.TopologyAwareQuantity{ - {ResourceValue: 7516192768, Node: 0}, - {ResourceValue: 7516192768, Node: 1}, + {ResourceValue: 536870912, Node: 0}, + {ResourceValue: 536870912, Node: 1}, }, OriginalTopologyAwareQuantityList: []*pluginapi.TopologyAwareQuantity{ - {ResourceValue: 7516192768, Node: 0}, - {ResourceValue: 7516192768, Node: 1}, + {ResourceValue: 536870912, Node: 0}, + {ResourceValue: 536870912, Node: 1}, + }, + }, + "hugepages-2Mi": { + IsNodeResource: false, + IsScalarResource: true, + AggregatedQuantity: 1073741824, + OriginalAggregatedQuantity: 1073741824, + TopologyAwareQuantityList: []*pluginapi.TopologyAwareQuantity{ + {ResourceValue: 536870912, Node: 0}, + {ResourceValue: 536870912, Node: 1}, + }, + OriginalTopologyAwareQuantityList: []*pluginapi.TopologyAwareQuantity{ + {ResourceValue: 536870912, Node: 0}, + {ResourceValue: 536870912, Node: 1}, }, }, }, @@ -1944,31 +2691,36 @@ func TestGetTopologyAwareResources(t *testing.T) { } for _, tc := range testCases { - tmpDir, err := ioutil.TempDir("", "checkpoint-TestGetTopologyAwareResources") - as.Nil(err) - - dynamicPolicy, err := getTestDynamicPolicyWithInitialization(cpuTopology, machineInfo, tmpDir) - as.Nil(err) + tc := tc + t.Run(tc.description, func(t *testing.T) { + t.Parallel() - _, err = dynamicPolicy.Allocate(context.Background(), tc.req) - as.Nil(err) + tmpDir, err := ioutil.TempDir("", "checkpoint-TestGetTopologyAwareResources") + as.Nil(err) - resp, err := dynamicPolicy.GetTopologyAwareResources(context.Background(), &pluginapi.GetTopologyAwareResourcesRequest{ - PodUid: tc.req.PodUid, - ContainerName: testName, - }) + dynamicPolicy, err := getTestDynamicPolicyWithExtraResourcesWithInitialization(cpuTopology, machineInfo, tmpDir) + as.Nil(err) - if tc.err != nil { - as.NotNil(err) - continue - } else { + 
_, err = dynamicPolicy.Allocate(context.Background(), tc.req) as.Nil(err) - tc.expectedResp.PodUid = tc.req.PodUid - } - as.Equalf(tc.expectedResp, resp, "failed in test case: %s", tc.description) + resp, err := dynamicPolicy.GetTopologyAwareResources(context.Background(), &pluginapi.GetTopologyAwareResourcesRequest{ + PodUid: tc.req.PodUid, + ContainerName: testName, + }) + + if tc.err != nil { + as.NotNil(err) + return + } else { + as.Nil(err) + tc.expectedResp.PodUid = tc.req.PodUid + } - os.Remove(tmpDir) + as.Equalf(tc.expectedResp, resp, "failed in test case: %s", tc.description) + + os.Remove(tmpDir) + }) } } @@ -1976,10 +2728,7 @@ func TestGetResourcesAllocation(t *testing.T) { t.Parallel() as := require.New(t) - - tmpDir, err := ioutil.TempDir("", "checkpoint-TestGetResourcesAllocation") - as.Nil(err) - defer os.RemoveAll(tmpDir) + testName := "test" cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4) as.Nil(err) @@ -1987,215 +2736,308 @@ func TestGetResourcesAllocation(t *testing.T) { machineInfo, err := machine.GenerateDummyMachineInfo(4, 32) as.Nil(err) - dynamicPolicy, err := getTestDynamicPolicyWithInitialization(cpuTopology, machineInfo, tmpDir) - as.Nil(err) - - testName := "test" + type testCase struct { + name string + useExtraResources bool + buildRequest func() []*pluginapi.ResourceRequest + expectedMemory *pluginapi.ResourceAllocationInfo + expectedHugepages *pluginapi.ResourceAllocationInfo + checkHugepages bool + } - // test for shared_cores - req := &pluginapi.ResourceRequest{ - PodUid: string(uuid.NewUUID()), - PodNamespace: testName, - PodName: testName, - ContainerName: testName, - ContainerType: pluginapi.ContainerType_MAIN, - ContainerIndex: 0, - ResourceName: string(v1.ResourceMemory), - ResourceRequests: map[string]float64{ - string(v1.ResourceMemory): 1073741824, + tests := []testCase{ + { + name: "shared_cores", + useExtraResources: true, + buildRequest: func() []*pluginapi.ResourceRequest { + return 
[]*pluginapi.ResourceRequest{ + { + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 1073741824, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }, + }, + } + }, + expectedMemory: &pluginapi.ResourceAllocationInfo{ + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 1073741824, + AllocationResult: machine.NewCPUSet(0, 1, 2, 3).String(), + }, }, - Labels: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + { + name: "reclaimed_cores", + useExtraResources: true, + buildRequest: func() []*pluginapi.ResourceRequest { + return []*pluginapi.ResourceRequest{ + { + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 1073741824, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelReclaimedCores, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelReclaimedCores, + }, + }, + } + }, + expectedMemory: &pluginapi.ResourceAllocationInfo{ + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 1073741824, + AllocationResult: machine.NewCPUSet(0, 1, 2, 3).String(), + }, }, - Annotations: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + { + name: 
"dedicated_cores_numa_binding", + useExtraResources: false, + buildRequest: func() []*pluginapi.ResourceRequest { + return []*pluginapi.ResourceRequest{ + { + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0}, + Preferred: true, + }, + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 2147483648, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "true"}`, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + }, + } + }, + expectedMemory: &pluginapi.ResourceAllocationInfo{ + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 7516192768, + AllocationResult: machine.NewCPUSet(0).String(), + }, }, - } - - _, err = dynamicPolicy.Allocate(context.Background(), req) - as.Nil(err) - - resp1, err := dynamicPolicy.GetResourcesAllocation(context.Background(), &pluginapi.GetResourcesAllocationRequest{}) - as.Nil(err) - - as.NotNil(resp1.PodResources[req.PodUid]) - as.NotNil(resp1.PodResources[req.PodUid].ContainerResources[testName]) - as.NotNil(resp1.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)]) - as.Equal(&pluginapi.ResourceAllocationInfo{ - OciPropertyName: util.OCIPropertyNameCPUSetMems, - IsNodeResource: false, - IsScalarResource: true, - AllocatedQuantity: 1073741824, - AllocationResult: machine.NewCPUSet(0, 1, 2, 3).String(), - }, resp1.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)]) - - // test for reclaimed_cores - req = &pluginapi.ResourceRequest{ - PodUid: 
string(uuid.NewUUID()), - PodNamespace: testName, - PodName: testName, - ContainerName: testName, - ContainerType: pluginapi.ContainerType_MAIN, - ContainerIndex: 0, - ResourceName: string(v1.ResourceMemory), - ResourceRequests: map[string]float64{ - string(v1.ResourceMemory): 1073741824, + { + name: "system_cores_cpuset_reserve", + useExtraResources: false, + buildRequest: func() []*pluginapi.ResourceRequest { + return []*pluginapi.ResourceRequest{ + { + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0}, + Preferred: true, + }, + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 2147483648, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + consts.PodAnnotationCPUEnhancementKey: `{"cpuset_pool": "reserve"}`, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + }, + }, + } + }, + expectedMemory: &pluginapi.ResourceAllocationInfo{ + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 0, + AllocationResult: machine.NewCPUSet(0, 1, 2, 3).String(), + }, }, - Labels: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelReclaimedCores, + { + name: "system_cores_cpuset_reserve_with_numa_binding", + useExtraResources: false, + buildRequest: func() []*pluginapi.ResourceRequest { + return []*pluginapi.ResourceRequest{ + { + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0}, + Preferred: true, + }, + ResourceRequests: map[string]float64{ + 
string(v1.ResourceMemory): 2147483648, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "true"}`, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + }, + { + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0}, + Preferred: true, + }, + ResourceRequests: map[string]float64{ + string(v1.ResourceMemory): 2147483648, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + consts.PodAnnotationCPUEnhancementKey: `{"cpuset_pool": "reserve"}`, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true"}`, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, + }, + }, + } + }, + expectedMemory: &pluginapi.ResourceAllocationInfo{ + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 0, + AllocationResult: machine.NewCPUSet(1, 2, 3).String(), + }, }, - Annotations: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelReclaimedCores, + { + name: "dedicated_cores_memory_and_hugepages", + useExtraResources: true, + buildRequest: func() []*pluginapi.ResourceRequest { + return []*pluginapi.ResourceRequest{ + { + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: string(v1.ResourceMemory), + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0}, + Preferred: true, + }, + ResourceRequests: map[string]float64{ + 
string(v1.ResourceMemory): 2147483648, + "hugepages-2Mi": 2147483648, + }, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true"}`, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + }, + } + }, + expectedMemory: &pluginapi.ResourceAllocationInfo{ + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 2147483648, + AllocationResult: machine.NewCPUSet(0).String(), + }, + checkHugepages: true, + expectedHugepages: &pluginapi.ResourceAllocationInfo{ + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: 2147483648, + AllocationResult: machine.NewCPUSet(0).String(), + }, }, } - _, err = dynamicPolicy.Allocate(context.Background(), req) - as.Nil(err) - - resp2, err := dynamicPolicy.GetResourcesAllocation(context.Background(), &pluginapi.GetResourcesAllocationRequest{}) - as.Nil(err) + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() - as.NotNil(resp2.PodResources[req.PodUid]) - as.NotNil(resp2.PodResources[req.PodUid].ContainerResources[testName]) - as.NotNil(resp2.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)]) - as.Equal(&pluginapi.ResourceAllocationInfo{ - OciPropertyName: util.OCIPropertyNameCPUSetMems, - IsNodeResource: false, - IsScalarResource: true, - AllocatedQuantity: 1073741824, - AllocationResult: machine.NewCPUSet(0, 1, 2, 3).String(), - }, resp2.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)]) + tmpDir, err := ioutil.TempDir("", "checkpoint-"+tc.name) + as.Nil(err) + defer os.RemoveAll(tmpDir) - os.RemoveAll(tmpDir) - dynamicPolicy, err = 
getTestDynamicPolicyWithInitialization(cpuTopology, machineInfo, tmpDir) - as.Nil(err) + var policy *DynamicPolicy + if tc.useExtraResources { + policy, err = getTestDynamicPolicyWithExtraResourcesWithInitialization(cpuTopology, machineInfo, tmpDir) + } else { + policy, err = getTestDynamicPolicyWithInitialization(cpuTopology, machineInfo, tmpDir) + } + as.Nil(err) - // test for dedicated_cores with numa_binding - req = &pluginapi.ResourceRequest{ - PodUid: string(uuid.NewUUID()), - PodNamespace: testName, - PodName: testName, - ContainerName: testName, - ContainerType: pluginapi.ContainerType_MAIN, - ContainerIndex: 0, - ResourceName: string(v1.ResourceMemory), - Hint: &pluginapi.TopologyHint{ - Nodes: []uint64{0}, - Preferred: true, - }, - ResourceRequests: map[string]float64{ - string(v1.ResourceMemory): 2147483648, - }, - Annotations: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, - consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "true"}`, - }, - Labels: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, - }, - } + reqs := tc.buildRequest() - _, err = dynamicPolicy.Allocate(context.Background(), req) - as.Nil(err) + // Execute all requests sequentially + for _, req := range reqs { + _, err = policy.Allocate(context.Background(), req) + as.Nil(err) + } - resp3, err := dynamicPolicy.GetResourcesAllocation(context.Background(), &pluginapi.GetResourcesAllocationRequest{}) - as.Nil(err) + lastReq := reqs[len(reqs)-1] - as.NotNil(resp3.PodResources[req.PodUid]) - as.NotNil(resp3.PodResources[req.PodUid].ContainerResources[testName]) - as.NotNil(resp3.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)]) - as.Equal(&pluginapi.ResourceAllocationInfo{ - OciPropertyName: util.OCIPropertyNameCPUSetMems, - IsNodeResource: false, - IsScalarResource: true, - AllocatedQuantity: 7516192768, - 
AllocationResult: machine.NewCPUSet(0).String(), - }, resp3.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)]) - - // test for system_cores with cpuset_pool reserve - req = &pluginapi.ResourceRequest{ - PodUid: string(uuid.NewUUID()), - PodNamespace: testName, - PodName: testName, - ContainerName: testName, - ContainerType: pluginapi.ContainerType_MAIN, - ContainerIndex: 0, - ResourceName: string(v1.ResourceMemory), - Hint: &pluginapi.TopologyHint{ - Nodes: []uint64{0}, - Preferred: true, - }, - ResourceRequests: map[string]float64{ - string(v1.ResourceMemory): 2147483648, - }, - Annotations: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, - consts.PodAnnotationCPUEnhancementKey: `{"cpuset_pool": "reserve"}`, - }, - Labels: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, - }, - } + resp, err := policy.GetResourcesAllocation(context.Background(), &pluginapi.GetResourcesAllocationRequest{}) + as.Nil(err) - _, err = dynamicPolicy.Allocate(context.Background(), req) - as.Nil(err) + memAlloc := resp.PodResources[lastReq.PodUid]. + ContainerResources[testName]. 
+ ResourceAllocation[string(v1.ResourceMemory)] - resp4, err := dynamicPolicy.GetResourcesAllocation(context.Background(), &pluginapi.GetResourcesAllocationRequest{}) - as.Nil(err) + as.NotNil(memAlloc) + as.Equal(tc.expectedMemory, memAlloc) - as.NotNil(resp4.PodResources[req.PodUid]) - as.NotNil(resp4.PodResources[req.PodUid].ContainerResources[testName]) - as.NotNil(resp4.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)]) - as.Equal(&pluginapi.ResourceAllocationInfo{ - OciPropertyName: util.OCIPropertyNameCPUSetMems, - IsNodeResource: false, - IsScalarResource: true, - AllocatedQuantity: 0, - AllocationResult: machine.NewCPUSet(0, 1, 2, 3).String(), - }, resp4.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)]) - - // test for system_cores with cpuset_pool reserve and with numa binding - req = &pluginapi.ResourceRequest{ - PodUid: string(uuid.NewUUID()), - PodNamespace: testName, - PodName: testName, - ContainerName: testName, - ContainerType: pluginapi.ContainerType_MAIN, - ContainerIndex: 0, - ResourceName: string(v1.ResourceMemory), - Hint: &pluginapi.TopologyHint{ - Nodes: []uint64{0}, - Preferred: true, - }, - ResourceRequests: map[string]float64{ - string(v1.ResourceMemory): 2147483648, - }, - Annotations: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, - consts.PodAnnotationCPUEnhancementKey: `{"cpuset_pool": "reserve"}`, - consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true"}`, - }, - Labels: map[string]string{ - consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSystemCores, - }, + if tc.checkHugepages { + hpAlloc := resp.PodResources[lastReq.PodUid]. + ContainerResources[testName]. 
+ ResourceAllocation["hugepages-2Mi"] + as.NotNil(hpAlloc) + as.Equal(tc.expectedHugepages, hpAlloc) + } + }) } - - _, err = dynamicPolicy.Allocate(context.Background(), req) - as.Nil(err) - - resp5, err := dynamicPolicy.GetResourcesAllocation(context.Background(), &pluginapi.GetResourcesAllocationRequest{}) - as.Nil(err) - - as.NotNil(resp5.PodResources[req.PodUid]) - as.NotNil(resp5.PodResources[req.PodUid].ContainerResources[testName]) - as.NotNil(resp5.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)]) - as.Equal(&pluginapi.ResourceAllocationInfo{ - OciPropertyName: util.OCIPropertyNameCPUSetMems, - IsNodeResource: false, - IsScalarResource: true, - AllocatedQuantity: 0, - AllocationResult: machine.NewCPUSet(1, 2, 3).String(), - }, resp5.PodResources[req.PodUid].ContainerResources[testName].ResourceAllocation[string(v1.ResourceMemory)]) } func TestGenerateResourcesMachineStateFromPodEntries(t *testing.T) { @@ -2206,7 +3048,7 @@ func TestGenerateResourcesMachineStateFromPodEntries(t *testing.T) { machineInfo, err := machine.GenerateDummyMachineInfo(4, 32) as.Nil(err) - reservedMemory, err := getReservedMemory(fakeConf, &metaserver.MetaServer{}, machineInfo) + reserved, err := getResourcesReservedMemory(fakeConf, &metaserver.MetaServer{}, machineInfo, []string{string(v1.ResourceMemory)}) as.Nil(err) podUID := string(uuid.NewUUID()) @@ -2236,11 +3078,7 @@ func TestGenerateResourcesMachineStateFromPodEntries(t *testing.T) { v1.ResourceMemory: podEntries, } - reserved := map[v1.ResourceName]map[int]uint64{ - v1.ResourceMemory: reservedMemory, - } - - resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(machineInfo, podResourceEntries, nil, reserved) + resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(machineInfo, nil, podResourceEntries, nil, reserved, nil) as.Nil(err) as.NotNil(resourcesMachineState[v1.ResourceMemory][0]) @@ -2262,13 +3100,10 @@ func TestHandleAdvisorResp(t 
*testing.T) { machineInfo, err := machine.GenerateDummyMachineInfo(4, 32) as.Nil(err) - reservedMemory, err := getReservedMemory(fakeConf, &metaserver.MetaServer{}, machineInfo) + resourcesReservedMemory, err := getResourcesReservedMemory(fakeConf, &metaserver.MetaServer{}, machineInfo, + []string{string(v1.ResourceMemory)}) as.Nil(err) - resourcesReservedMemory := map[v1.ResourceName]map[int]uint64{ - v1.ResourceMemory: reservedMemory, - } - pod1UID := string(uuid.NewUUID()) pod2UID := string(uuid.NewUUID()) pod3UID := string(uuid.NewUUID()) @@ -2793,7 +3628,8 @@ func TestHandleAdvisorResp(t *testing.T) { memoryadvisor.RegisterControlKnobHandler(memoryadvisor.ControlKnobKeyMemoryNUMAHeadroom, memoryadvisor.ControlKnobHandlerWithChecker(dynamicPolicy.handleAdvisorMemoryNUMAHeadroom)) - machineState, err := state.GenerateMachineStateFromPodEntries(machineInfo, tc.podResourceEntries, nil, resourcesReservedMemory) + machineState, err := state.GenerateMachineStateFromPodEntries(machineInfo, nil, tc.podResourceEntries, + nil, resourcesReservedMemory, dynamicPolicy.extraResourceNames) as.Nil(err) if tc.podResourceEntries != nil { @@ -3164,14 +4000,11 @@ func TestSetExtraControlKnobByConfigs(t *testing.T) { v1.ResourceMemory: podEntries, } - reservedMemory, err := getReservedMemory(fakeConf, &metaserver.MetaServer{}, machineInfo) + reserved, err := getResourcesReservedMemory(fakeConf, &metaserver.MetaServer{}, machineInfo, []string{string(v1.ResourceMemory)}) as.Nil(err) - reserved := map[v1.ResourceName]map[int]uint64{ - v1.ResourceMemory: reservedMemory, - } - - resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(machineInfo, podResourceEntries, dynamicPolicy.state.GetMachineState(), reserved) + resourcesMachineState, err := state.GenerateMachineStateFromPodEntries(machineInfo, nil, podResourceEntries, + dynamicPolicy.state.GetMachineState(), reserved, dynamicPolicy.extraResourceNames) as.Nil(err) 
dynamicPolicy.state.SetPodResourceEntries(podResourceEntries, true) @@ -4109,13 +4942,12 @@ func TestDynamicPolicy_adjustAllocationEntries(t *testing.T) { dynamicPolicy.metaServer = tt.fields.metaServer dynamicPolicy.asyncWorkers = asyncworker.NewAsyncWorkers(memoryPluginAsyncWorkersName, dynamicPolicy.emitter) dynamicPolicy.state.SetPodResourceEntries(podResourceEntries, true) - reservedMemory, err := getReservedMemory(fakeConf, dynamicPolicy.metaServer, machineInfo) + resourcesReservedMemory, err := getResourcesReservedMemory(fakeConf, dynamicPolicy.metaServer, machineInfo, + []string{string(v1.ResourceMemory)}) assert.NoError(t, err) - resourcesReservedMemory := map[v1.ResourceName]map[int]uint64{ - v1.ResourceMemory: reservedMemory, - } - machineState, err := state.GenerateMachineStateFromPodEntries(machineInfo, podResourceEntries, dynamicPolicy.state.GetMachineState(), resourcesReservedMemory) + machineState, err := state.GenerateMachineStateFromPodEntries(machineInfo, nil, podResourceEntries, + dynamicPolicy.state.GetMachineState(), resourcesReservedMemory, dynamicPolicy.extraResourceNames) assert.NoError(t, err) dynamicPolicy.state.SetMachineState(machineState, true) @@ -4864,7 +5696,8 @@ func Test_adjustAllocationEntries(t *testing.T) { dynamicPolicy.state.SetAllocationInfo(v1.ResourceMemory, "test-pod-4-uid", "test-container-1", pod4Container1Allocation, true) podResourceEntries := dynamicPolicy.state.GetPodResourceEntries() - machineState, err := state.GenerateMachineStateFromPodEntries(dynamicPolicy.state.GetMachineInfo(), podResourceEntries, dynamicPolicy.state.GetMachineState(), dynamicPolicy.state.GetReservedMemory()) + machineState, err := state.GenerateMachineStateFromPodEntries(dynamicPolicy.state.GetMachineInfo(), nil, podResourceEntries, + dynamicPolicy.state.GetMachineState(), dynamicPolicy.state.GetReservedMemory(), dynamicPolicy.extraResourceNames) as.NoError(err) dynamicPolicy.state.SetMachineState(machineState, true) diff --git 
a/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor/numa_allocation_reactor.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor/numa_allocation_reactor.go index b43b79b8cc..6dca80029a 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor/numa_allocation_reactor.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor/numa_allocation_reactor.go @@ -53,8 +53,9 @@ func (p numaPodAllocationWrapper) UpdateAllocation(pod *v1.Pod) error { } func (p numaPodAllocationWrapper) getNUMABindResult() (string, error) { - if p.CheckDedicatedNUMABindingNUMAExclusive() { - // numa binding is exclusive, we can directly use numa allocation result as numa bind result + if p.CheckDedicatedNUMABindingNUMAExclusive() || p.CheckDistributeEvenlyAcrossNuma() { + // numa binding is exclusive or distribute evenly across numa annotation enabled, + // we can directly use numa allocation result as numa bind result // which is more than one numa numaList := p.AllocationInfo.NumaAllocationResult.ToSliceInt() if len(numaList) == 0 { diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor/numa_allocation_reactor_test.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor/numa_allocation_reactor_test.go index dfed116c54..7dfce76531 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor/numa_allocation_reactor_test.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor/numa_allocation_reactor_test.go @@ -290,6 +290,236 @@ func Test_podNUMAAllocationReactor_UpdateAllocation(t *testing.T) { }, }, }, + { + name: "distribute_evenly_across_numa_pod", + fields: fields{ + podFetcher: &pod.PodFetcherStub{ + PodList: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "test", + UID: "test-1-uid", + }, + }, + }, + }, + client: fake.NewSimpleClientset( + &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "test", + UID: "test-1-uid", + }, + }, + ), + }, + args: args{ + allocation: &state.AllocationInfo{ + AllocationMeta: 
commonstate.AllocationMeta{ + PodUid: "test-1-uid", + PodNamespace: "test", + PodName: "test-1", + ContainerName: "container-1", + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + QoSLevel: consts.PodAnnotationQoSLevelSharedCores, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma: consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }, + }, + AggregatedQuantity: 7516192768, + NumaAllocationResult: machine.NewCPUSet(0, 1), + TopologyAwareAllocations: map[int]uint64{ + 0: 3758096384, + 1: 3758096384, + }, + }, + }, + wantPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "test", + UID: types.UID("test-1-uid"), + Annotations: map[string]string{ + consts.PodAnnotationNUMABindResultKey: "0,1", + }, + }, + }, + }, + { + name: "exclusive_enabled_but_empty_allocation_result", + fields: fields{ + podFetcher: &pod.PodFetcherStub{ + PodList: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "test", + UID: "test-1-uid", + }, + }, + }, + }, + client: fake.NewSimpleClientset( + &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "test", + UID: "test-1-uid", + }, + }, + ), + }, + args: args{ + allocation: &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + PodUid: "test-1-uid", + PodNamespace: "test", + PodName: "test-1", + ContainerName: "container-1", + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + QoSLevel: consts.PodAnnotationQoSLevelDedicatedCores, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + 
consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelDedicatedCores, + }, + }, + AggregatedQuantity: 7516192768, + NumaAllocationResult: machine.NewCPUSet(), + }, + }, + wantPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "test", + UID: types.UID("test-1-uid"), + }, + }, + wantErr: true, + }, + { + name: "invalid_numa_hint_multiple_values", + fields: fields{ + podFetcher: &pod.PodFetcherStub{ + PodList: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "test", + UID: "test-1-uid", + }, + }, + }, + }, + client: fake.NewSimpleClientset( + &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "test", + UID: "test-1-uid", + }, + }, + ), + }, + args: args{ + allocation: &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + PodUid: "test-1-uid", + PodNamespace: "test", + PodName: "test-1", + ContainerName: "container-1", + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + QoSLevel: consts.PodAnnotationQoSLevelSharedCores, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + cpuconsts.CPUStateAnnotationKeyNUMAHint: "0,1", + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }, + }, + AggregatedQuantity: 7516192768, + NumaAllocationResult: machine.NewCPUSet(0), + }, + }, + wantPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "test", + UID: types.UID("test-1-uid"), + }, + }, + wantErr: true, + }, + { + name: "invalid_numa_hint_parse_error", + 
fields: fields{ + podFetcher: &pod.PodFetcherStub{ + PodList: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "test", + UID: "test-1-uid", + }, + }, + }, + }, + client: fake.NewSimpleClientset( + &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "test", + UID: "test-1-uid", + }, + }, + ), + }, + args: args{ + allocation: &state.AllocationInfo{ + AllocationMeta: commonstate.AllocationMeta{ + PodUid: "test-1-uid", + PodNamespace: "test", + PodName: "test-1", + ContainerName: "container-1", + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + QoSLevel: consts.PodAnnotationQoSLevelSharedCores, + Annotations: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + cpuconsts.CPUStateAnnotationKeyNUMAHint: "abc", + }, + Labels: map[string]string{ + consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores, + }, + }, + AggregatedQuantity: 7516192768, + NumaAllocationResult: machine.NewCPUSet(0), + }, + }, + wantPod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-1", + Namespace: "test", + UID: types.UID("test-1-uid"), + }, + }, + wantErr: true, + }, } for _, tt := range tests { tt := tt diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state.go index dde37ff3e7..959ca8e1cb 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state.go @@ -203,6 +203,50 @@ func (pre PodResourceEntries) Clone() PodResourceEntries { return clone } +// GetResourceAllocation gets the ResourceAllocation of every resource of a certain pod UID and container name. 
+func (pre PodResourceEntries) GetResourceAllocation(podUID, containerName string) (*pluginapi.ResourceAllocation, error) { + if pre == nil { + return nil, fmt.Errorf("GetResourceAllocation of nil PodResourceEntries") + } + + resourceAllocation := make(map[string]*pluginapi.ResourceAllocationInfo) + + for resourceName, podEntries := range pre { + allocationInfo := podEntries[podUID][containerName] + if allocationInfo == nil { + continue + } + + resourceAllocation[string(resourceName)] = &pluginapi.ResourceAllocationInfo{ + OciPropertyName: util.OCIPropertyNameCPUSetMems, + IsNodeResource: false, + IsScalarResource: true, + AllocatedQuantity: float64(allocationInfo.AggregatedQuantity), + AllocationResult: allocationInfo.NumaAllocationResult.String(), + } + + // deal with accompanying resources + for name, entry := range allocationInfo.ExtraControlKnobInfo { + if entry.OciPropertyName == "" { + continue + } + + if resourceAllocation[name] != nil { + return nil, fmt.Errorf("name: %s meets conflict", name) + } + + resourceAllocation[name] = &pluginapi.ResourceAllocationInfo{ + OciPropertyName: entry.OciPropertyName, + AllocationResult: entry.ControlKnobValue, + } + } + } + + return &pluginapi.ResourceAllocation{ + ResourceAllocation: resourceAllocation, + }, nil +} + func (ns *NUMANodeState) String() string { if ns == nil { return "" @@ -487,6 +531,9 @@ type reader interface { GetNUMAHeadroom() map[int]int64 GetPodResourceEntries() PodResourceEntries GetAllocationInfo(resourceName v1.ResourceName, podUID, containerName string) *AllocationInfo + // GetResourceAllocationInfo gets the allocationInfo of all resources of a specific container. + // Returns nil if there is no such container in state. 
+ GetResourceAllocationInfo(podUID, containerName string) map[v1.ResourceName]*AllocationInfo } // writer is used to store information into local states, @@ -507,6 +554,8 @@ type ReadonlyState interface { reader GetMachineInfo() *info.MachineInfo + // GetMemoryTopology returns the memory topology info (including NormalMemoryDetails etc.) + GetMemoryTopology() *machine.MemoryTopology GetReservedMemory() map[v1.ResourceName]map[int]uint64 } diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_checkpoint.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_checkpoint.go index e595f08ad4..3fc5d6b879 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_checkpoint.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_checkpoint.go @@ -58,18 +58,21 @@ type stateCheckpoint struct { skipStateCorruption bool emitter metrics.MetricEmitter machineInfo *info.MachineInfo - reservedMemory map[v1.ResourceName]map[int]uint64 + // memoryTopology contains detailed memory capacities (e.g. 
NormalMemoryDetails excluding hugepages) + memoryTopology *machine.MemoryTopology + reservedMemory map[v1.ResourceName]map[int]uint64 + extraResourceNames []string } func NewCheckpointState( stateDirectoryConfig *statedirectory.StateDirectoryConfiguration, checkpointName, policyName string, - topology *machine.CPUTopology, machineInfo *info.MachineInfo, + topology *machine.CPUTopology, machineInfo *info.MachineInfo, memoryTopology *machine.MemoryTopology, reservedMemory map[v1.ResourceName]map[int]uint64, skipStateCorruption bool, - emitter metrics.MetricEmitter, + emitter metrics.MetricEmitter, extraResourceNames []string, ) (State, error) { currentStateDir, otherStateDir := stateDirectoryConfig.GetCurrentAndPreviousStateFileDirectory() - defaultCache, err := NewMemoryPluginState(topology, machineInfo, reservedMemory) + defaultCache, err := NewMemoryPluginState(topology, machineInfo, memoryTopology, reservedMemory, extraResourceNames) if err != nil { return nil, fmt.Errorf("NewMemoryPluginState failed with error: %v", err) } @@ -81,7 +84,9 @@ func NewCheckpointState( skipStateCorruption: skipStateCorruption, emitter: emitter, machineInfo: machineInfo, + memoryTopology: memoryTopology, reservedMemory: reservedMemory, + extraResourceNames: extraResourceNames, } cm, err := customcheckpointmanager.NewCustomCheckpointManager(currentStateDir, otherStateDir, checkpointName, @@ -106,7 +111,8 @@ func (sc *stateCheckpoint) RestoreState(cp checkpointmanager.Checkpoint) (bool, return false, fmt.Errorf("[memory_plugin] configured policy %q differs from state checkpoint policy %q", sc.policyName, checkpoint.PolicyName) } - generatedResourcesMachineState, err := GenerateMachineStateFromPodEntries(sc.machineInfo, checkpoint.PodResourceEntries, checkpoint.MachineState, sc.reservedMemory) + generatedResourcesMachineState, err := GenerateMachineStateFromPodEntries(sc.machineInfo, sc.memoryTopology, checkpoint.PodResourceEntries, + checkpoint.MachineState, sc.reservedMemory, 
sc.extraResourceNames) if err != nil { return false, fmt.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err) } @@ -177,6 +183,13 @@ func (sc *stateCheckpoint) GetMachineInfo() *info.MachineInfo { return sc.cache.GetMachineInfo() } +func (sc *stateCheckpoint) GetMemoryTopology() *machine.MemoryTopology { + sc.RLock() + defer sc.RUnlock() + + return sc.cache.GetMemoryTopology() +} + func (sc *stateCheckpoint) GetMachineState() NUMANodeResourcesMap { sc.RLock() defer sc.RUnlock() @@ -200,6 +213,13 @@ func (sc *stateCheckpoint) GetAllocationInfo( return sc.cache.GetAllocationInfo(resourceName, podUID, containerName) } +func (sc *stateCheckpoint) GetResourceAllocationInfo(podUID, containerName string) map[v1.ResourceName]*AllocationInfo { + sc.RLock() + defer sc.RUnlock() + + return sc.cache.GetResourceAllocationInfo(podUID, containerName) +} + func (sc *stateCheckpoint) GetPodResourceEntries() PodResourceEntries { sc.RLock() defer sc.RUnlock() diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_mem.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_mem.go index a6a34e6e00..9387f711b9 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_mem.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_mem.go @@ -36,14 +36,20 @@ type memoryPluginState struct { socketTopology map[int]string machineInfo *info.MachineInfo + // memoryTopology contains detailed memory capacities (e.g. 
NormalMemoryDetails excluding hugepages) + memoryTopology *machine.MemoryTopology reservedMemory map[v1.ResourceName]map[int]uint64 machineState NUMANodeResourcesMap numaHeadroom map[int]int64 podResourceEntries PodResourceEntries + + extraResourceNames []string } -func NewMemoryPluginState(topology *machine.CPUTopology, machineInfo *info.MachineInfo, reservedMemory map[v1.ResourceName]map[int]uint64) (*memoryPluginState, error) { +func NewMemoryPluginState(topology *machine.CPUTopology, machineInfo *info.MachineInfo, + memoryTopology *machine.MemoryTopology, reservedMemory map[v1.ResourceName]map[int]uint64, extraResourceNames []string, +) (*memoryPluginState, error) { klog.InfoS("[memory_plugin] initializing new memory plugin in-memory state store") socketTopology := make(map[int]string) @@ -51,7 +57,7 @@ func NewMemoryPluginState(topology *machine.CPUTopology, machineInfo *info.Machi socketTopology[socketID] = topology.CPUDetails.NUMANodesInSockets(socketID).String() } - defaultMachineState, err := GenerateMachineState(machineInfo, reservedMemory) + defaultMachineState, err := GenerateMachineState(machineInfo, memoryTopology, reservedMemory, extraResourceNames) if err != nil { return nil, fmt.Errorf("GenerateMachineState failed with error: %v", err) } @@ -62,7 +68,9 @@ func NewMemoryPluginState(topology *machine.CPUTopology, machineInfo *info.Machi numaHeadroom: make(map[int]int64), socketTopology: socketTopology, machineInfo: machineInfo.Clone(), + memoryTopology: memoryTopology, reservedMemory: reservedMemory, + extraResourceNames: extraResourceNames, }, nil } @@ -103,6 +111,13 @@ func (s *memoryPluginState) GetMachineInfo() *info.MachineInfo { return s.machineInfo.Clone() } +func (s *memoryPluginState) GetMemoryTopology() *machine.MemoryTopology { + s.RLock() + defer s.RUnlock() + + return s.memoryTopology +} + func (s *memoryPluginState) GetAllocationInfo(resourceName v1.ResourceName, podUID, containerName string) *AllocationInfo { s.RLock() defer 
s.RUnlock() @@ -113,6 +128,24 @@ func (s *memoryPluginState) GetAllocationInfo(resourceName v1.ResourceName, podU return nil } +func (s *memoryPluginState) GetResourceAllocationInfo(podUID, containerName string) map[v1.ResourceName]*AllocationInfo { + s.RLock() + defer s.RUnlock() + + var allAllocationInfos map[v1.ResourceName]*AllocationInfo + for resourceName, res := range s.podResourceEntries { + if allocInfo, ok := res[podUID][containerName]; ok { + // Lazy initialization of map only when there is allocation info for a container + if allAllocationInfos == nil { + allAllocationInfos = make(map[v1.ResourceName]*AllocationInfo) + } + allAllocationInfos[resourceName] = allocInfo.Clone() + } + } + + return allAllocationInfos +} + func (s *memoryPluginState) GetPodResourceEntries() PodResourceEntries { s.RLock() defer s.RUnlock() @@ -193,8 +226,9 @@ func (s *memoryPluginState) ClearState() { s.Lock() defer s.Unlock() - s.machineState, _ = GenerateMachineState(s.machineInfo, s.reservedMemory) + s.machineState, _ = GenerateMachineState(s.machineInfo, s.memoryTopology, s.reservedMemory, s.extraResourceNames) s.podResourceEntries = make(PodResourceEntries) + s.numaHeadroom = make(map[int]int64) s.socketTopology = make(map[int]string) klog.V(2).InfoS("[memory_plugin] cleared state") diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_test.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_test.go index e0c5bccfde..98d0c6eb7a 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_test.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state_test.go @@ -140,7 +140,7 @@ func TestNewMemoryPluginCheckpoint(t *testing.T) { }, } oldCheckpoint.PodResourceEntries = podResourceEntries - machineState, err := GenerateMachineStateFromPodEntries(machineInfo, podResourceEntries, nil, reservedMemory) + machineState, err := GenerateMachineStateFromPodEntries(machineInfo, nil, podResourceEntries, nil, reservedMemory, nil) assert.NoError(t, err) 
oldCheckpoint.MachineState = machineState err = oldCheckpointManager.CreateCheckpoint(checkpointName, oldCheckpoint) @@ -153,8 +153,8 @@ func TestNewMemoryPluginCheckpoint(t *testing.T) { EnableInMemoryState: true, } - state, err := NewCheckpointState(stateDirectoryConfig, checkpointName, policyName, cpuTopology, machineInfo, - reservedMemory, false, metrics.DummyMetrics{}) + state, err := NewCheckpointState(stateDirectoryConfig, checkpointName, policyName, cpuTopology, machineInfo, nil, + reservedMemory, false, metrics.DummyMetrics{}, nil) if tt.corruptFile { assert.Error(t, err) diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/util.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/util.go index 7bab660c00..fedda197f5 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/util.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/util.go @@ -18,12 +18,15 @@ package state import ( "fmt" + "strings" "time" info "github.com/google/cadvisor/info/v1" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/klog/v2" pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + "k8s.io/kubernetes/pkg/apis/core/v1/helper" "github.com/kubewharf/katalyst-api/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" @@ -48,21 +51,24 @@ func GenerateMemoryContainerAllocationMeta(req *pluginapi.ResourceRequest, qosLe } // GenerateMachineState returns NUMANodeResourcesMap based on -// machine info and reserved resources -func GenerateMachineState(machineInfo *info.MachineInfo, reserved map[v1.ResourceName]map[int]uint64) (NUMANodeResourcesMap, error) { +// machine info, memory topology (to get precise capacities like NormalMemoryDetails), +// and reserved resources. 
+func GenerateMachineState(machineInfo *info.MachineInfo, memoryTopology *machine.MemoryTopology, reserved map[v1.ResourceName]map[int]uint64, + extraResourceNames []string, +) (NUMANodeResourcesMap, error) { if machineInfo == nil { return nil, fmt.Errorf("GenerateMachineState got nil machineInfo") } - // todo: currently only support memory, we will support huge page later. + resources := append(extraResourceNames, string(v1.ResourceMemory)) defaultResourcesMachineState := make(NUMANodeResourcesMap) - for _, resourceName := range []v1.ResourceName{v1.ResourceMemory} { - machineState, err := GenerateResourceState(machineInfo, reserved, resourceName) + for _, resourceName := range resources { + machineState, err := GenerateResourceState(machineInfo, memoryTopology, reserved, v1.ResourceName(resourceName)) if err != nil { return nil, fmt.Errorf("GenerateResourceState for resource: %s failed with error: %v", resourceName, err) } - defaultResourcesMachineState[resourceName] = machineState + defaultResourcesMachineState[v1.ResourceName(resourceName)] = machineState } return defaultResourcesMachineState, nil } @@ -91,14 +97,21 @@ func GetReclaimedNUMAHeadroom(numaHeadroom map[int]int64, numaSet machine.CPUSet } // GenerateResourceState returns NUMANodeMap for given resource based on -// machine info and reserved resources -func GenerateResourceState(machineInfo *info.MachineInfo, reserved map[v1.ResourceName]map[int]uint64, resourceName v1.ResourceName) (NUMANodeMap, error) { +// machine info, memory topology (to extract normal memory capacities), and reserved resources +func GenerateResourceState(machineInfo *info.MachineInfo, memoryTopology *machine.MemoryTopology, reserved map[v1.ResourceName]map[int]uint64, resourceName v1.ResourceName) (NUMANodeMap, error) { defaultMachineState := make(NUMANodeMap) - switch resourceName { - case v1.ResourceMemory: + switch { + case resourceName == v1.ResourceMemory: for _, node := range machineInfo.Topology { - totalMemSizeQuantity 
:= node.Memory + var totalMemSizeQuantity uint64 + // Use NormalMemoryDetails to exclude hugepages when calculating allocatable memory + if memoryTopology != nil { + totalMemSizeQuantity = memoryTopology.NormalMemoryDetails[node.Id] + } else { + // Fallback for tests or environments where memory topology isn't fully initialized + totalMemSizeQuantity = node.Memory + } numaReservedMemQuantity := reserved[resourceName][node.Id] if totalMemSizeQuantity < numaReservedMemQuantity { @@ -117,6 +130,8 @@ func GenerateResourceState(machineInfo *info.MachineInfo, reserved map[v1.Resour PodEntries: make(PodEntries), } } + case strings.HasPrefix(string(resourceName), v1.ResourceHugePagesPrefix): + return generateHugePagesResourceState(machineInfo, reserved, resourceName) default: return nil, fmt.Errorf("unsupported resource name: %s", resourceName) } @@ -124,11 +139,44 @@ func GenerateResourceState(machineInfo *info.MachineInfo, reserved map[v1.Resour return defaultMachineState, nil } +// generateHugePagesResourceState returns NUMANodeMap for a particular hugepage resource based on machine info and reserved resources. 
+func generateHugePagesResourceState(machineInfo *info.MachineInfo, reserved map[v1.ResourceName]map[int]uint64, + resourceName v1.ResourceName, +) (NUMANodeMap, error) { + hugepageResourceMachineState := make(NUMANodeMap) + for _, node := range machineInfo.Topology { + nodeState := &NUMANodeState{} + for _, hugepage := range node.HugePages { + hugepageQuantity := resource.NewQuantity(int64(hugepage.PageSize)*1024, resource.BinarySI) + hugepageName := helper.HugePageResourceName(*hugepageQuantity) + if hugepageName != resourceName { + continue + } + + systemReserved := reserved[resourceName][node.Id] + totalHugepagesSize := hugepage.PageSize * hugepage.NumPages * 1024 + if totalHugepagesSize < systemReserved { + return nil, fmt.Errorf("invalid reserved %v: %d in NUMA: %d with total memory size: %d", resourceName, + systemReserved, node.Id, totalHugepagesSize) + } + + allocatable := totalHugepagesSize - systemReserved + nodeState.TotalMemSize = totalHugepagesSize + nodeState.SystemReserved = systemReserved + nodeState.Allocatable = allocatable + nodeState.Free = allocatable + } + hugepageResourceMachineState[node.Id] = nodeState + } + + return hugepageResourceMachineState, nil +} + // GenerateMachineStateFromPodEntries returns NUMANodeResourcesMap based on -// machine info and reserved resources (along with existed pod entries) -func GenerateMachineStateFromPodEntries(machineInfo *info.MachineInfo, +// machine info, memory topology (for exact capacity logic), and reserved resources (along with existed pod entries) +func GenerateMachineStateFromPodEntries(machineInfo *info.MachineInfo, memoryTopology *machine.MemoryTopology, podResourceEntries PodResourceEntries, originResourcesMachineState NUMANodeResourcesMap, - reserved map[v1.ResourceName]map[int]uint64, + reserved map[v1.ResourceName]map[int]uint64, extraResourceNames []string, ) (NUMANodeResourcesMap, error) { if machineInfo == nil { return nil, fmt.Errorf("GenerateMachineStateFromPodEntries got nil 
machineInfo") @@ -138,16 +186,16 @@ func GenerateMachineStateFromPodEntries(machineInfo *info.MachineInfo, originResourcesMachineState = make(NUMANodeResourcesMap) } - // todo: currently only support memory, we will support huge page later. + resources := append(extraResourceNames, string(v1.ResourceMemory)) currentResourcesMachineState := make(NUMANodeResourcesMap) - for _, resourceName := range []v1.ResourceName{v1.ResourceMemory} { - machineState, err := GenerateResourceStateFromPodEntries(machineInfo, podResourceEntries[resourceName], - originResourcesMachineState[resourceName], reserved, resourceName) + for _, resourceName := range resources { + machineState, err := GenerateResourceStateFromPodEntries(machineInfo, memoryTopology, podResourceEntries[v1.ResourceName(resourceName)], + originResourcesMachineState[v1.ResourceName(resourceName)], reserved, v1.ResourceName(resourceName)) if err != nil { return nil, fmt.Errorf("GenerateResourceState for resource: %s failed with error: %v", resourceName, err) } - currentResourcesMachineState[resourceName] = machineState + currentResourcesMachineState[v1.ResourceName(resourceName)] = machineState } return currentResourcesMachineState, nil } @@ -214,30 +262,32 @@ func updateMachineStatePreOccPodEntries(currentMachineState, originMachineState } // GenerateResourceStateFromPodEntries returns NUMANodeMap for given resource based on -// machine info and reserved resources along with existed pod entries -func GenerateResourceStateFromPodEntries(machineInfo *info.MachineInfo, +// machine info, memory topology, and reserved resources along with existed pod entries +func GenerateResourceStateFromPodEntries(machineInfo *info.MachineInfo, memoryTopology *machine.MemoryTopology, podEntries PodEntries, originMachineState NUMANodeMap, reserved map[v1.ResourceName]map[int]uint64, resourceName v1.ResourceName, ) (NUMANodeMap, error) { - switch resourceName { - case v1.ResourceMemory: - currentMachineState, err := 
GenerateMemoryStateFromPodEntries(machineInfo, podEntries, reserved) + switch { + case resourceName == v1.ResourceMemory: + currentMachineState, err := GenerateMemoryStateFromPodEntries(machineInfo, memoryTopology, podEntries, reserved, resourceName) if err != nil { return nil, err } updateMachineStatePreOccPodEntries(currentMachineState, originMachineState) return currentMachineState, nil + case strings.HasPrefix(string(resourceName), v1.ResourceHugePagesPrefix): + return GenerateMemoryStateFromPodEntries(machineInfo, memoryTopology, podEntries, reserved, resourceName) default: return nil, fmt.Errorf("unsupported resource name: %s", resourceName) } } // GenerateMemoryStateFromPodEntries returns NUMANodeMap for memory based on -// machine info and reserved resources along with existed pod entries -func GenerateMemoryStateFromPodEntries(machineInfo *info.MachineInfo, - podEntries PodEntries, reserved map[v1.ResourceName]map[int]uint64, +// machine info, memory topology, and reserved resources along with existed pod entries +func GenerateMemoryStateFromPodEntries(machineInfo *info.MachineInfo, memoryTopology *machine.MemoryTopology, + podEntries PodEntries, reserved map[v1.ResourceName]map[int]uint64, resourceName v1.ResourceName, ) (NUMANodeMap, error) { - machineState, err := GenerateResourceState(machineInfo, reserved, v1.ResourceMemory) + machineState, err := GenerateResourceState(machineInfo, memoryTopology, reserved, resourceName) if err != nil { return nil, fmt.Errorf("GenerateResourceState failed with error: %v", err) } diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/util.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/util.go index 7713bd3d76..076ee677d5 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/util.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/util.go @@ -20,12 +20,17 @@ import ( "context" "fmt" "math" + "sort" + "strconv" + "strings" info "github.com/google/cadvisor/info/v1" v1 "k8s.io/api/core/v1" 
"k8s.io/apimachinery/pkg/api/resource" + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/state" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" "github.com/kubewharf/katalyst-core/pkg/config" "github.com/kubewharf/katalyst-core/pkg/metaserver" "github.com/kubewharf/katalyst-core/pkg/util/general" @@ -48,10 +53,12 @@ func GetFullyDropCacheBytes(container *v1.Container) int64 { return fullyDropCacheBytes } -// GetReservedMemory is used to spread total reserved memories into per-numa level. +// getResourcesReservedMemory is used to spread total reserved memories for all memory resources into per-numa level. // this reserve resource calculation logic should be kept in qrm, if advisor wants // to get this info, it should depend on the returned checkpoint (through cpu-server) -func getReservedMemory(conf *config.Configuration, metaServer *metaserver.MetaServer, machineInfo *info.MachineInfo) (map[int]uint64, error) { +func getResourcesReservedMemory(conf *config.Configuration, metaServer *metaserver.MetaServer, machineInfo *info.MachineInfo, + resourceNames []string, +) (map[v1.ResourceName]map[int]uint64, error) { if conf == nil { return nil, fmt.Errorf("nil conf") } else if metaServer == nil { @@ -60,6 +67,40 @@ func getReservedMemory(conf *config.Configuration, metaServer *metaserver.MetaSe return nil, fmt.Errorf("nil machineInfo") } + resourceNames = append(resourceNames, string(v1.ResourceMemory)) + + resourcesReservedMemory := make(map[v1.ResourceName]map[int]uint64) + for _, resourceName := range resourceNames { + // ignore duplicated resource name + if _, ok := resourcesReservedMemory[v1.ResourceName(resourceName)]; ok { + continue + } + + var reservedMemory map[int]uint64 + var err error + switch { + case v1.ResourceName(resourceName) == v1.ResourceMemory: + reservedMemory, err = getReservedMemory(conf, metaServer, machineInfo) + if err != nil { + return 
nil, err + } + case strings.HasPrefix(resourceName, v1.ResourceHugePagesPrefix): + reservedMemory, err = getReservedHugePagesMemory(conf, metaServer, machineInfo, v1.ResourceName(resourceName)) + if err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("unknown memory resource name: %s", resourceName) + } + + resourcesReservedMemory[v1.ResourceName(resourceName)] = reservedMemory + } + + return resourcesReservedMemory, nil +} + +// getReservedMemory gets reserved v1.ResourceMemory per numa level. +func getReservedMemory(conf *config.Configuration, metaServer *metaserver.MetaServer, machineInfo *info.MachineInfo) (map[int]uint64, error) { numasCount := len(machineInfo.Topology) var reservedMemoryGB float64 @@ -96,6 +137,71 @@ func getReservedMemory(conf *config.Configuration, metaServer *metaserver.MetaSe return reservedMemory, nil } +// getReservedHugePagesMemory gets the reserved memory for the hugepages resource name per numa level. +func getReservedHugePagesMemory(conf *config.Configuration, metaServer *metaserver.MetaServer, machineInfo *info.MachineInfo, + resourceName v1.ResourceName, +) (map[int]uint64, error) { + numaNodes := make([]int, 0, len(machineInfo.Topology)) + for _, node := range machineInfo.Topology { + numaNodes = append(numaNodes, node.Id) + } + + if len(numaNodes) == 0 { + return nil, fmt.Errorf("[memory_plugin] machine topology numa node number is zero") + } + + nodeNumber := int64(len(numaNodes)) + sort.Ints(numaNodes) + + reservedMemory := make(map[int]uint64) + var reservedBytes int64 + if conf.UseKubeletReservedConfig { + klConfig, err := metaServer.GetKubeletConfig(context.TODO()) + if err != nil { + return nil, fmt.Errorf("failed to get kubelet config: %v", err) + } + + reservedMemoryInfo, err := utilkubeconfig.GetReservedMemoryInfo(klConfig) + if err == nil && len(reservedMemoryInfo) != 0 { + for _, numaNode := range numaNodes { + if reservedMem, ok := reservedMemoryInfo[int32(numaNode)]; ok { + quantity := 
reservedMem[resourceName] + reservedMemory[numaNode] = uint64(quantity.Value()) + } + } + general.Infof("get numa reserved %v:%v bytes from kubelet config", resourceName, reservedMemory) + return reservedMemory, nil + } + + reservedQuantity, found, err := utilkubeconfig.GetReservedQuantity(klConfig, string(resourceName)) + if err != nil { + return nil, fmt.Errorf("GetReservedQuantity failed with error: %v", err) + } + reservedBytes = reservedQuantity.Value() + general.Infof("get reserved %v:%d bytes from kubelet config, found: %v", resourceName, reservedBytes, found) + } else { + if len(conf.ReservedNumaMemory) > 0 { + for _, numaNode := range numaNodes { + if reservedMem, ok := conf.ReservedNumaMemory[int32(numaNode)]; ok { + quantity := reservedMem[resourceName] + reservedMemory[numaNode] = uint64(quantity.Value()) + } + } + general.Infof("get numa reserved %v:%v bytes from ReservedNumaMemory configuration", resourceName, reservedMemory) + return reservedMemory, nil + } + } + + integerPart := uint64(reservedBytes / nodeNumber) + remainder := uint64(reservedBytes % nodeNumber) + for _, node := range numaNodes { + reservedMemory[node] = integerPart + } + + reservedMemory[numaNodes[0]] = reservedMemory[numaNodes[0]] + remainder + return reservedMemory, nil +} + func applySidecarAllocationInfoFromMainContainer(sidecarAllocationInfo, mainAllocationInfo *state.AllocationInfo) bool { changed := false if !sidecarAllocationInfo.NumaAllocationResult.Equals(mainAllocationInfo.NumaAllocationResult) { @@ -113,3 +219,49 @@ func applySidecarAllocationInfoFromMainContainer(sidecarAllocationInfo, mainAllo return changed } + +// getMemoryTopologyAllocationsAnnotations gets the memory topology allocation in the form of annotations. 
+func getMemoryTopologyAllocationsAnnotations(allocationInfos map[v1.ResourceName]*state.AllocationInfo, + topologyAllocationAnnotationKey string, +) map[v1.ResourceName]map[string]string { + if allocationInfos == nil { + return nil + } + + resourceAnnos := make(map[v1.ResourceName]map[string]string) + for resourceName, ai := range allocationInfos { + if ai == nil { + continue + } + + topologyAllocation := make(v1alpha1.TopologyAllocation) + topologyAllocation[v1alpha1.TopologyTypeNuma] = make(map[string]v1alpha1.ZoneAllocation) + + // In the case where there are no topology aware allocations, we just report the numa nodes. + if ai.TopologyAwareAllocations == nil { + if ai.NumaAllocationResult.IsEmpty() { + continue + } + + numaNodes := ai.NumaAllocationResult.ToSliceNoSortInt() + for _, numaNode := range numaNodes { + topologyAllocation[v1alpha1.TopologyTypeNuma][strconv.Itoa(numaNode)] = v1alpha1.ZoneAllocation{} + } + } else { + for numaNode, allocated := range ai.TopologyAwareAllocations { + topologyAllocation[v1alpha1.TopologyTypeNuma][strconv.Itoa(numaNode)] = v1alpha1.ZoneAllocation{ + Allocated: map[v1.ResourceName]resource.Quantity{ + resourceName: *resource.NewQuantity(int64(allocated), resource.BinarySI), + }, + } + } + } + + resourceAnnos[resourceName] = util.MakeTopologyAllocationResourceAllocationAnnotations(topologyAllocation, topologyAllocationAnnotationKey) + } + + if len(resourceAnnos) == 0 { + return nil + } + return resourceAnnos +} diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/util_test.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/util_test.go index 673e3fe390..d85b55d4db 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/util_test.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/util_test.go @@ -17,10 +17,17 @@ limitations under the License. 
package dynamicpolicy import ( + "encoding/json" + "reflect" "testing" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/state" + coreconsts "github.com/kubewharf/katalyst-core/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/util/machine" ) func TestGetFullyDropCacheBytes(t *testing.T) { @@ -81,3 +88,203 @@ func TestGetFullyDropCacheBytes(t *testing.T) { }) } } + +// helper to extract topology allocation from annotations JSON +func parseTopologyAllocationFromAnno(t *testing.T, annos map[string]string) v1alpha1.TopologyAllocation { + t.Helper() + if annos == nil { + return nil + } + raw, ok := annos[coreconsts.QRMPodAnnotationTopologyAllocationKey] + if !ok { + return nil + } + var ta v1alpha1.TopologyAllocation + if err := json.Unmarshal([]byte(raw), &ta); err != nil { + t.Fatalf("failed to unmarshal topology allocation: %v", err) + } + return ta +} + +func TestGetMemoryTopologyAllocationsAnnotations(t *testing.T) { + t.Parallel() + + giB := func(n int) uint64 { return uint64(n) << 30 } + hugepages2Mi := v1.ResourceName("hugepages-2Mi") + + tests := []struct { + name string + ai map[v1.ResourceName]*state.AllocationInfo + wantNilAnno bool + wantTopology map[v1.ResourceName]v1alpha1.TopologyAllocation + }{ + { + name: "nil allocation info returns nil", + ai: nil, + wantNilAnno: true, + }, + { + name: "no topology allocations and empty NUMA result returns nil", + ai: map[v1.ResourceName]*state.AllocationInfo{ + v1.ResourceMemory: {}, + }, + wantNilAnno: true, + }, + { + name: "no topology allocations but with NUMA result lists zones only", + ai: map[v1.ResourceName]*state.AllocationInfo{ + v1.ResourceMemory: { + NumaAllocationResult: machine.NewCPUSet(0, 1), + }, + }, + wantTopology: map[v1.ResourceName]v1alpha1.TopologyAllocation{ + v1.ResourceMemory: { + v1alpha1.TopologyTypeNuma: 
map[string]v1alpha1.ZoneAllocation{ + "0": {}, + "1": {}, + }, + }, + }, + }, + { + name: "with topology allocations includes allocated quantities", + ai: map[v1.ResourceName]*state.AllocationInfo{ + v1.ResourceMemory: { + TopologyAwareAllocations: map[int]uint64{ + 0: giB(1), + 1: giB(2), + }, + }, + }, + wantTopology: map[v1.ResourceName]v1alpha1.TopologyAllocation{ + v1.ResourceMemory: { + v1alpha1.TopologyTypeNuma: map[string]v1alpha1.ZoneAllocation{ + "0": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceMemory: resource.MustParse("1Gi"), + }, + }, + "1": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + }, + { + name: "with topology allocations includes allocated quantities (including zero)", + ai: map[v1.ResourceName]*state.AllocationInfo{ + v1.ResourceMemory: { + TopologyAwareAllocations: map[int]uint64{ + 0: giB(3), + 2: 0, + }, + }, + }, + wantTopology: map[v1.ResourceName]v1alpha1.TopologyAllocation{ + v1.ResourceMemory: { + v1alpha1.TopologyTypeNuma: map[string]v1alpha1.ZoneAllocation{ + "0": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceMemory: resource.MustParse("3Gi"), + }, + }, + "2": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceMemory: resource.MustParse("0"), + }, + }, + }, + }, + }, + }, + { + name: "multiple resources include hugepages-2Mi allocations", + ai: map[v1.ResourceName]*state.AllocationInfo{ + v1.ResourceMemory: { + TopologyAwareAllocations: map[int]uint64{ + 0: giB(1), + }, + }, + hugepages2Mi: { + TopologyAwareAllocations: map[int]uint64{ + 1: giB(2), + }, + }, + }, + wantTopology: map[v1.ResourceName]v1alpha1.TopologyAllocation{ + v1.ResourceMemory: { + v1alpha1.TopologyTypeNuma: map[string]v1alpha1.ZoneAllocation{ + "0": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceMemory: resource.MustParse("1Gi"), + }, + }, + }, + }, + hugepages2Mi: { + v1alpha1.TopologyTypeNuma: 
map[string]v1alpha1.ZoneAllocation{ + "1": { + Allocated: map[v1.ResourceName]resource.Quantity{ + hugepages2Mi: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + }, + { + name: "mixed resources with and without topology allocations", + ai: map[v1.ResourceName]*state.AllocationInfo{ + v1.ResourceMemory: { + TopologyAwareAllocations: map[int]uint64{ + 0: giB(1), + }, + }, + hugepages2Mi: { + NumaAllocationResult: machine.NewCPUSet(1), + }, + }, + wantTopology: map[v1.ResourceName]v1alpha1.TopologyAllocation{ + v1.ResourceMemory: { + v1alpha1.TopologyTypeNuma: map[string]v1alpha1.ZoneAllocation{ + "0": { + Allocated: map[v1.ResourceName]resource.Quantity{ + v1.ResourceMemory: resource.MustParse("1Gi"), + }, + }, + }, + }, + hugepages2Mi: { + v1alpha1.TopologyTypeNuma: map[string]v1alpha1.ZoneAllocation{ + "1": {}, + }, + }, + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got := getMemoryTopologyAllocationsAnnotations(tt.ai, coreconsts.QRMPodAnnotationTopologyAllocationKey) + if tt.wantNilAnno { + if got != nil { + t.Fatalf("expected nil annotations, got: %#v", got) + } + return + } + + for resourceName, want := range tt.wantTopology { + ta := parseTopologyAllocationFromAnno(t, got[resourceName]) + if !reflect.DeepEqual(ta, want) { + t.Fatalf("unexpected topology allocation for resource %q. 
got=%v, want=%v", resourceName, ta, want) + } + } + }) + } +} diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/vpa_test.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/vpa_test.go index a9ab369bfc..a3383cea8e 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/vpa_test.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/vpa_test.go @@ -1257,6 +1257,7 @@ func TestRNBMemoryVPA(t *testing.T) { }, Annotations: map[string]string{ coreconsts.QRMResourceAnnotationKeyNUMABindResult: "0", + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{}}}`, }, }, }, @@ -1360,6 +1361,7 @@ func TestRNBMemoryVPA(t *testing.T) { }, Annotations: map[string]string{ coreconsts.QRMResourceAnnotationKeyNUMABindResult: "0", + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{}}}`, }, }, }, @@ -1496,6 +1498,9 @@ func TestRNBMemoryVPA(t *testing.T) { }, }, }, + Annotations: map[string]string{ + coreconsts.QRMPodAnnotationTopologyAllocationKey: `{"Numa":{"0":{},"1":{}}}`, + }, }, }, }, @@ -1564,8 +1569,8 @@ func TestRNBMemoryVPA(t *testing.T) { if tc.PodEntries != nil { podResourceEntries := map[v1.ResourceName]state.PodEntries{v1.ResourceMemory: tc.PodEntries} - machineState, err := state.GenerateMachineStateFromPodEntries(machineInfo, podResourceEntries, nil, - dynamicPolicy.state.GetReservedMemory()) + machineState, err := state.GenerateMachineStateFromPodEntries(machineInfo, nil, podResourceEntries, nil, + dynamicPolicy.state.GetReservedMemory(), nil) as.Nil(err) dynamicPolicy.state.SetMachineState(machineState, true) diff --git a/pkg/agent/qrm-plugins/network/staticpolicy/policy.go b/pkg/agent/qrm-plugins/network/staticpolicy/policy.go index d66b946261..78b960e3b1 100644 --- a/pkg/agent/qrm-plugins/network/staticpolicy/policy.go +++ b/pkg/agent/qrm-plugins/network/staticpolicy/policy.go @@ -103,6 +103,7 @@ type StaticPolicy struct { netInterfaceNameResourceAllocationAnnotationKey string netClassIDResourceAllocationAnnotationKey string 
netBandwidthResourceAllocationAnnotationKey string + topologyAllocationAnnotationKey string podAnnotationKeptKeys []string podLabelKeptKeys []string @@ -146,20 +147,21 @@ func NewStaticPolicy(agentCtx *agent.GenericContext, conf *config.Configuration, } policyImplement := &StaticPolicy{ - nicManager: nicManager, - qosConfig: conf.QoSConfiguration, - qrmConfig: conf.QRMPluginsConfiguration, - emitter: wrappedEmitter, - metaServer: agentCtx.MetaServer, - agentCtx: agentCtx, - state: stateImpl, - residualHitMap: make(map[string]int64), - stopCh: make(chan struct{}), - name: fmt.Sprintf("%s_%s", agentName, NetworkResourcePluginPolicyNameStatic), - qosLevelToNetClassMap: make(map[string]uint32), - podAnnotationKeptKeys: conf.PodAnnotationKeptKeys, - podLabelKeptKeys: conf.PodLabelKeptKeys, - aliveCgroupID: make(map[uint64]time.Time), + nicManager: nicManager, + qosConfig: conf.QoSConfiguration, + qrmConfig: conf.QRMPluginsConfiguration, + emitter: wrappedEmitter, + metaServer: agentCtx.MetaServer, + agentCtx: agentCtx, + state: stateImpl, + residualHitMap: make(map[string]int64), + stopCh: make(chan struct{}), + name: fmt.Sprintf("%s_%s", agentName, NetworkResourcePluginPolicyNameStatic), + qosLevelToNetClassMap: make(map[string]uint32), + topologyAllocationAnnotationKey: conf.TopologyAllocationAnnotationKey, + podAnnotationKeptKeys: conf.PodAnnotationKeptKeys, + podLabelKeptKeys: conf.PodLabelKeptKeys, + aliveCgroupID: make(map[uint64]time.Time), } if common.CheckCgroup2UnifiedMode() { @@ -1179,7 +1181,7 @@ func (p *StaticPolicy) getResourceAllocationAnnotations( resourceAllocationAnnotations[p.netNSPathResourceAllocationAnnotationKey] = selectedNIC.NetNSInfo.GetNetNSAbsPath() } - return resourceAllocationAnnotations, nil + return getNetworkTopologyAllocationsAnnotations(allocation, resourceAllocationAnnotations, p.topologyAllocationAnnotationKey), nil } func (p *StaticPolicy) removePod(podUID string) error { diff --git 
a/pkg/agent/qrm-plugins/network/staticpolicy/policy_test.go b/pkg/agent/qrm-plugins/network/staticpolicy/policy_test.go index 0f162ba051..535beec2d5 100644 --- a/pkg/agent/qrm-plugins/network/staticpolicy/policy_test.go +++ b/pkg/agent/qrm-plugins/network/staticpolicy/policy_test.go @@ -48,6 +48,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util/reactor" "github.com/kubewharf/katalyst-core/pkg/config" "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/statedirectory" + katalystconsts "github.com/kubewharf/katalyst-core/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/metaserver" metaserveragent "github.com/kubewharf/katalyst-core/pkg/metaserver/agent" "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" @@ -214,6 +215,7 @@ func makeStaticPolicy(t *testing.T, hasNic bool) *StaticPolicy { netClassIDResourceAllocationAnnotationKey: testNetClassIDResourceAllocationAnnotationKey, netBandwidthResourceAllocationAnnotationKey: testNetBandwidthResourceAllocationAnnotationKey, nicAllocationReactor: reactor.DummyAllocationReactor{}, + topologyAllocationAnnotationKey: katalystconsts.QRMPodAnnotationTopologyAllocationKey, } } @@ -497,11 +499,12 @@ func TestAllocate(t *testing.T) { AllocatedQuantity: 5000, AllocationResult: machine.NewCPUSet(0, 1).String(), Annotations: map[string]string{ - testIPv4ResourceAllocationAnnotationKey: testEth0IPv4, - testIPv6ResourceAllocationAnnotationKey: "", - testNetInterfaceNameResourceAllocationAnnotationKey: testEth0Name, - testNetClassIDResourceAllocationAnnotationKey: testSharedNetClsId, - testNetBandwidthResourceAllocationAnnotationKey: "5000", + testIPv4ResourceAllocationAnnotationKey: testEth0IPv4, + testIPv6ResourceAllocationAnnotationKey: "", + testNetInterfaceNameResourceAllocationAnnotationKey: testEth0Name, + testNetClassIDResourceAllocationAnnotationKey: testSharedNetClsId, + testNetBandwidthResourceAllocationAnnotationKey: "5000", + katalystconsts.QRMPodAnnotationTopologyAllocationKey: 
`{"NIC":{"eth0":{}}}`, }, ResourceHints: &pluginapi.ListOfTopologyHints{ Hints: []*pluginapi.TopologyHint{ @@ -565,12 +568,13 @@ func TestAllocate(t *testing.T) { AllocatedQuantity: 5000, AllocationResult: machine.NewCPUSet(2, 3).String(), Annotations: map[string]string{ - testIPv4ResourceAllocationAnnotationKey: "", - testIPv6ResourceAllocationAnnotationKey: testEth2IPv6, - testNetNSPathResourceAllocationAnnotationKey: testEth2NSAbsolutePath, - testNetInterfaceNameResourceAllocationAnnotationKey: testEth2Name, - testNetClassIDResourceAllocationAnnotationKey: testReclaimedNetClsId, - testNetBandwidthResourceAllocationAnnotationKey: "5000", + testIPv4ResourceAllocationAnnotationKey: "", + testIPv6ResourceAllocationAnnotationKey: testEth2IPv6, + testNetNSPathResourceAllocationAnnotationKey: testEth2NSAbsolutePath, + testNetInterfaceNameResourceAllocationAnnotationKey: testEth2Name, + testNetClassIDResourceAllocationAnnotationKey: testReclaimedNetClsId, + testNetBandwidthResourceAllocationAnnotationKey: "5000", + katalystconsts.QRMPodAnnotationTopologyAllocationKey: `{"NIC":{"ns2-eth2":{}}}`, }, ResourceHints: &pluginapi.ListOfTopologyHints{ Hints: []*pluginapi.TopologyHint{ @@ -633,11 +637,12 @@ func TestAllocate(t *testing.T) { AllocatedQuantity: 5000, AllocationResult: machine.NewCPUSet(0, 1).String(), Annotations: map[string]string{ - testIPv4ResourceAllocationAnnotationKey: testEth0IPv4, - testIPv6ResourceAllocationAnnotationKey: "", - testNetInterfaceNameResourceAllocationAnnotationKey: testEth0Name, - testNetClassIDResourceAllocationAnnotationKey: fmt.Sprintf("%d", testDefaultDedicatedNetClsId), - testNetBandwidthResourceAllocationAnnotationKey: "5000", + testIPv4ResourceAllocationAnnotationKey: testEth0IPv4, + testIPv6ResourceAllocationAnnotationKey: "", + testNetInterfaceNameResourceAllocationAnnotationKey: testEth0Name, + testNetClassIDResourceAllocationAnnotationKey: fmt.Sprintf("%d", testDefaultDedicatedNetClsId), + 
testNetBandwidthResourceAllocationAnnotationKey: "5000", + katalystconsts.QRMPodAnnotationTopologyAllocationKey: `{"NIC":{"eth0":{}}}`, }, ResourceHints: &pluginapi.ListOfTopologyHints{ Hints: []*pluginapi.TopologyHint{ @@ -728,11 +733,12 @@ func TestAllocate(t *testing.T) { AllocatedQuantity: 20000, AllocationResult: machine.NewCPUSet(2, 3).String(), Annotations: map[string]string{ - testIPv4ResourceAllocationAnnotationKey: testEth2IPv6, - testIPv6ResourceAllocationAnnotationKey: "", - testNetInterfaceNameResourceAllocationAnnotationKey: testEth2Name, - testNetClassIDResourceAllocationAnnotationKey: fmt.Sprintf("%d", testDefaultDedicatedNetClsId), - testNetBandwidthResourceAllocationAnnotationKey: "20000", + testIPv4ResourceAllocationAnnotationKey: testEth2IPv6, + testIPv6ResourceAllocationAnnotationKey: "", + testNetInterfaceNameResourceAllocationAnnotationKey: testEth2Name, + testNetClassIDResourceAllocationAnnotationKey: fmt.Sprintf("%d", testDefaultDedicatedNetClsId), + testNetBandwidthResourceAllocationAnnotationKey: "20000", + katalystconsts.QRMPodAnnotationTopologyAllocationKey: `{"NIC":{"ns2-eth2":{}}}`, }, ResourceHints: &pluginapi.ListOfTopologyHints{ Hints: []*pluginapi.TopologyHint{ diff --git a/pkg/agent/qrm-plugins/network/staticpolicy/util.go b/pkg/agent/qrm-plugins/network/staticpolicy/util.go index a22a40fe90..4c0dd7c9c4 100644 --- a/pkg/agent/qrm-plugins/network/staticpolicy/util.go +++ b/pkg/agent/qrm-plugins/network/staticpolicy/util.go @@ -24,6 +24,7 @@ import ( pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" "github.com/kubewharf/katalyst-api/pkg/consts" apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent" @@ -238,7 +239,7 @@ func selectOneNIC(nics []machine.InterfaceInfo, policy NICSelectionPoligy) machi } // packAllocationResponse fills pluginapi.ResourceAllocationResponse with information from 
AllocationInfo and pluginapi.ResourceRequest -func packAllocationResponse(req *pluginapi.ResourceRequest, allocationInfo *state.AllocationInfo, resourceAllocationAnnotations map[string]string) (*pluginapi.ResourceAllocationResponse, error) { +func packAllocationResponse(req *pluginapi.ResourceRequest, allocationInfo *state.AllocationInfo, resourceAllocationAnnotations ...map[string]string) (*pluginapi.ResourceAllocationResponse, error) { if allocationInfo == nil { return nil, fmt.Errorf("packAllocationResponse got nil allocationInfo") } else if req == nil { @@ -262,7 +263,7 @@ func packAllocationResponse(req *pluginapi.ResourceRequest, allocationInfo *stat IsScalarResource: true, // to avoid re-allocating AllocatedQuantity: float64(allocationInfo.Egress), AllocationResult: allocationInfo.NumaNodes.String(), - Annotations: resourceAllocationAnnotations, + Annotations: general.MergeAnnotations(resourceAllocationAnnotations...), ResourceHints: &pluginapi.ListOfTopologyHints{ Hints: []*pluginapi.TopologyHint{ req.Hint, @@ -276,6 +277,22 @@ func packAllocationResponse(req *pluginapi.ResourceRequest, allocationInfo *stat }, nil } +// getNetworkTopologyAllocationsAnnotations gets the network topology allocation and merges it with current annotations. 
+func getNetworkTopologyAllocationsAnnotations(allocationInfo *state.AllocationInfo, currentAnnotations map[string]string, + topologyAllocationAnnotationKey string, +) map[string]string { + if allocationInfo == nil { + return currentAnnotations + } + + topologyAllocation := make(v1alpha1.TopologyAllocation) + topologyAllocation[v1alpha1.TopologyTypeNIC] = make(map[string]v1alpha1.ZoneAllocation) + topologyAllocation[v1alpha1.TopologyTypeNIC][allocationInfo.Identifier] = v1alpha1.ZoneAllocation{} + + newAnnotations := qrmutil.MakeTopologyAllocationResourceAllocationAnnotations(topologyAllocation, topologyAllocationAnnotationKey) + return general.MergeAnnotations(newAnnotations, currentAnnotations) +} + // getReservedBandwidth is used to spread total reserved bandwidth into per-nic level. func getReservedBandwidth(nics []machine.InterfaceInfo, reservation uint32, policy ReservationPolicy) (map[string]uint32, error) { nicCount := len(nics) diff --git a/pkg/agent/qrm-plugins/network/staticpolicy/util_test.go b/pkg/agent/qrm-plugins/network/staticpolicy/util_test.go new file mode 100644 index 0000000000..3d7ffdd900 --- /dev/null +++ b/pkg/agent/qrm-plugins/network/staticpolicy/util_test.go @@ -0,0 +1,137 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package staticpolicy + +import ( + "encoding/json" + "reflect" + "testing" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + + v1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/network/state" + katalystconsts "github.com/kubewharf/katalyst-core/pkg/consts" +) + +// helper to extract topology allocation from annotations JSON +func parseNetworkTopologyFromAnno(t *testing.T, annos map[string]string) v1alpha1.TopologyAllocation { + t.Helper() + if annos == nil { + return nil + } + raw, ok := annos[katalystconsts.QRMPodAnnotationTopologyAllocationKey] + if !ok { + return nil + } + var ta v1alpha1.TopologyAllocation + if err := json.Unmarshal([]byte(raw), &ta); err != nil { + t.Fatalf("failed to unmarshal topology allocation: %v", err) + } + return ta +} + +func TestGetNetworkTopologyAllocationsAnnotations(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + ai *state.AllocationInfo + currentAnnotations map[string]string + wantAnnotations map[string]string + wantTopology v1alpha1.TopologyAllocation + }{ + { + name: "nil allocation returns current annotations", + ai: nil, + currentAnnotations: map[string]string{"foo": "bar"}, + wantAnnotations: map[string]string{"foo": "bar"}, + }, + { + name: "empty current annotations adds NIC zone", + ai: &state.AllocationInfo{ + Identifier: "eth0", + }, + currentAnnotations: nil, + wantTopology: v1alpha1.TopologyAllocation{ + v1alpha1.TopologyTypeNIC: map[string]v1alpha1.ZoneAllocation{ + "eth0": {}, + }, + }, + }, + { + name: "merges with extra keys in current annotations", + ai: &state.AllocationInfo{ + Identifier: "netns-eth1", + }, + currentAnnotations: map[string]string{"some": "value"}, + wantAnnotations: map[string]string{"some": "value"}, + wantTopology: v1alpha1.TopologyAllocation{ + v1alpha1.TopologyTypeNIC: map[string]v1alpha1.ZoneAllocation{ + "netns-eth1": {}, + }, + }, + }, + { + name: "keeps 
existing topology annotation if already present in current", + ai: &state.AllocationInfo{Identifier: "eth2"}, + currentAnnotations: func() map[string]string { + // prepare an existing different topology annotation; current should take precedence + ta := v1alpha1.TopologyAllocation{ + v1alpha1.TopologyTypeNIC: map[string]v1alpha1.ZoneAllocation{ + "preexist": {Allocated: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("1")}}, + }, + } + b, _ := json.Marshal(ta) + return map[string]string{katalystconsts.QRMPodAnnotationTopologyAllocationKey: string(b)} + }(), + // wantTopology reflects the preexisting one since merge favors current annotations on key collision + wantTopology: v1alpha1.TopologyAllocation{ + v1alpha1.TopologyTypeNIC: map[string]v1alpha1.ZoneAllocation{ + "preexist": {Allocated: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("1")}}, + }, + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got := getNetworkTopologyAllocationsAnnotations(tt.ai, tt.currentAnnotations, katalystconsts.QRMPodAnnotationTopologyAllocationKey) + + // If an explicit wantAnnotations map is provided, ensure all its entries exist in result + if tt.wantAnnotations != nil { + for k, v := range tt.wantAnnotations { + if gv, ok := got[k]; !ok || gv != v { + t.Fatalf("expected annotation %q=%q present, got: %v", k, v, got) + } + } + } + + // Validate topology annotation when wantTopology is specified + if tt.wantTopology != nil { + ta := parseNetworkTopologyFromAnno(t, got) + if !reflect.DeepEqual(ta, tt.wantTopology) { + t.Fatalf("unexpected topology allocation. 
got=%v, want=%v", ta, tt.wantTopology) + } + } + }) + } +} diff --git a/pkg/agent/qrm-plugins/util/consts.go b/pkg/agent/qrm-plugins/util/consts.go index 4b49145699..5766d2efde 100644 --- a/pkg/agent/qrm-plugins/util/consts.go +++ b/pkg/agent/qrm-plugins/util/consts.go @@ -32,6 +32,9 @@ const ( MetricNameGetAccompanyResourceTopologyHintsFailed = "get_accompany_resource_topology_hints_failed" MetricNameAllocateAccompanyResourceFailed = "allocate_accompany_resource_failed" MetricNameReleaseAccompanyResourceFailed = "release_accompany_resource_failed" + MetricNameSyncResourcePackagePinnedCPUSetFailed = "sync_resource_package_pinned_cpuset_failed" + MetricNameResourcePackagePinnedCPUSetSize = "resource_package_pinned_cpuset_size" + MetricNameSyncNumaResourcePackageFailed = "sync_numa_resource_package_failed" // metrics for cpu plugin MetricNamePoolSize = "pool_size" diff --git a/pkg/agent/qrm-plugins/util/util.go b/pkg/agent/qrm-plugins/util/util.go index 412e418583..4416047fba 100644 --- a/pkg/agent/qrm-plugins/util/util.go +++ b/pkg/agent/qrm-plugins/util/util.go @@ -26,10 +26,12 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/klog/v2" pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/config/generic" "github.com/kubewharf/katalyst-core/pkg/util/asyncworker" @@ -45,15 +47,17 @@ func GetQuantityFromResourceReq(req *pluginapi.ResourceRequest) (int, float64, e return 0, 0, fmt.Errorf("invalid req.ResourceRequests length: %d", len(req.ResourceRequests)) } - return GetQuantityFromResourceRequests(req.ResourceRequests, req.ResourceName, IsQuantityFromQRMDeclaration(req.Annotations)) + return GetQuantityFromResourceRequests(req.ResourceRequests, req.ResourceName, req.Annotations) } -func 
GetQuantityFromResourceRequests(resourceRequests map[string]float64, resourceName string, isQuantityFromQRMDeclaration bool) (int, float64, error) { +func GetQuantityFromResourceRequests(resourceRequests map[string]float64, resourceName string, reqAnnotations map[string]string) (int, float64, error) { quantity, ok := resourceRequests[resourceName] if !ok { return 0, 0, errors.NewNotFound(schema.GroupResource{}, resourceName) } + isQuantityFromQRMDeclaration := IsQuantityFromQRMDeclaration(reqAnnotations) + switch resourceName { case string(apiconsts.ReclaimedResourceMilliCPU): return general.Max(int(math.Ceil(quantity/1000.0)), 0), quantity / 1000.0, nil @@ -73,6 +77,29 @@ func IsQuantityFromQRMDeclaration(podAnnotations map[string]string) bool { return podAnnotations[PodAnnotationQuantityFromQRMDeclarationKey] == PodAnnotationQuantityFromQRMDeclarationTrue } +// GetQuantityMapFromResourceReq parses all resources quantity into maps of resources to value, +// since pods with reclaimed_cores and un-reclaimed_cores have different +// representations, we may to adapt to both cases. 
+func GetQuantityMapFromResourceReq(req *pluginapi.ResourceRequest) (map[v1.ResourceName]int, map[v1.ResourceName]float64, error) { + intQuantity := make(map[v1.ResourceName]int) + floatQuantity := make(map[v1.ResourceName]float64) + + resourceRequests := req.ResourceRequests + + for key := range resourceRequests { + resName := v1.ResourceName(key) + resInt, resFloat, err := GetQuantityFromResourceRequests(resourceRequests, key, req.Annotations) + if err != nil { + return nil, nil, fmt.Errorf("error getting quantity from resource requests for resource %s: %v", key, err) + } + + intQuantity[resName] = resInt + floatQuantity[resName] = resFloat + } + + return intQuantity, floatQuantity, nil +} + // IsDebugPod returns true if the pod annotations show up any configurable debug key func IsDebugPod(podAnnotations map[string]string, podDebugAnnoKeys []string) bool { for _, debugKey := range podDebugAnnoKeys { @@ -276,9 +303,10 @@ func GetNUMANodesCountToFitMemoryReq(memoryReq, bytesPerNUMA uint64, numaCount i }, } */ -func GetHintsFromExtraStateFile(podName, resourceName, extraHintsStateFileAbsPath string, - availableNUMAs machine.CPUSet, +func GetHintsFromExtraStateFile(podName, extraHintsStateFileAbsPath string, + availableNUMAs machine.CPUSet, requestedResources []v1.ResourceName, ) (map[string]*pluginapi.ListOfTopologyHints, error) { + hints := make(map[string]*pluginapi.ListOfTopologyHints) if extraHintsStateFileAbsPath == "" { return nil, nil } @@ -319,21 +347,23 @@ func GetHintsFromExtraStateFile(podName, resourceName, extraHintsStateFileAbsPat } allocatedNumaNodes := numaSet.ToSliceUInt64() - klog.InfoS("[GetHintsFromExtraStateFile] get hints from extra state file", - "podName", podName, - "resourceName", resourceName, - "hint", allocatedNumaNodes) - hints := map[string]*pluginapi.ListOfTopologyHints{ - resourceName: { + for _, resourceName := range requestedResources { + klog.InfoS("[GetHintsFromExtraStateFile] get hints from extra state file", + "podName", 
podName, + "resourceName", resourceName, + "hint", allocatedNumaNodes) + + hints[string(resourceName)] = &pluginapi.ListOfTopologyHints{ Hints: []*pluginapi.TopologyHint{ { Nodes: allocatedNumaNodes, Preferred: true, }, }, - }, + } } + return hints, nil } @@ -353,6 +383,60 @@ func PodInplaceUpdateResizing(req *pluginapi.ResourceRequest) bool { return req.Annotations != nil && req.Annotations[apiconsts.PodAnnotationInplaceUpdateResizingKey] == "true" } +// GetPodAggregatedRequestResourceMap returns both integer and float64 quantities for all resources in the pod request. +// If the pod has aggregated resource annotations, those values are used; otherwise, it falls back to the original +// request quantities. Returns an error if any calculation fails. +func GetPodAggregatedRequestResourceMap(req *pluginapi.ResourceRequest) (map[v1.ResourceName]int, map[v1.ResourceName]float64, error) { + annotations := req.Annotations + if annotations == nil { + return GetQuantityMapFromResourceReq(req) + } + + value, ok := annotations[apiconsts.PodAnnotationAggregatedRequestsKey] + if !ok { + return GetQuantityMapFromResourceReq(req) + } + + var resourceList v1.ResourceList + if err := json.Unmarshal([]byte(value), &resourceList); err != nil { + return GetQuantityMapFromResourceReq(req) + } + + intQuantities := make(map[v1.ResourceName]int) + floatQuantities := make(map[v1.ResourceName]float64) + resourceRequests := req.ResourceRequests + + for key := range resourceRequests { + resName := v1.ResourceName(key) + + if _, ok = resourceList[resName]; !ok { + // for resources that do not appear in the aggregated resources map, simply calculate quantity from request + intQuantity, floatQuantity, err := GetQuantityFromResourceRequests(resourceRequests, key, req.Annotations) + if err != nil { + return nil, nil, fmt.Errorf("get resource quantity for resource %s failed with error: %v", resName, err) + } + + intQuantities[resName] = intQuantity + floatQuantities[resName] = floatQuantity + } 
else { + // otherwise, calculate the aggregated quantity of the resource + intQuantity, floatQuantity, err := calculateAggregatedResource(resName, resourceList) + if err != nil { + return nil, nil, fmt.Errorf("calculate aggregated resource quantity for resource %s failed with error: %v", + resName, err) + } + + intQuantities[resName] = intQuantity + floatQuantities[resName] = floatQuantity + } + } + + return intQuantities, floatQuantities, nil +} + +// GetPodAggregatedRequestResource returns both integer and float64 quantities for the main resource in the pod request. +// If the pod has aggregated resource annotations, those values are used; otherwise, it falls back to the original +// request quantities. Returns an error if any calculation fails. func GetPodAggregatedRequestResource(req *pluginapi.ResourceRequest) (int, float64, error) { annotations := req.Annotations if annotations == nil { @@ -367,16 +451,36 @@ func GetPodAggregatedRequestResource(req *pluginapi.ResourceRequest) (int, float return GetQuantityFromResourceReq(req) } - switch req.ResourceName { - case string(v1.ResourceCPU): + return calculateAggregatedResource(v1.ResourceName(req.ResourceName), resourceList) +} + +func calculateAggregatedResource(resourceName v1.ResourceName, resourceList v1.ResourceList) (int, float64, error) { + switch resourceName { + case v1.ResourceCPU: podAggregatedReqFloat64 := float64(resourceList.Cpu().MilliValue()) / 1000 return int(math.Ceil(podAggregatedReqFloat64)), podAggregatedReqFloat64, nil - case string(v1.ResourceMemory): - podAggregatedReqFloat64 := float64(resourceList.Memory().MilliValue()) / 1000 - return int(math.Ceil(podAggregatedReqFloat64)), podAggregatedReqFloat64, nil default: - return 0, 0, fmt.Errorf("not support resource name: %s", req.ResourceName) + podAggregatedReqFloat64 := float64(resourceList.Name(resourceName, resource.BinarySI).Value()) + return int(podAggregatedReqFloat64), podAggregatedReqFloat64, nil + } +} + +// 
MakeTopologyAllocationResourceAllocationAnnotations converts the topology allocation to annotations. +func MakeTopologyAllocationResourceAllocationAnnotations(topologyAllocation v1alpha1.TopologyAllocation, + topologyAllocationAnnotationKey string, +) map[string]string { + annotations := make(map[string]string) + if topologyAllocation == nil { + return annotations } + + b, err := json.Marshal(topologyAllocation) + if err != nil { + general.Errorf("Error marshaling topology allocation: %v", err) + } + + annotations[topologyAllocationAnnotationKey] = string(b) + return annotations } // CreateEmptyAllocationResponse creates an empty allocation response diff --git a/pkg/agent/qrm-plugins/util/util_test.go b/pkg/agent/qrm-plugins/util/util_test.go index fe8f85a0f5..60516ef1d4 100644 --- a/pkg/agent/qrm-plugins/util/util_test.go +++ b/pkg/agent/qrm-plugins/util/util_test.go @@ -17,12 +17,15 @@ limitations under the License. package util import ( + "encoding/json" + "reflect" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime/schema" pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" @@ -557,3 +560,182 @@ func TestCeilEdgeCases(t *testing.T) { }) } } + +func TestGetPodAggregatedRequestResourceMap(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + resourceRequest *pluginapi.ResourceRequest + expectedInt map[v1.ResourceName]int + expectedFloat map[v1.ResourceName]float64 + expectedErr bool + }{ + { + name: "no annotations", + resourceRequest: &pluginapi.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 1, + string(v1.ResourceMemory): 2 * 1024 * 1024 * 1024, + }, + Annotations: nil, + }, + expectedInt: map[v1.ResourceName]int{ + v1.ResourceCPU: 1, + v1.ResourceMemory: 2 * 1024 * 1024 * 1024, // 2Gi + }, + expectedFloat: map[v1.ResourceName]float64{ 
+ v1.ResourceCPU: 1, + v1.ResourceMemory: 2 * 1024 * 1024 * 1024.0, + }, + expectedErr: false, + }, + { + name: "annotations without aggregated key", + resourceRequest: &pluginapi.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 1, + string(v1.ResourceMemory): 1 * 1024 * 1024 * 1024, + }, + Annotations: map[string]string{ + "some-other-annotation": "value", + }, + }, + expectedInt: map[v1.ResourceName]int{ + v1.ResourceCPU: 1, + v1.ResourceMemory: 1 * 1024 * 1024 * 1024, // 1Gi + }, + expectedFloat: map[v1.ResourceName]float64{ + v1.ResourceCPU: 1.0, + v1.ResourceMemory: 1 * 1024 * 1024 * 1024.0, + }, + expectedErr: false, + }, + { + name: "invalid aggregated json", + resourceRequest: &pluginapi.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 2, + }, + Annotations: map[string]string{ + consts.PodAnnotationAggregatedRequestsKey: "{invalid json", + }, + }, + expectedInt: map[v1.ResourceName]int{ + v1.ResourceCPU: 2, + }, + expectedFloat: map[v1.ResourceName]float64{ + v1.ResourceCPU: 2.0, + }, + expectedErr: false, // Should fall back to GetQuantityMapFromResourceReq + }, + { + name: "valid aggregated json - mixed resources", + resourceRequest: &pluginapi.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 1, // Will be aggregated + string(v1.ResourceMemory): 2 * 1024 * 1024 * 1024, // Will be from original request + "example.com/gpu": 1, // Will be aggregated + }, + Annotations: map[string]string{ + consts.PodAnnotationAggregatedRequestsKey: func() string { + rl := v1.ResourceList{ + v1.ResourceCPU: *resource.NewQuantity(2, resource.DecimalSI), + "example.com/gpu": *resource.NewQuantity(2, resource.DecimalSI), + } + b, _ := json.Marshal(rl) + return string(b) + }(), + }, + }, + expectedInt: map[v1.ResourceName]int{ + v1.ResourceCPU: 2, + v1.ResourceMemory: 2 * 1024 * 1024 * 1024, + "example.com/gpu": 2, + }, + expectedFloat: map[v1.ResourceName]float64{ + 
v1.ResourceCPU: 2.0, + v1.ResourceMemory: 2 * 1024 * 1024 * 1024.0, + "example.com/gpu": 2.0, + }, + expectedErr: false, + }, + { + name: "valid aggregated json - all resources aggregated", + resourceRequest: &pluginapi.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 1, + string(v1.ResourceMemory): 2 * 1024 * 1024 * 1024, + }, + Annotations: map[string]string{ + consts.PodAnnotationAggregatedRequestsKey: func() string { + rl := v1.ResourceList{ + v1.ResourceCPU: *resource.NewQuantity(2, resource.DecimalSI), + v1.ResourceMemory: *resource.NewQuantity(3*1024*1024*1024, resource.DecimalSI), + } + b, _ := json.Marshal(rl) + return string(b) + }(), + }, + }, + expectedInt: map[v1.ResourceName]int{ + v1.ResourceCPU: 2, + v1.ResourceMemory: 3 * 1024 * 1024 * 1024, + }, + expectedFloat: map[v1.ResourceName]float64{ + v1.ResourceCPU: 2.0, + v1.ResourceMemory: 3 * 1024 * 1024 * 1024.0, + }, + expectedErr: false, + }, + { + name: "valid aggregated json - no resources aggregated (empty aggregated list)", + resourceRequest: &pluginapi.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(v1.ResourceCPU): 1, + string(v1.ResourceMemory): 1 * 1024 * 1024 * 1024, + }, + Annotations: map[string]string{ + consts.PodAnnotationAggregatedRequestsKey: func() string { + rl := v1.ResourceList{} // Empty aggregated list + b, _ := json.Marshal(rl) + return string(b) + }(), + }, + }, + expectedInt: map[v1.ResourceName]int{ + v1.ResourceCPU: 1, + v1.ResourceMemory: 1 * 1024 * 1024 * 1024, + }, + expectedFloat: map[v1.ResourceName]float64{ + v1.ResourceCPU: 1.0, + v1.ResourceMemory: 1 * 1024 * 1024 * 1024.0, + }, + expectedErr: false, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + // Note: This test assumes that GetQuantityFromResourceReq, GetQuantityMapFromResourceReq, + // and calculateAggregatedResource (unexported functions in the same package) + // behave as expected and that 
GetQuantityFromResourceReq is intended to take + // the current resource name as an implicit argument or processes it correctly + // within the loop context. + gotInt, gotFloat, err := GetPodAggregatedRequestResourceMap(tt.resourceRequest) + + if (err != nil) != tt.expectedErr { + t.Errorf("GetPodAggregatedRequestResourceMap() error = %v, expectedErr %v", err, tt.expectedErr) + return + } + if !reflect.DeepEqual(gotInt, tt.expectedInt) { + t.Errorf("GetPodAggregatedRequestResourceMap() gotInt = %v, want %v", gotInt, tt.expectedInt) + } + if !reflect.DeepEqual(gotFloat, tt.expectedFloat) { + t.Errorf("GetPodAggregatedRequestResourceMap() gotFloat = %v, want %v", gotFloat, tt.expectedFloat) + } + }) + } +} diff --git a/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter.go b/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter.go index 17da950219..b30952a83c 100644 --- a/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter.go +++ b/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter.go @@ -45,6 +45,7 @@ import ( "github.com/kubewharf/katalyst-api/pkg/utils" "github.com/kubewharf/katalyst-core/pkg/config/agent" "github.com/kubewharf/katalyst-core/pkg/config/generic" + "github.com/kubewharf/katalyst-core/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/metaserver" "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/helper" metaserverpod "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" @@ -753,7 +754,7 @@ func (p *topologyAdapterImpl) getZoneAttributes(allocatableResources *podresv1.A zoneAttributes[cacheGroupZoneNode] = util.ZoneAttributes{ nodev1alpha1.Attribute{ - Name: "cpu_lists", + Name: consts.ZoneAttributeNameCPULists, Value: machine.NewCPUSet(cpus.List()...).String(), }, } @@ -796,7 +797,7 @@ func (p *topologyAdapterImpl) generateNumaNodeThreadTopologyAttr(node util.ZoneN threadTopology = strings.TrimSuffix(threadTopology, ",") attrs = append(attrs, nodev1alpha1.Attribute{ 
- Name: "thread_topology_info", + Name: consts.ZoneAttributeNameThreadTopologyInfo, Value: threadTopology, }) @@ -819,7 +820,7 @@ func (p *topologyAdapterImpl) generateNumaNodeResourceReservedAttr(node util.Zon } attrs = append(attrs, nodev1alpha1.Attribute{ - Name: "reserved_cpu_list", + Name: consts.ZoneAttributeNameReservedCPUList, Value: numaReserved.String(), }) @@ -844,7 +845,7 @@ func (p *topologyAdapterImpl) generateNodeDistanceAttr(node util.ZoneNode) []nod } attrs = append(attrs, nodev1alpha1.Attribute{ - Name: "numa_distance", + Name: consts.ZoneAttributeNameNUMADistance, Value: general.IntSliceToString(distances), }) diff --git a/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter_test.go b/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter_test.go index 1191f5d050..f5776369eb 100644 --- a/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter_test.go +++ b/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter_test.go @@ -1812,11 +1812,11 @@ func Test_podResourcesServerTopologyAdapterImpl_GetTopologyZones_ReportRDMATopol Name: "0", Attributes: []nodev1alpha1.Attribute{ { - Name: "numa_distance", + Name: pkgconsts.ZoneAttributeNameNUMADistance, Value: "", }, { - Name: "thread_topology_info", + Name: pkgconsts.ZoneAttributeNameThreadTopologyInfo, Value: "0:3,1:2,2:1,3:0", }, }, @@ -1874,11 +1874,11 @@ func Test_podResourcesServerTopologyAdapterImpl_GetTopologyZones_ReportRDMATopol Name: "1", Attributes: []nodev1alpha1.Attribute{ { - Name: "numa_distance", + Name: pkgconsts.ZoneAttributeNameNUMADistance, Value: "", }, { - Name: "thread_topology_info", + Name: pkgconsts.ZoneAttributeNameThreadTopologyInfo, Value: "4:7,5:6,6:5,7:4", }, }, @@ -2341,11 +2341,11 @@ func Test_podResourcesServerTopologyAdapterImpl_GetTopologyZones(t *testing.T) { Name: "0", Attributes: []nodev1alpha1.Attribute{ { - Name: "numa_distance", + Name: pkgconsts.ZoneAttributeNameNUMADistance, Value: "10,20", }, { - Name: 
"thread_topology_info", + Name: pkgconsts.ZoneAttributeNameThreadTopologyInfo, Value: "0:3,1:2,2:1,3:0", }, }, @@ -2401,7 +2401,7 @@ func Test_podResourcesServerTopologyAdapterImpl_GetTopologyZones(t *testing.T) { }, Attributes: []nodev1alpha1.Attribute{ { - Name: "cpu_lists", + Name: pkgconsts.ZoneAttributeNameCPULists, Value: "0,4,8,12,16,20,24,28", }, }, @@ -2419,7 +2419,7 @@ func Test_podResourcesServerTopologyAdapterImpl_GetTopologyZones(t *testing.T) { }, Attributes: []nodev1alpha1.Attribute{ { - Name: "cpu_lists", + Name: pkgconsts.ZoneAttributeNameCPULists, Value: "2,6,10,14,18,22,26,30", }, }, @@ -2457,11 +2457,11 @@ func Test_podResourcesServerTopologyAdapterImpl_GetTopologyZones(t *testing.T) { Name: "1", Attributes: []nodev1alpha1.Attribute{ { - Name: "numa_distance", + Name: pkgconsts.ZoneAttributeNameNUMADistance, Value: "20,10", }, { - Name: "thread_topology_info", + Name: pkgconsts.ZoneAttributeNameThreadTopologyInfo, Value: "4:7,5:6,6:5,7:4", }, }, @@ -2514,7 +2514,7 @@ func Test_podResourcesServerTopologyAdapterImpl_GetTopologyZones(t *testing.T) { }, Attributes: []nodev1alpha1.Attribute{ { - Name: "cpu_lists", + Name: pkgconsts.ZoneAttributeNameCPULists, Value: "1,5,9,13,17,21,25,29", }, }, @@ -2532,7 +2532,7 @@ func Test_podResourcesServerTopologyAdapterImpl_GetTopologyZones(t *testing.T) { }, Attributes: []nodev1alpha1.Attribute{ { - Name: "cpu_lists", + Name: pkgconsts.ZoneAttributeNameCPULists, Value: "3,7,11,15,19,23,27,31", }, }, @@ -2909,11 +2909,11 @@ func Test_podResourcesServerTopologyAdapterImpl_GetTopologyZones(t *testing.T) { Name: "0", Attributes: []nodev1alpha1.Attribute{ { - Name: "numa_distance", + Name: pkgconsts.ZoneAttributeNameNUMADistance, Value: "", }, { - Name: "thread_topology_info", + Name: pkgconsts.ZoneAttributeNameThreadTopologyInfo, Value: "0:3,1:2,2:1,3:0", }, }, @@ -2955,11 +2955,11 @@ func Test_podResourcesServerTopologyAdapterImpl_GetTopologyZones(t *testing.T) { Name: "1", Attributes: []nodev1alpha1.Attribute{ { 
- Name: "numa_distance", + Name: pkgconsts.ZoneAttributeNameNUMADistance, Value: "", }, { - Name: "thread_topology_info", + Name: pkgconsts.ZoneAttributeNameThreadTopologyInfo, Value: "4:7,5:6,6:5,7:4", }, }, diff --git a/pkg/agent/sysadvisor/metacache/metacache.go b/pkg/agent/sysadvisor/metacache/metacache.go index 42683ea87b..e1b7fc905c 100644 --- a/pkg/agent/sysadvisor/metacache/metacache.go +++ b/pkg/agent/sysadvisor/metacache/metacache.go @@ -81,6 +81,10 @@ type MetaReader interface { // GetSupportedWantedFeatureGates gets supported and wanted FeatureGates GetSupportedWantedFeatureGates() (map[string]*advisorsvc.FeatureGate, error) + // GetResourcePackageConfig returns a deep-copied snapshot of resource package configurations + // organized by NUMA node. + GetResourcePackageConfig() types.ResourcePackageConfig + metrictypes.MetricsReader } @@ -128,6 +132,9 @@ type MetaWriter interface { // SetSupportedWantedFeatureGates sets supported and wanted FeatureGates SetSupportedWantedFeatureGates(featureGates map[string]*advisorsvc.FeatureGate) error + // SetResourcePackageConfig overwrites resource package configurations organized by NUMA node. + // The input will be deep-copied before being stored. + SetResourcePackageConfig(config types.ResourcePackageConfig) error sync.Locker } @@ -173,6 +180,11 @@ type MetaCacheImp struct { featureGates map[string]*advisorsvc.FeatureGate featureGatesMutex sync.RWMutex + // resourcePackageConfig stores resource package configurations organized by NUMA node, + // and is updated by sysadvisor servers based on qrm-plugin inputs. + resourcePackageConfig types.ResourcePackageConfig + resourcePackageConfigMutex sync.RWMutex + containerCreateTimestamp map[string]int64 // Lock for the entire MetaCache. Useful when you want to make multiple writes atomically. 
@@ -209,6 +221,7 @@ func NewMetaCacheImp(conf *config.Configuration, emitterPool metricspool.Metrics modelToResult: make(map[string]interface{}), modelInput: make(map[string]map[string]interface{}), featureGates: make(map[string]*advisorsvc.FeatureGate), + resourcePackageConfig: make(types.ResourcePackageConfig), containerCreateTimestamp: make(map[string]int64), } @@ -582,6 +595,23 @@ func (mc *MetaCacheImp) SetSupportedWantedFeatureGates(featureGates map[string]* return nil } +// GetResourcePackageConfig returns a deep-copied snapshot of resource package configurations. +func (mc *MetaCacheImp) GetResourcePackageConfig() types.ResourcePackageConfig { + mc.resourcePackageConfigMutex.RLock() + defer mc.resourcePackageConfigMutex.RUnlock() + + return mc.resourcePackageConfig.Clone() +} + +// SetResourcePackageConfig overwrites resource package configurations with deep copy. +func (mc *MetaCacheImp) SetResourcePackageConfig(config types.ResourcePackageConfig) error { + mc.resourcePackageConfigMutex.Lock() + defer mc.resourcePackageConfigMutex.Unlock() + + mc.resourcePackageConfig = config.Clone() + return nil +} + func (mc *MetaCacheImp) SetHeadroomEntries(resourceName string, headroomInfo *types.HeadroomInfo) error { mc.headroomMutex.Lock() defer mc.headroomMutex.Unlock() diff --git a/pkg/agent/sysadvisor/metacache/metacache_test.go b/pkg/agent/sysadvisor/metacache/metacache_test.go index a6a83739aa..4ad2b85807 100644 --- a/pkg/agent/sysadvisor/metacache/metacache_test.go +++ b/pkg/agent/sysadvisor/metacache/metacache_test.go @@ -20,6 +20,7 @@ import ( "fmt" "os" "reflect" + "sync" "testing" "time" @@ -30,6 +31,7 @@ import ( borweinutils "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/inference/models/borwein/utils" "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/types" "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/machine" ) func TestMetaCacheImp_GetFilteredInferenceResult(t *testing.T) { @@ 
-297,3 +299,61 @@ func TestRangeAndDeleteContainerWithSafeTime(t *testing.T) { require.Equal(t, 0, len(mc.podEntries), "failed to delete container before safe time") require.Equal(t, 0, len(mc.containerCreateTimestamp), "failed to delete container create timestamp before safe time") } + +func TestMetaCacheImp_ResourcePackageConfig_GetSetClone(t *testing.T) { + t.Parallel() + + mc := &MetaCacheImp{ + resourcePackageConfig: make(types.ResourcePackageConfig), + } + + original := types.ResourcePackageConfig{ + 0: map[string]*types.ResourcePackageState{ + "pkgA": { + PinnedCPUSet: machine.NewCPUSet(0, 1, 2), + }, + }, + } + + require.NoError(t, mc.SetResourcePackageConfig(original)) + + original[0]["pkgA"].PinnedCPUSet = machine.NewCPUSet(99) + stored := mc.GetResourcePackageConfig() + require.Equal(t, 3, stored[0]["pkgA"].PinnedCPUSet.Size()) + + stored[0]["pkgA"].PinnedCPUSet = machine.NewCPUSet(100) + stored2 := mc.GetResourcePackageConfig() + require.Equal(t, 3, stored2[0]["pkgA"].PinnedCPUSet.Size()) +} + +func TestMetaCacheImp_ResourcePackageConfig_ConcurrentAccess(t *testing.T) { + t.Parallel() + + mc := &MetaCacheImp{ + resourcePackageConfig: make(types.ResourcePackageConfig), + } + + var wg sync.WaitGroup + for i := 0; i < 8; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + cfg := types.ResourcePackageConfig{ + i % 2: map[string]*types.ResourcePackageState{ + "pkgA": { + PinnedCPUSet: machine.NewCPUSet(i, i+1), + }, + }, + } + _ = mc.SetResourcePackageConfig(cfg) + }(i) + } + for i := 0; i < 16; i++ { + wg.Add(1) + go func() { + defer wg.Done() + _ = mc.GetResourcePackageConfig() + }() + } + wg.Wait() +} diff --git a/pkg/agent/sysadvisor/plugin/overcommitmentaware/realtime/realtime.go b/pkg/agent/sysadvisor/plugin/overcommitmentaware/realtime/realtime.go index 9fb8b2027b..944eac5595 100644 --- a/pkg/agent/sysadvisor/plugin/overcommitmentaware/realtime/realtime.go +++ b/pkg/agent/sysadvisor/plugin/overcommitmentaware/realtime/realtime.go @@ -261,7 +261,8 
@@ func (ra *RealtimeOvercommitmentAdvisor) syncAllocatableCPU(reserved resource.Qu } func (ra *RealtimeOvercommitmentAdvisor) syncAllocatableMemory(reserved resource.Quantity) { - capacity := resource.NewQuantity(int64(ra.metaServer.MemoryCapacity), resource.BinarySI) + // Use NormalMemoryCapacity which excludes static hugepages for accurate allocatable memory calculation + capacity := resource.NewQuantity(int64(ra.metaServer.NormalMemoryCapacity), resource.BinarySI) capacity.Sub(reserved) diff --git a/pkg/agent/sysadvisor/plugin/overcommitmentaware/realtime/realtime_test.go b/pkg/agent/sysadvisor/plugin/overcommitmentaware/realtime/realtime_test.go index 85edcef21f..7739e520d1 100644 --- a/pkg/agent/sysadvisor/plugin/overcommitmentaware/realtime/realtime_test.go +++ b/pkg/agent/sysadvisor/plugin/overcommitmentaware/realtime/realtime_test.go @@ -44,6 +44,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric" "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/machine" metric2 "github.com/kubewharf/katalyst-core/pkg/util/metric" "github.com/kubewharf/katalyst-core/pkg/util/native" ) @@ -442,6 +443,9 @@ func generateTestMetaServer(t *testing.T, conf *config.Configuration, podList [] NumCores: 16, MemoryCapacity: 32 * 1024 * 1024 * 1024, } + meta.KatalystMachineInfo.MemoryTopology = &machine.MemoryTopology{ + NormalMemoryCapacity: 32 * 1024 * 1024 * 1024, + } return meta } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor.go index 1f788b0f30..5472bd9cbc 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor.go @@ -185,7 +185,7 @@ func (cra *cpuResourceAdvisor) GetHeadroom() (resource.Quantity, map[int]resourc return headroom, numaHeadroom, err } -func (cra 
*cpuResourceAdvisor) UpdateAndGetAdvice() (interface{}, error) { +func (cra *cpuResourceAdvisor) UpdateAndGetAdvice(_ context.Context) (interface{}, error) { startTime := time.Now() result, err := cra.update() _ = general.UpdateHealthzStateByError(cpuAdvisorHealthCheckName, err) @@ -249,11 +249,17 @@ func (cra *cpuResourceAdvisor) updateWithIsolationGuardian(tryIsolation bool) ( return nil, errIsolationSafetyCheckFailed } + pinnedCPUSizeByNuma, pinnedCPUSizeByPackageByNuma, err := cra.getPinnedCPUSizes() + if err != nil { + klog.Errorf("[qosaware-cpu] failed to get pinned cpu sizes: %v", err) + return nil, err + } + // run an episode of provision and headroom policy update for each region for _, r := range cra.regionMap { r.SetEssentials(types.ResourceEssentials{ EnableReclaim: cra.conf.GetDynamicConfiguration().EnableReclaim, - ResourceUpperBound: cra.getRegionMaxRequirement(r), + ResourceUpperBound: cra.getRegionMaxRequirement(r, pinnedCPUSizeByNuma, pinnedCPUSizeByPackageByNuma), ResourceLowerBound: cra.getRegionMinRequirement(r), ReservedForReclaim: cra.getRegionReservedForReclaim(r), ReservedForAllocate: cra.getRegionReservedForAllocate(r), @@ -284,6 +290,29 @@ func (cra *cpuResourceAdvisor) updateWithIsolationGuardian(tryIsolation bool) ( return &calculationResult, nil } +func (cra *cpuResourceAdvisor) getPinnedCPUSizes() (map[int]int, map[string]map[int]int, error) { + cfg := cra.metaCache.GetResourcePackageConfig() + pinnedCPUSizeByNuma := make(map[int]int) + pinnedCPUSizeByPackageByNuma := make(map[string]map[int]int) + for numaID, pkgMap := range cfg { + for pkgName, state := range pkgMap { + if state == nil { + continue + } + size := state.PinnedCPUSet.Size() + if size <= 0 { + continue + } + pinnedCPUSizeByNuma[numaID] += size + if _, ok := pinnedCPUSizeByPackageByNuma[pkgName]; !ok { + pinnedCPUSizeByPackageByNuma[pkgName] = make(map[int]int) + } + pinnedCPUSizeByPackageByNuma[pkgName][numaID] = size + } + } + return pinnedCPUSizeByNuma, 
pinnedCPUSizeByPackageByNuma, nil +} + // setIsolatedContainers get isolation status from isolator and update into containers func (cra *cpuResourceAdvisor) setIsolatedContainers(enableIsolated bool) bool { isolatedPods := sets.NewString() @@ -513,7 +542,6 @@ func (cra *cpuResourceAdvisor) assignDedicatedContainerToRegions(ci *types.Conta } else if len(regions) > 0 { return regions, nil } - if ci.IsNumaBinding() { // create regions by numa node for numaID := range ci.TopologyAwareAssignments { diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go index e726037b52..34f886114f 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go @@ -227,7 +227,11 @@ func (cra *cpuResourceAdvisor) getNumasReservedForAllocate(numas machine.CPUSet) return float64(reserved.Value()*int64(numas.Size())) / float64(cra.metaServer.NumNUMANodes) } -func (cra *cpuResourceAdvisor) getRegionMaxRequirement(r region.QoSRegion) float64 { +func (cra *cpuResourceAdvisor) getRegionMaxRequirement( + r region.QoSRegion, + pinnedCPUSizeByNuma map[int]int, + pinnedCPUSizeByPackageByNuma map[string]map[int]int, +) float64 { res := 0.0 switch r.Type() { case configapi.QoSRegionTypeIsolation: @@ -256,8 +260,22 @@ func (cra *cpuResourceAdvisor) getRegionMaxRequirement(r region.QoSRegion) float res = general.MaxFloat64(1, res) } default: + pkgName := r.GetResourcePackageName() for _, numaID := range r.GetBindingNumas().ToSliceInt() { - res += float64(cra.numaAvailable[numaID] - cra.reservedForReclaim[numaID]) + if pkgName != "" { + if byNuma, ok := pinnedCPUSizeByPackageByNuma[pkgName]; ok { + if pinnedCPUSize, ok := byNuma[numaID]; ok { + res += float64(pinnedCPUSize) + continue + } + } + } + + if pinnedCPUSize, ok := pinnedCPUSizeByNuma[numaID]; ok { + res += float64(cra.numaAvailable[numaID] - pinnedCPUSize - 
cra.reservedForReclaim[numaID]) + } else { + res += float64(cra.numaAvailable[numaID] - cra.reservedForReclaim[numaID]) + } } } return res diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_test.go index 924b45b4e3..4a1b1eeb24 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_test.go @@ -39,6 +39,7 @@ import ( configapi "github.com/kubewharf/katalyst-api/pkg/apis/config/v1alpha1" configv1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/config/v1alpha1" + nodev1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" workloadapis "github.com/kubewharf/katalyst-api/pkg/apis/workload/v1alpha1" "github.com/kubewharf/katalyst-api/pkg/consts" "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options" @@ -58,6 +59,7 @@ import ( metricspool "github.com/kubewharf/katalyst-core/pkg/metrics/metrics-pool" "github.com/kubewharf/katalyst-core/pkg/util/machine" utilmetric "github.com/kubewharf/katalyst-core/pkg/util/metric" + resourcepkg "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) func generateTestConfiguration(t *testing.T, checkpointDir, stateFileDir string) *config.Configuration { @@ -72,6 +74,17 @@ func generateTestConfiguration(t *testing.T, checkpointDir, stateFileDir string) return conf } +// testResourcePackageManager provides deterministic empty resource packages for unit tests. 
+type testResourcePackageManager struct{} + +func (m *testResourcePackageManager) NodeResourcePackages(ctx context.Context) (resourcepkg.NUMAResourcePackageItems, error) { + return resourcepkg.NUMAResourcePackageItems{}, nil +} + +func (m *testResourcePackageManager) ConvertNPDResourcePackages(npd *nodev1alpha1.NodeProfileDescriptor) (resourcepkg.NUMAResourcePackageItems, error) { + return resourcepkg.NUMAResourcePackageItems{}, nil +} + func newTestCPUResourceAdvisor(t *testing.T, pods []*v1.Pod, conf *config.Configuration, mf *metric.FakeMetricsFetcher, profiles map[k8stypes.UID]spd.DummyPodServiceProfile) (*cpuResourceAdvisor, metacache.MetaCache) { metaCache, err := metacache.NewMetaCacheImp(conf, metricspool.DummyMetricsEmitterPool{}, mf) require.NoError(t, err) @@ -107,6 +120,9 @@ func newTestCPUResourceAdvisor(t *testing.T, pods []*v1.Pod, conf *config.Config err = metaServer.SetServiceProfilingManager(spd.NewDummyServiceProfilingManager(profiles)) require.NoError(t, err) + err = metaServer.SetResourcePackageManager(&testResourcePackageManager{}) + require.NoError(t, err) + cra := NewCPUResourceAdvisor(conf, struct{}{}, metaCache, metaServer, metrics.DummyMetrics{}) require.NotNil(t, cra) @@ -1523,7 +1539,7 @@ func TestAdvisorUpdate(t *testing.T) { // if preUpdate is enabled, trigger an empty update firstly if tt.preUpdate { - _, err := advisor.UpdateAndGetAdvice() + _, err := advisor.UpdateAndGetAdvice(ctx) if tt.wantErr { assert.Error(t, err) } else { @@ -1532,7 +1548,7 @@ func TestAdvisorUpdate(t *testing.T) { } // trigger advisor update - advisorRespRaw, err := advisor.UpdateAndGetAdvice() + advisorRespRaw, err := advisor.UpdateAndGetAdvice(ctx) if tt.wantErr { assert.Error(t, err) } else { @@ -1586,20 +1602,20 @@ func TestGetIsolatedContainerRegions(t *testing.T) { conf, _ := options.NewOptions().Config() r1 := ®ion.QoSRegionShare{ - QoSRegionBase: region.NewQoSRegionBase("r1", "", configapi.QoSRegionTypeIsolation, + QoSRegionBase: 
region.NewQoSRegionBase("r1", "", "", configapi.QoSRegionTypeIsolation, conf, struct{}{}, false, false, nil, nil, nil), } _ = r1.AddContainer(c1_1) _ = r1.AddContainer(c1_2) r2 := ®ion.QoSRegionShare{ - QoSRegionBase: region.NewQoSRegionBase("r2", "", configapi.QoSRegionTypeShare, + QoSRegionBase: region.NewQoSRegionBase("r2", "", "", configapi.QoSRegionTypeShare, conf, struct{}{}, false, false, nil, nil, nil), } _ = r2.AddContainer(c2) r3 := ®ion.QoSRegionShare{ - QoSRegionBase: region.NewQoSRegionBase("r3", "", configapi.QoSRegionTypeDedicated, + QoSRegionBase: region.NewQoSRegionBase("r3", "", "", configapi.QoSRegionTypeDedicated, conf, struct{}{}, false, true, nil, nil, nil), } _ = r3.AddContainer(c3_1) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_test.go index 8722f2dac3..a19512411b 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/headroomassembler/assembler_common_test.go @@ -907,7 +907,7 @@ func TestHeadroomAssemblerCommon_GetHeadroom(t *testing.T) { metaServer := generateTestMetaServer(t, tt.fields.cnr, tt.fields.podList, metricsFetcher) for name, regionInfo := range tt.fields.entries { - r := region.NewQoSRegionBase(name, regionInfo.OwnerPoolName, regionInfo.RegionType, conf, nil, false, false, metaCache, metaServer, metrics.DummyMetrics{}) + r := region.NewQoSRegionBase(name, regionInfo.OwnerPoolName, "", regionInfo.RegionType, conf, nil, false, false, metaCache, metaServer, metrics.DummyMetrics{}) r.SetBindingNumas(regionInfo.BindingNumas) r.SetEssentials(types.ResourceEssentials{ EnableReclaim: tt.fields.reclaimedResourceConfiguration.EnableReclaim, diff --git 
a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common.go index 7bbfb608ea..4bd1c3eec3 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common.go @@ -21,6 +21,7 @@ import ( "math" "time" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" configapi "github.com/kubewharf/katalyst-api/pkg/apis/config/v1alpha1" @@ -33,6 +34,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/metrics" "github.com/kubewharf/katalyst-core/pkg/util/general" "github.com/kubewharf/katalyst-core/pkg/util/machine" + resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) type ProvisionAssemblerCommon struct { @@ -201,19 +203,42 @@ func (pa *ProvisionAssemblerCommon) assembleWithoutNUMAExclusivePool( result *types.InternalCPUCalculationResult, ) error { shareRegions := regionHelper.GetRegions(numaID, configapi.QoSRegionTypeShare) - shareInfo, err := extractShareRegionInfo(shareRegions) + var numaSet machine.CPUSet + if numaID == commonstate.FakedNUMAID { + numaSet = *pa.nonBindingNumas + } else { + numaSet = machine.NewCPUSet(numaID) + } + + cfg := pa.metaReader.GetResourcePackageConfig() + pinnedCPUSizeByPkg := pa.getPinnedCPUSizeByPackage(numaSet, cfg) + totalPinnedCPUSize := general.SumUpMapValues(pinnedCPUSizeByPkg) + + disableReclaimSelectorStr := pa.conf.GetDynamicConfiguration().DisableReclaimPinnedCPUSetResourcePackageSelector + disableReclaimSelector, err := general.ParseSelector(disableReclaimSelectorStr) + if err != nil { + return err + } + nonReclaimablePackages := sets.NewString() + for _, numaID := range numaSet.ToSliceInt() { + if pkgMap, ok := cfg[numaID]; ok { + nonReclaimablePackages = 
nonReclaimablePackages.Union(resourcepackage.GetMatchedPackages(pkgMap, disableReclaimSelector)) + } + } + + unpinnedShareRegionInfo, pinnedShareRegionInfos, err := extractShareRegionInfo(shareRegions, pinnedCPUSizeByPkg, nonReclaimablePackages) if err != nil { return err } isolationRegions := regionHelper.GetRegions(numaID, configapi.QoSRegionTypeIsolation) - isolationInfo, err := extractIsolationRegionInfo(isolationRegions) + unpinnedIsolationInfo, pinnedIsolationInfo, err := extractIsolationRegionInfo(isolationRegions, pinnedCPUSizeByPkg, nonReclaimablePackages) if err != nil { return err } dedicatedRegions := regionHelper.GetRegions(numaID, configapi.QoSRegionTypeDedicated) - dedicatedInfo, err := extractDedicatedRegionInfo(dedicatedRegions) + unpinnedDedicatedInfo, pinnedDedicatedInfo, err := extractDedicatedRegionInfo(dedicatedRegions, pinnedCPUSizeByPkg, nonReclaimablePackages) if err != nil { return err } @@ -225,42 +250,126 @@ func (pa *ProvisionAssemblerCommon) assembleWithoutNUMAExclusivePool( nodeEnableReclaim := pa.conf.GetDynamicConfiguration().EnableReclaim - var numaSet machine.CPUSet - if numaID == commonstate.FakedNUMAID { - numaSet = *pa.nonBindingNumas - } else { - numaSet = machine.NewCPUSet(numaID) - } - reservedForReclaim := getNUMAsResource(*pa.reservedForReclaim, numaSet) shareAndIsolatedDedicatedPoolAvailable := getNUMAsResource(*pa.numaAvailable, numaSet) if !*pa.allowSharedCoresOverlapReclaimedCores { shareAndIsolatedDedicatedPoolAvailable -= reservedForReclaim } - sharePoolSizeRequirements := getPoolSizeRequirements(shareInfo) - isolationUppers := general.SumUpMapValues(isolationInfo.isolationUpperSizes) - isolationPoolSizes := isolationInfo.isolationUpperSizes - // if the maximum of share sharePoolSizeRequirements and share requests adds up with isolation upper sizes is larger than - // the available cores of share and isolated pool, we should shrink the isolation pool sizes to lower sizes - if 
general.Max(general.SumUpMapValues(shareInfo.requests), general.SumUpMapValues(shareInfo.requirements))+isolationUppers > - shareAndIsolatedDedicatedPoolAvailable-general.SumUpMapValues(dedicatedInfo.requests) { - isolationPoolSizes = isolationInfo.isolationLowerSizes + getShareAndIsolateDedicatedPoolSizesFunc := func( + shareAndIsolatedDedicatedPoolAvailable int, + shareRegionInfo, dedicatedRegionInfo regionInfo, + isolationRegionInfo isolationRegionInfo, + ) map[string]int { + sharePoolSizeRequirements := getPoolSizeRequirements(shareRegionInfo) + + isolationUppers := general.SumUpMapValues(isolationRegionInfo.isolationUpperSizes) + isolationPoolSizes := isolationRegionInfo.isolationUpperSizes + // if the maximum of share sharePoolSizeRequirements and share requests adds up with isolation upper sizes is larger than + // the available cores of share and isolated pool, we should shrink the isolation pool sizes to lower sizes + if general.Max(general.SumUpMapValues(shareRegionInfo.requests), general.SumUpMapValues(shareRegionInfo.requirements))+isolationUppers > + shareAndIsolatedDedicatedPoolAvailable-general.SumUpMapValues(dedicatedRegionInfo.requests) { + isolationPoolSizes = isolationRegionInfo.isolationLowerSizes + } + + allowExpand := !nodeEnableReclaim || *pa.allowSharedCoresOverlapReclaimedCores + var regulateSharePoolSizes map[string]int + if allowExpand { + regulateSharePoolSizes = shareRegionInfo.requests + } else { + regulateSharePoolSizes = sharePoolSizeRequirements + } + unexpandableRequirements := general.MergeMapInt(isolationPoolSizes, dedicatedRegionInfo.requests) + + general.InfoS("getShareAndIsolateDedicatedPoolSizesFunc pre regulatePoolSizes", + "shareAndIsolatedDedicatedPoolAvailable", shareAndIsolatedDedicatedPoolAvailable, + "allowExpand", allowExpand, + "regulateSharePoolSizes", regulateSharePoolSizes, + "unexpandableRequirements", unexpandableRequirements) + + shareAndIsolateDedicatedPoolSizes, poolThrottled := 
regulatePoolSizes(regulateSharePoolSizes, unexpandableRequirements, shareAndIsolatedDedicatedPoolAvailable, allowExpand) + + general.InfoS("getShareAndIsolateDedicatedPoolSizesFunc post regulatePoolSizes", + "shareAndIsolateDedicatedPoolSizes", shareAndIsolateDedicatedPoolSizes, + "poolThrottled", poolThrottled) + + for _, r := range shareRegionInfo.regionMap { + r.SetThrottled(poolThrottled) + } + + return shareAndIsolateDedicatedPoolSizes } - allowExpand := !nodeEnableReclaim || *pa.allowSharedCoresOverlapReclaimedCores - var regulateSharePoolSizes map[string]int - if allowExpand { - regulateSharePoolSizes = shareInfo.requests - } else { - regulateSharePoolSizes = sharePoolSizeRequirements + shareInfo := initRegionInfo() + isolationInfo := initIsolationRegionInfo() + dedicatedInfo := initRegionInfo() + shareAndIsolateDedicatedPoolSizes := make(map[string]int) + unpinnedShareAndIsolatedDedicatedPoolAvailable := general.Max(0, shareAndIsolatedDedicatedPoolAvailable-totalPinnedCPUSize) + pinnedCPUSetAllInfo := getPinnedCPUSetAllRegionInfo(pinnedShareRegionInfos, pinnedIsolationInfo, pinnedDedicatedInfo) + totalUnusedNonReclaimablePinnedCPUSize := 0 + + general.InfoS("pool info start", + "numaID", numaID, + "shareAndIsolatedDedicatedPoolAvailable", shareAndIsolatedDedicatedPoolAvailable, + "totalPinnedCPUSize", totalPinnedCPUSize, + "unpinnedShareAndIsolatedDedicatedPoolAvailable", unpinnedShareAndIsolatedDedicatedPoolAvailable, + "nonReclaimablePackages", nonReclaimablePackages, + "disableReclaimSelector", disableReclaimSelector) + + // first calculate share and isolate dedicated pool sizes for each pinned region + for pkgName, pinnedCPUSize := range pinnedCPUSizeByPkg { + allInfo, ok := pinnedCPUSetAllInfo[pkgName] + if !ok { + // No regions for this package, so allocated size is 0 + if nonReclaimablePackages.Has(pkgName) { + totalUnusedNonReclaimablePinnedCPUSize += pinnedCPUSize + } + continue + } + + poolSizes := 
getShareAndIsolateDedicatedPoolSizesFunc(pinnedCPUSize, allInfo.shareRegionInfo, allInfo.dedicatedRegionInfos, allInfo.isolationRegionInfo) + + allocatedForPkg := general.SumUpMapValues(poolSizes) + unusedForPkg := pinnedCPUSize - allocatedForPkg + if nonReclaimablePackages.Has(pkgName) { + totalUnusedNonReclaimablePinnedCPUSize += unusedForPkg + } + + for poolName, size := range poolSizes { + shareAndIsolateDedicatedPoolSizes[poolName] = size + } + + shareInfo.merge(allInfo.shareRegionInfo) + isolationInfo.merge(allInfo.isolationRegionInfo) + dedicatedInfo.merge(allInfo.dedicatedRegionInfos) + + general.InfoS("pinned pool info", + "numaID", numaID, + "pkgName", pkgName, + "shareRegionInfo", allInfo.shareRegionInfo, + "isolationRegionInfo", allInfo.isolationRegionInfo, + "dedicatedRegionInfos", allInfo.dedicatedRegionInfos, + "pinnedCPUSize", pinnedCPUSize, + "poolSizes", poolSizes) } - unexpandableRequirements := general.MergeMapInt(isolationPoolSizes, dedicatedInfo.requests) - shareAndIsolateDedicatedPoolSizes, poolThrottled := regulatePoolSizes(regulateSharePoolSizes, unexpandableRequirements, shareAndIsolatedDedicatedPoolAvailable, allowExpand) - for _, r := range shareRegions { - r.SetThrottled(poolThrottled) + + unpinnedPoolSizes := getShareAndIsolateDedicatedPoolSizesFunc(unpinnedShareAndIsolatedDedicatedPoolAvailable, unpinnedShareRegionInfo, unpinnedDedicatedInfo, unpinnedIsolationInfo) + for poolName, size := range unpinnedPoolSizes { + shareAndIsolateDedicatedPoolSizes[poolName] = size } + shareInfo.merge(unpinnedShareRegionInfo) + isolationInfo.merge(unpinnedIsolationInfo) + dedicatedInfo.merge(unpinnedDedicatedInfo) + + general.InfoS("unpinned pool info", + "numaID", numaID, + "unpinnedShareRegionInfo", unpinnedShareRegionInfo, + "unpinnedIsolationRegionInfo", unpinnedIsolationInfo, + "unpinnedDedicatedRegionInfos", unpinnedDedicatedInfo, + "unpinnedShareAndIsolatedDedicatedPoolAvailable", unpinnedShareAndIsolatedDedicatedPoolAvailable, + "poolSizes", 
unpinnedPoolSizes) + dedicatedPoolSizes := make(map[string]int) for poolName := range dedicatedInfo.requests { if size, ok := shareAndIsolateDedicatedPoolSizes[poolName]; ok { @@ -285,11 +394,13 @@ func (pa *ProvisionAssemblerCommon) assembleWithoutNUMAExclusivePool( "dedicatedPoolAvailable", dedicatedPoolAvailable, "dedicatedPoolSizeRequirements", dedicatedPoolSizeRequirements, "dedicatedReclaimCoresSize", dedicatedReclaimCoresSize, - "sharePoolSizeRequirements", sharePoolSizeRequirements, + "sharePoolSizeRequirements", getPoolSizeRequirements(shareInfo), "isolationUpperSizes", isolationInfo.isolationUpperSizes, "isolationLowerSizes", isolationInfo.isolationLowerSizes, "shareAndIsolateDedicatedPoolSizes", shareAndIsolateDedicatedPoolSizes, - "shareAndIsolatedDedicatedPoolAvailable", shareAndIsolatedDedicatedPoolAvailable) + "shareAndIsolatedDedicatedPoolAvailable", shareAndIsolatedDedicatedPoolAvailable, + "totalUnusedNonReclaimablePinnedCPUSize", totalUnusedNonReclaimablePinnedCPUSize, + "unpinnedShareAndIsolatedDedicatedPoolAvailable", unpinnedShareAndIsolatedDedicatedPoolAvailable) // fill in regulated share-and-isolated pool entries for poolName, poolSize := range shareAndIsolateDedicatedPoolSizes { @@ -304,204 +415,261 @@ func (pa *ProvisionAssemblerCommon) assembleWithoutNUMAExclusivePool( } } - // assemble reclaim pool + reclaimPoolData := &reclaimPoolCalculationData{ + shareInfo: shareInfo, + isolationInfo: isolationInfo, + dedicatedInfo: dedicatedInfo, + shareAndIsolateDedicatedPoolSizes: shareAndIsolateDedicatedPoolSizes, + dedicatedPoolSizes: dedicatedPoolSizes, + dedicatedReclaimCoresSize: dedicatedReclaimCoresSize, + shareAndIsolatedDedicatedPoolAvailable: shareAndIsolatedDedicatedPoolAvailable, + reservedForReclaim: reservedForReclaim, + nodeEnableReclaim: nodeEnableReclaim, + numaID: numaID, + totalUnusedNonReclaimablePinnedCPUSize: totalUnusedNonReclaimablePinnedCPUSize, + } + + reclaimedCoresSize, overlapReclaimedCoresSize, reclaimedCoresQuota, 
err := pa.calculateReclaimPool(reclaimPoolData, result) + if err != nil { + return err + } + + nonOverlapReclaimedCoresSize := general.Max(reclaimedCoresSize-overlapReclaimedCoresSize, 0) + result.SetPoolEntry(commonstate.PoolNameReclaim, numaID, nonOverlapReclaimedCoresSize, reclaimedCoresQuota) + + general.InfoS("assemble reclaim pool entry", + "numaID", numaID, + "reservedForReclaim", reservedForReclaim, + "reclaimedCoresSize", reclaimedCoresSize, + "overlapReclaimedCoresSize", overlapReclaimedCoresSize, + "nonOverlapReclaimedCoresSize", nonOverlapReclaimedCoresSize, + "reclaimedCoresQuota", reclaimedCoresQuota) + + return nil +} + +type reclaimPoolCalculationData struct { + shareInfo regionInfo + isolationInfo isolationRegionInfo + dedicatedInfo regionInfo + shareAndIsolateDedicatedPoolSizes map[string]int + dedicatedPoolSizes map[string]int + dedicatedReclaimCoresSize int + shareAndIsolatedDedicatedPoolAvailable int + reservedForReclaim int + nodeEnableReclaim bool + numaID int + totalUnusedNonReclaimablePinnedCPUSize int +} + +func (pa *ProvisionAssemblerCommon) calculateReclaimPool( + data *reclaimPoolCalculationData, + result *types.InternalCPUCalculationResult, +) (int, int, float64, error) { + if *pa.allowSharedCoresOverlapReclaimedCores { + return pa.calculateOverlapReclaimPool(data, result) + } + return pa.calculateNonOverlapReclaimPool(data, result) +} + +func (pa *ProvisionAssemblerCommon) calculateOverlapReclaimPool( + data *reclaimPoolCalculationData, + result *types.InternalCPUCalculationResult, +) (int, int, float64, error) { var reclaimedCoresSize, overlapReclaimedCoresSize int reclaimedCoresQuota := float64(-1) - if *pa.allowSharedCoresOverlapReclaimedCores { - isolated := 0 - poolSizes := make(map[string]int) - sharePoolSizes := make(map[string]int) - reclaimablePoolSizes := make(map[string]int) - nonReclaimableSharePoolSizes := make(map[string]int) - reclaimableShareRequirements := make(map[string]int) - reclaimableRequirements := 
make(map[string]int) - for poolName, size := range shareAndIsolateDedicatedPoolSizes { - _, ok := sharePoolSizeRequirements[poolName] - if ok { - if shareInfo.reclaimEnable[poolName] { - reclaimablePoolSizes[poolName] = size - reclaimableShareRequirements[poolName] = shareInfo.requirements[poolName] - reclaimableRequirements[poolName] = shareInfo.requirements[poolName] - } else { - nonReclaimableSharePoolSizes[poolName] = size - } - poolSizes[poolName] = size - sharePoolSizes[poolName] = size - } - _, ok = isolationPoolSizes[poolName] - if ok { - isolated += size + isolated := 0 + poolSizes := make(map[string]int) + sharePoolSizes := make(map[string]int) + reclaimablePoolSizes := make(map[string]int) + nonReclaimableSharePoolSizes := make(map[string]int) + reclaimableShareRequirements := make(map[string]int) + reclaimableRequirements := make(map[string]int) + + for poolName, size := range data.shareAndIsolateDedicatedPoolSizes { + _, ok := data.shareInfo.requirements[poolName] + if ok { + if data.shareInfo.reclaimEnable[poolName] { + reclaimablePoolSizes[poolName] = size + reclaimableShareRequirements[poolName] = data.shareInfo.requirements[poolName] + reclaimableRequirements[poolName] = data.shareInfo.requirements[poolName] + } else { + nonReclaimableSharePoolSizes[poolName] = size } + poolSizes[poolName] = size + sharePoolSizes[poolName] = size + } - _, ok = dedicatedInfo.requests[poolName] - if ok { - if dedicatedInfo.reclaimEnable[poolName] { - reclaimablePoolSizes[poolName] = size - reclaimableRequirements[poolName] = dedicatedInfo.requirements[poolName] - } - poolSizes[poolName] = size - } + _, ok = data.isolationInfo.isolationUpperSizes[poolName] + if ok { + isolated += size } - overlapReclaimSize := make(map[string]int) - // shareReclaimCoresSize is the size of cores that can be reclaimed from share pools - shareReclaimCoresSize := shareAndIsolatedDedicatedPoolAvailable - isolated - - general.SumUpMapValues(nonReclaimableSharePoolSizes) - 
general.SumUpMapValues(reclaimableShareRequirements) - - general.SumUpMapValues(dedicatedPoolSizes) - if nodeEnableReclaim { - reclaimedCoresSize = shareReclaimCoresSize + dedicatedReclaimCoresSize - if reclaimedCoresSize < reservedForReclaim { - reclaimedCoresSize = reservedForReclaim - regulatedOverlapReclaimPoolSize, err := regulateOverlapReclaimPoolSize(poolSizes, reclaimedCoresSize) - if err != nil { - return fmt.Errorf("failed to regulateOverlapReclaimPoolSize for NUMAs reserved for reclaim: %w", err) - } - overlapReclaimSize = regulatedOverlapReclaimPoolSize - } else { - for poolName, size := range reclaimablePoolSizes { - requirement, ok := reclaimableRequirements[poolName] - if !ok { - continue - } + _, ok = data.dedicatedInfo.requests[poolName] + if ok { + if data.dedicatedInfo.reclaimEnable[poolName] { + reclaimablePoolSizes[poolName] = size + reclaimableRequirements[poolName] = data.dedicatedInfo.requirements[poolName] + } + poolSizes[poolName] = size + } + } - // calculate the reclaim size for each share pool by subtracting the share requirement from the share pool size - reclaimSize := size - requirement - if reclaimSize > 0 { - overlapReclaimSize[poolName] = reclaimSize - } else { - overlapReclaimSize[poolName] = 1 - } - } + overlapReclaimSize := make(map[string]int) + // We deduct totalUnusedNonReclaimablePinnedCPUSize here to ensure that the unused portion of non-reclaimable + // resource packages is not added to the reclaim pool, preventing those CPUs from being reclaimed. 
+ shareReclaimCoresSize := data.shareAndIsolatedDedicatedPoolAvailable - isolated - + general.SumUpMapValues(nonReclaimableSharePoolSizes) - general.SumUpMapValues(reclaimableShareRequirements) - + general.SumUpMapValues(data.dedicatedPoolSizes) - data.totalUnusedNonReclaimablePinnedCPUSize + + if data.nodeEnableReclaim { + reclaimedCoresSize = shareReclaimCoresSize + data.dedicatedReclaimCoresSize + if reclaimedCoresSize < data.reservedForReclaim { + reclaimedCoresSize = data.reservedForReclaim + regulatedOverlapReclaimPoolSize, err := regulateOverlapReclaimPoolSize(poolSizes, reclaimedCoresSize) + if err != nil { + return 0, 0, 0, fmt.Errorf("failed to regulateOverlapReclaimPoolSize for NUMAs reserved for reclaim: %w", err) } + overlapReclaimSize = regulatedOverlapReclaimPoolSize } else { - reclaimedCoresSize = reservedForReclaim - if len(poolSizes) > 0 && reclaimedCoresSize > shareReclaimCoresSize { - // only if reclaimedCoresSize > shareReclaimCoresSize, overlap reclaim pool with both share pool and dedicated pool - reclaimedCoresSize = general.Min(reclaimedCoresSize, general.SumUpMapValues(poolSizes)) - var overlapSharePoolSizes map[string]int - if reclaimedCoresSize <= general.SumUpMapValues(reclaimablePoolSizes) { - overlapSharePoolSizes = reclaimablePoolSizes - } else { - overlapSharePoolSizes = poolSizes + for poolName, size := range reclaimablePoolSizes { + requirement, ok := reclaimableRequirements[poolName] + if !ok { + continue } - - reclaimSizes, err := regulateOverlapReclaimPoolSize(overlapSharePoolSizes, reclaimedCoresSize) - if err != nil { - return fmt.Errorf("failed to regulateOverlapReclaimPoolSize: %w", err) - } - overlapReclaimSize = reclaimSizes - } else if len(sharePoolSizes) > 0 && reclaimedCoresSize <= general.SumUpMapValues(sharePoolSizes) { - // if exit share pool, and reclaimedCoresSize <= sum of share pool size, overlap reclaim pool with share pool - reclaimSizes, err := regulateOverlapReclaimPoolSize(sharePoolSizes, 
reclaimedCoresSize) - if err != nil { - return fmt.Errorf("failed to regulateOverlapReclaimPoolSize: %w", err) + reclaimSize := size - requirement + if reclaimSize > 0 { + overlapReclaimSize[poolName] = reclaimSize + } else { + overlapReclaimSize[poolName] = 1 } - overlapReclaimSize = reclaimSizes } } + } else { + reclaimedCoresSize = data.reservedForReclaim + if len(poolSizes) > 0 && reclaimedCoresSize > shareReclaimCoresSize { + reclaimedCoresSize = general.Min(reclaimedCoresSize, general.SumUpMapValues(poolSizes)) + var overlapSharePoolSizes map[string]int + if reclaimedCoresSize <= general.SumUpMapValues(reclaimablePoolSizes) { + overlapSharePoolSizes = reclaimablePoolSizes + } else { + overlapSharePoolSizes = poolSizes + } - quotaCtrlKnobEnabled, err := metacache.IsQuotaCtrlKnobEnabled(pa.metaReader) - if err != nil { - return err + reclaimSizes, err := regulateOverlapReclaimPoolSize(overlapSharePoolSizes, reclaimedCoresSize) + if err != nil { + return 0, 0, 0, fmt.Errorf("failed to regulateOverlapReclaimPoolSize: %w", err) + } + overlapReclaimSize = reclaimSizes + } else if len(sharePoolSizes) > 0 && reclaimedCoresSize <= general.SumUpMapValues(sharePoolSizes) { + reclaimSizes, err := regulateOverlapReclaimPoolSize(sharePoolSizes, reclaimedCoresSize) + if err != nil { + return 0, 0, 0, fmt.Errorf("failed to regulateOverlapReclaimPoolSize: %w", err) + } + overlapReclaimSize = reclaimSizes } + } - if quotaCtrlKnobEnabled && numaID != commonstate.FakedNUMAID && len(poolSizes) > 0 { - reclaimedCoresQuota = float64(general.Max(reservedForReclaim, reclaimedCoresSize)) - if shareInfo.minReclaimedCoresCPUQuota != -1 || dedicatedInfo.minReclaimedCoresCPUQuota != -1 { - if shareInfo.minReclaimedCoresCPUQuota != -1 { - reclaimedCoresQuota = shareInfo.minReclaimedCoresCPUQuota - } - - if dedicatedInfo.minReclaimedCoresCPUQuota != -1 { - reclaimedCoresQuota = general.MinFloat64(reclaimedCoresQuota, dedicatedInfo.minReclaimedCoresCPUQuota) - } + quotaCtrlKnobEnabled, err 
:= metacache.IsQuotaCtrlKnobEnabled(pa.metaReader) + if err != nil { + return 0, 0, 0, err + } - reclaimedCoresQuota = general.MaxFloat64(reclaimedCoresQuota, float64(reservedForReclaim)) + if quotaCtrlKnobEnabled && data.numaID != commonstate.FakedNUMAID && len(poolSizes) > 0 { + reclaimedCoresQuota = float64(general.Max(data.reservedForReclaim, reclaimedCoresSize)) + if data.shareInfo.minReclaimedCoresCPUQuota != -1 || data.dedicatedInfo.minReclaimedCoresCPUQuota != -1 { + if data.shareInfo.minReclaimedCoresCPUQuota != -1 { + reclaimedCoresQuota = data.shareInfo.minReclaimedCoresCPUQuota } - // if cpu quota enabled, set all reclaimable share pool size to reclaimablePoolSizes - for poolName := range overlapReclaimSize { - overlapReclaimSize[poolName] = general.Max(overlapReclaimSize[poolName], reclaimablePoolSizes[poolName]) + if data.dedicatedInfo.minReclaimedCoresCPUQuota != -1 { + reclaimedCoresQuota = general.MinFloat64(reclaimedCoresQuota, data.dedicatedInfo.minReclaimedCoresCPUQuota) } + + reclaimedCoresQuota = general.MaxFloat64(reclaimedCoresQuota, float64(data.reservedForReclaim)) } - for overlapPoolName, reclaimSize := range overlapReclaimSize { - if _, ok := shareInfo.requests[overlapPoolName]; ok { - general.InfoS("set pool overlap info", - "poolName", commonstate.PoolNameReclaim, - "numaID", numaID, - "poolName", overlapPoolName, - "reclaimSize", reclaimSize) - result.SetPoolOverlapInfo(commonstate.PoolNameReclaim, numaID, overlapPoolName, reclaimSize) - overlapReclaimedCoresSize += reclaimSize - continue - } + // if cpu quota enabled, set all reclaimable share pool size to reclaimablePoolSizes + for poolName := range overlapReclaimSize { + overlapReclaimSize[poolName] = general.Max(overlapReclaimSize[poolName], reclaimablePoolSizes[poolName]) + } + } - if podSet, ok := dedicatedInfo.podSet[overlapPoolName]; ok { - // set pool overlap info for dedicated pool - for podUID, containerSet := range podSet { - for containerName := range containerSet { - 
general.InfoS("set pool overlap pod container info", - "poolName", commonstate.PoolNameReclaim, - "numaID", numaID, - "podUID", podUID, - "containerName", containerName, - "reclaimSize", reclaimSize) - result.SetPoolOverlapPodContainerInfo(commonstate.PoolNameReclaim, numaID, podUID, containerName, reclaimSize) - } + for overlapPoolName, reclaimSize := range overlapReclaimSize { + if _, ok := data.shareInfo.requests[overlapPoolName]; ok { + general.InfoS("set pool overlap info", + "poolName", commonstate.PoolNameReclaim, + "numaID", data.numaID, + "poolName", overlapPoolName, + "reclaimSize", reclaimSize) + result.SetPoolOverlapInfo(commonstate.PoolNameReclaim, data.numaID, overlapPoolName, reclaimSize) + overlapReclaimedCoresSize += reclaimSize + continue + } + + if podSet, ok := data.dedicatedInfo.podSet[overlapPoolName]; ok { + // set pool overlap info for dedicated pool + for podUID, containerSet := range podSet { + for containerName := range containerSet { + general.InfoS("set pool overlap pod container info", + "poolName", commonstate.PoolNameReclaim, + "numaID", data.numaID, + "podUID", podUID, + "containerName", containerName, + "reclaimSize", reclaimSize) + result.SetPoolOverlapPodContainerInfo(commonstate.PoolNameReclaim, data.numaID, podUID, containerName, reclaimSize) } - overlapReclaimedCoresSize += reclaimSize - continue } + overlapReclaimedCoresSize += reclaimSize + continue } - } else { - if nodeEnableReclaim { - for poolName, size := range dedicatedInfo.requests { - if dedicatedInfo.reclaimEnable[poolName] { - reclaimSize := size - dedicatedInfo.requirements[poolName] - if reclaimSize <= 0 { - continue - } - if podSet, ok := dedicatedInfo.podSet[poolName]; ok { - // set pool overlap info for dedicated pool - for podUID, containerSet := range podSet { - for containerName := range containerSet { - general.InfoS("set pool overlap pod container info", - "poolName", commonstate.PoolNameReclaim, - "numaID", numaID, - "podUID", podUID, - "containerName", 
containerName, - "reclaimSize", reclaimSize) - result.SetPoolOverlapPodContainerInfo(commonstate.PoolNameReclaim, numaID, podUID, containerName, reclaimSize) - } + } + + return reclaimedCoresSize, overlapReclaimedCoresSize, reclaimedCoresQuota, nil +} + +func (pa *ProvisionAssemblerCommon) calculateNonOverlapReclaimPool( + data *reclaimPoolCalculationData, + result *types.InternalCPUCalculationResult, +) (int, int, float64, error) { + var reclaimedCoresSize, overlapReclaimedCoresSize int + reclaimedCoresQuota := float64(-1) + + if data.nodeEnableReclaim { + for poolName, size := range data.dedicatedInfo.requests { + if data.dedicatedInfo.reclaimEnable[poolName] { + reclaimSize := size - data.dedicatedInfo.requirements[poolName] + if reclaimSize <= 0 { + continue + } + if podSet, ok := data.dedicatedInfo.podSet[poolName]; ok { + for podUID, containerSet := range podSet { + for containerName := range containerSet { + general.InfoS("set pool overlap pod container info", + "poolName", commonstate.PoolNameReclaim, + "numaID", data.numaID, + "podUID", podUID, + "containerName", containerName, + "reclaimSize", reclaimSize) + result.SetPoolOverlapPodContainerInfo(commonstate.PoolNameReclaim, data.numaID, podUID, containerName, reclaimSize) } - overlapReclaimedCoresSize += reclaimSize - continue } + overlapReclaimedCoresSize += reclaimSize + continue } } - - shareReclaimedCoresSize := shareAndIsolatedDedicatedPoolAvailable - general.SumUpMapValues(shareAndIsolateDedicatedPoolSizes) - reclaimedCoresSize = shareReclaimedCoresSize + dedicatedReclaimCoresSize + reservedForReclaim - } else { - reclaimedCoresSize = reservedForReclaim } - } - // nonOverlapReclaimedCoresSize should be non-negative - nonOverlapReclaimedCoresSize := general.Max(reclaimedCoresSize-overlapReclaimedCoresSize, 0) - result.SetPoolEntry(commonstate.PoolNameReclaim, numaID, nonOverlapReclaimedCoresSize, reclaimedCoresQuota) - - general.InfoS("assemble reclaim pool entry", - "numaID", numaID, - 
"reservedForReclaim", reservedForReclaim, - "reclaimedCoresSize", reclaimedCoresSize, - "overlapReclaimedCoresSize", overlapReclaimedCoresSize, - "nonOverlapReclaimedCoresSize", nonOverlapReclaimedCoresSize, - "reclaimedCoresQuota", reclaimedCoresQuota) + // We deduct totalUnusedNonReclaimablePinnedCPUSize here to ensure that the unused portion of non-reclaimable + // resource packages is not added to the reclaim pool, preventing those CPUs from being reclaimed. + shareReclaimedCoresSize := data.shareAndIsolatedDedicatedPoolAvailable - general.SumUpMapValues(data.shareAndIsolateDedicatedPoolSizes) - data.totalUnusedNonReclaimablePinnedCPUSize + reclaimedCoresSize = shareReclaimedCoresSize + data.dedicatedReclaimCoresSize + data.reservedForReclaim + } else { + reclaimedCoresSize = data.reservedForReclaim + } - return nil + return reclaimedCoresSize, overlapReclaimedCoresSize, reclaimedCoresQuota, nil } // regionInfo is a struct that contains region information @@ -513,35 +681,112 @@ type regionInfo struct { reclaimEnable map[string]bool podSet map[string]types.PodSet minReclaimedCoresCPUQuota float64 + regionMap map[string]region.QoSRegion +} + +func (r *regionInfo) merge(other regionInfo) { + for poolName, size := range other.requirements { + r.requirements[poolName] = size + } + + for poolName, size := range other.requests { + r.requests[poolName] = size + } + + for poolName, enable := range other.reclaimEnable { + r.reclaimEnable[poolName] = enable + } + + for poolName, podSet := range other.podSet { + r.podSet[poolName] = podSet + } + + if r.minReclaimedCoresCPUQuota == -1 || other.minReclaimedCoresCPUQuota < r.minReclaimedCoresCPUQuota { + r.minReclaimedCoresCPUQuota = other.minReclaimedCoresCPUQuota + } + + for poolName, reg := range other.regionMap { + r.regionMap[poolName] = reg + } +} + +func initRegionInfo() regionInfo { + return regionInfo{ + requirements: make(map[string]int), + requests: make(map[string]int), + reclaimEnable: make(map[string]bool), + 
podSet: make(map[string]types.PodSet), + minReclaimedCoresCPUQuota: -1, + regionMap: make(map[string]region.QoSRegion), + } } -func extractShareRegionInfo(shareRegions []region.QoSRegion) (regionInfo, error) { - shareRequirements := make(map[string]int) - shareRequests := make(map[string]int) - shareReclaimEnable := make(map[string]bool) - minReclaimedCoresCPUQuota := float64(-1) +func (pa *ProvisionAssemblerCommon) getPinnedCPUSizeByPackage(numaSet machine.CPUSet, cfg types.ResourcePackageConfig) map[string]int { + pinnedCPUSizeByPkg := make(map[string]int) + + if len(cfg) > 0 { + for _, numaID := range numaSet.ToSliceInt() { + pkgMap, ok := cfg[numaID] + if !ok { + continue + } + for pkgName, state := range pkgMap { + if state == nil { + continue + } + size := state.PinnedCPUSet.Size() + if size <= 0 { + continue + } + pinnedCPUSizeByPkg[pkgName] += size + } + } + return pinnedCPUSizeByPkg + } + return pinnedCPUSizeByPkg +} + +func extractShareRegionInfo(shareRegions []region.QoSRegion, pinnedCPUSizeByPkg map[string]int, nonReclaimablePackages sets.String) (regionInfo, map[string]*regionInfo, error) { + unpinnedRegionInfo := initRegionInfo() + pinnedRegionInfos := make(map[string]*regionInfo) for _, r := range shareRegions { controlKnob, err := r.GetProvision() if err != nil { - return regionInfo{}, err + return regionInfo{}, nil, err } - shareRequirements[r.OwnerPoolName()] = general.Max(1, int(controlKnob[configapi.ControlKnobNonReclaimedCPURequirement].Value)) - shareRequests[r.OwnerPoolName()] = general.Max(1, int(math.Ceil(r.GetPodsRequest()))) - shareReclaimEnable[r.OwnerPoolName()] = r.EnableReclaim() - if quota, ok := controlKnob[configapi.ControlKnobReclaimedCoresCPUQuota]; ok { - if minReclaimedCoresCPUQuota == -1 || quota.Value < minReclaimedCoresCPUQuota { - minReclaimedCoresCPUQuota = quota.Value + + ri := &unpinnedRegionInfo + pkgName := r.GetResourcePackageName() + if pkgName != "" { + if _, ok := pinnedCPUSizeByPkg[pkgName]; ok { + if _, exists := 
pinnedRegionInfos[pkgName]; !exists { + info := initRegionInfo() + pinnedRegionInfos[pkgName] = &info + } + ri = pinnedRegionInfos[pkgName] + } + } + + reclaimEnable := r.EnableReclaim() + if pkgName != "" && nonReclaimablePackages.Has(pkgName) { + reclaimEnable = false // override reclaim Enable if the resource package is non-reclaimable + } + + ri.requirements[r.OwnerPoolName()] = general.Max(1, int(controlKnob[configapi.ControlKnobNonReclaimedCPURequirement].Value)) + ri.requests[r.OwnerPoolName()] = general.Max(1, int(math.Ceil(r.GetPodsRequest()))) + ri.reclaimEnable[r.OwnerPoolName()] = reclaimEnable + if reclaimEnable { + if quota, ok := controlKnob[configapi.ControlKnobReclaimedCoresCPUQuota]; ok { + if ri.minReclaimedCoresCPUQuota == -1 || quota.Value < ri.minReclaimedCoresCPUQuota { + ri.minReclaimedCoresCPUQuota = quota.Value + } } } + ri.regionMap[r.OwnerPoolName()] = r } - return regionInfo{ - requirements: shareRequirements, - requests: shareRequests, - reclaimEnable: shareReclaimEnable, - minReclaimedCoresCPUQuota: minReclaimedCoresCPUQuota, - }, nil + return unpinnedRegionInfo, pinnedRegionInfos, nil } func getPoolSizeRequirements(info regionInfo) map[string]int { @@ -561,32 +806,58 @@ type isolationRegionInfo struct { isolationLowerSizes map[string]int } -func extractIsolationRegionInfo(isolationRegions []region.QoSRegion) (isolationRegionInfo, error) { - isolationUpperSizes := make(map[string]int) - isolationLowerSizes := make(map[string]int) +func (r *isolationRegionInfo) merge(other isolationRegionInfo) { + for poolName, size := range other.isolationUpperSizes { + r.isolationUpperSizes[poolName] = size + } + + for poolName, size := range other.isolationLowerSizes { + r.isolationLowerSizes[poolName] = size + } +} + +func initIsolationRegionInfo() isolationRegionInfo { + return isolationRegionInfo{ + isolationUpperSizes: make(map[string]int), + isolationLowerSizes: make(map[string]int), + } +} + +func extractIsolationRegionInfo(isolationRegions 
[]region.QoSRegion, pinnedCPUSizeByPkg map[string]int, _ sets.String) (isolationRegionInfo, map[string]*isolationRegionInfo, error) { + unpinnedRegionInfo := initIsolationRegionInfo() + pinnedRegionInfos := make(map[string]*isolationRegionInfo) for _, r := range isolationRegions { controlKnob, err := r.GetProvision() if err != nil { - return isolationRegionInfo{}, err + return isolationRegionInfo{}, nil, err + } + + ri := &unpinnedRegionInfo + pkgName := r.GetResourcePackageName() + if pkgName != "" { + if _, ok := pinnedCPUSizeByPkg[pkgName]; ok { + if _, exists := pinnedRegionInfos[pkgName]; !exists { + info := initIsolationRegionInfo() + pinnedRegionInfos[pkgName] = &info + } + ri = pinnedRegionInfos[pkgName] + } } - // save limits and requests for isolated region - isolationUpperSizes[r.Name()] = int(controlKnob[configapi.ControlKnobNonIsolatedUpperCPUSize].Value) - isolationLowerSizes[r.Name()] = int(controlKnob[configapi.ControlKnobNonIsolatedLowerCPUSize].Value) + + // Isolation region currently doesn't use reclaimEnable in the same way as Share and Dedicated, + // but we still process it just in case, though it only sets upper/lower sizes. 
+ ri.isolationUpperSizes[r.Name()] = int(controlKnob[configapi.ControlKnobNonIsolatedUpperCPUSize].Value) + ri.isolationLowerSizes[r.Name()] = int(controlKnob[configapi.ControlKnobNonIsolatedLowerCPUSize].Value) } - return isolationRegionInfo{ - isolationUpperSizes: isolationUpperSizes, - isolationLowerSizes: isolationLowerSizes, - }, nil + return unpinnedRegionInfo, pinnedRegionInfos, nil } -func extractDedicatedRegionInfo(regions []region.QoSRegion) (regionInfo, error) { - dedicatedRequirements := make(map[string]int) - dedicatedRequests := make(map[string]int) - dedicatedEnable := make(map[string]bool) - dedicatedPodSet := make(map[string]types.PodSet) - minReclaimedCoresCPUQuota := float64(-1) +func extractDedicatedRegionInfo(regions []region.QoSRegion, pinnedCPUSizeByPkg map[string]int, nonReclaimablePackages sets.String) (regionInfo, map[string]*regionInfo, error) { + unpinnedRegionInfo := initRegionInfo() + pinnedRegionInfos := make(map[string]*regionInfo) + for _, r := range regions { if r.IsNumaExclusive() { continue @@ -594,34 +865,95 @@ func extractDedicatedRegionInfo(regions []region.QoSRegion) (regionInfo, error) controlKnob, err := r.GetProvision() if err != nil { - return regionInfo{}, err + return regionInfo{}, nil, err + } + + ri := &unpinnedRegionInfo + pkgName := r.GetResourcePackageName() + if pkgName != "" { + if _, ok := pinnedCPUSizeByPkg[pkgName]; ok { + if _, exists := pinnedRegionInfos[pkgName]; !exists { + info := initRegionInfo() + pinnedRegionInfos[pkgName] = &info + } + ri = pinnedRegionInfos[pkgName] + } + } + + reclaimEnable := r.EnableReclaim() + if pkgName != "" && nonReclaimablePackages.Has(pkgName) { + reclaimEnable = false // override reclaim Enable if the resource package is non-reclaimable } regionName := r.Name() - dedicatedRequirements[regionName] = general.Max(1, int(controlKnob[configapi.ControlKnobNonReclaimedCPURequirement].Value)) + ri.requirements[regionName] = general.Max(1, 
int(controlKnob[configapi.ControlKnobNonReclaimedCPURequirement].Value)) if r.IsNumaBinding() { numaBindingSize := r.GetBindingNumas().Size() if numaBindingSize == 0 { - return regionInfo{}, fmt.Errorf("numa binding size is zero, region name: %s", r.Name()) + return regionInfo{}, nil, fmt.Errorf("numa binding size is zero, region name: %s", r.Name()) } - dedicatedRequests[regionName] = int(math.Ceil(r.GetPodsRequest() / float64(numaBindingSize))) + ri.requests[regionName] = int(math.Ceil(r.GetPodsRequest() / float64(numaBindingSize))) } else { - dedicatedRequests[regionName] = int(math.Ceil(r.GetPodsRequest())) + ri.requests[regionName] = int(math.Ceil(r.GetPodsRequest())) } - dedicatedEnable[regionName] = r.EnableReclaim() - dedicatedPodSet[regionName] = r.GetPods() - if quota, ok := controlKnob[configapi.ControlKnobReclaimedCoresCPUQuota]; ok { - if minReclaimedCoresCPUQuota == -1 || quota.Value < minReclaimedCoresCPUQuota { - minReclaimedCoresCPUQuota = quota.Value + ri.reclaimEnable[regionName] = reclaimEnable + ri.podSet[regionName] = r.GetPods() + if reclaimEnable { + if quota, ok := controlKnob[configapi.ControlKnobReclaimedCoresCPUQuota]; ok { + if ri.minReclaimedCoresCPUQuota == -1 || quota.Value < ri.minReclaimedCoresCPUQuota { + ri.minReclaimedCoresCPUQuota = quota.Value + } } } + ri.regionMap[regionName] = r } - return regionInfo{ - requirements: dedicatedRequirements, - requests: dedicatedRequests, - reclaimEnable: dedicatedEnable, - podSet: dedicatedPodSet, - minReclaimedCoresCPUQuota: minReclaimedCoresCPUQuota, - }, nil + return unpinnedRegionInfo, pinnedRegionInfos, nil +} + +type pinnedCPUSetAllRegionInfo struct { + shareRegionInfo regionInfo + isolationRegionInfo isolationRegionInfo + dedicatedRegionInfos regionInfo +} + +func initPinnedCPUSetAllRegionInfo() *pinnedCPUSetAllRegionInfo { + return &pinnedCPUSetAllRegionInfo{ + shareRegionInfo: initRegionInfo(), + isolationRegionInfo: initIsolationRegionInfo(), + dedicatedRegionInfos: 
initRegionInfo(), + } +} + +func getPinnedCPUSetAllRegionInfo( + shareRegionInfo map[string]*regionInfo, + isolationRegionInfo map[string]*isolationRegionInfo, + dedicatedRegionInfos map[string]*regionInfo, +) map[string]*pinnedCPUSetAllRegionInfo { + res := make(map[string]*pinnedCPUSetAllRegionInfo) + for pkgName, info := range shareRegionInfo { + _, ok := res[pkgName] + if !ok { + res[pkgName] = initPinnedCPUSetAllRegionInfo() + } + res[pkgName].shareRegionInfo = *info + } + + for regionName, info := range isolationRegionInfo { + _, ok := res[regionName] + if !ok { + res[regionName] = initPinnedCPUSetAllRegionInfo() + } + res[regionName].isolationRegionInfo = *info + } + + for regionName, info := range dedicatedRegionInfos { + _, ok := res[regionName] + if !ok { + res[regionName] = initPinnedCPUSetAllRegionInfo() + } + res[regionName].dedicatedRegionInfos = *info + } + + return res } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go index eeca28b187..2b0da4bef3 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go @@ -38,6 +38,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/metrics" metricspool "github.com/kubewharf/katalyst-core/pkg/metrics/metrics-pool" "github.com/kubewharf/katalyst-core/pkg/util/machine" + resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) type FakeRegion struct { @@ -83,6 +84,11 @@ func (fake *FakeRegion) OwnerPoolName() string { return fake.ownerPoolName } +func (fake *FakeRegion) GetResourcePackageName() string { + _, pkgName := resourcepackage.UnwrapOwnerPoolName(fake.ownerPoolName) + return pkgName +} + func (fake *FakeRegion) IsEmpty() bool { return false } @@ 
-267,10 +273,152 @@ func TestAssembleProvision(t *testing.T) { name string enableReclaimed bool allowSharedCoresOverlapReclaimedCores bool + disableReclaimSelector string + resourcePackageConfig types.ResourcePackageConfig poolInfos []testCasePoolConfig + wantErr bool expectPoolEntries map[string]map[int]types.CPUResource expectPoolOverlapInfo map[string]map[int]map[string]int }{ + { + name: "test-disable-reclaim-pkg-complex", + enableReclaimed: true, + disableReclaimSelector: "disable-reclaim=true", + allowSharedCoresOverlapReclaimedCores: true, + resourcePackageConfig: types.ResourcePackageConfig{ + 0: map[string]*types.ResourcePackageState{ + "pkg1": { + Attributes: map[string]string{"disable-reclaim": "true"}, + PinnedCPUSet: machine.NewCPUSet(1, 2, 3), // size 3 + }, + "pkg2": { + Attributes: map[string]string{"disable-reclaim": "false"}, + PinnedCPUSet: machine.NewCPUSet(4, 5), // size 2 + }, + }, + 1: map[string]*types.ResourcePackageState{ + "pkg1": { + Attributes: map[string]string{"disable-reclaim": "true"}, + PinnedCPUSet: machine.NewCPUSet(1, 2, 3, 4, 5), // size 5 + }, + }, + }, + poolInfos: []testCasePoolConfig{ + { + poolName: "share", // ownerPoolName is share, pkg is empty + poolType: configapi.QoSRegionTypeShare, + numa: machine.NewCPUSet(0), + isNumaBinding: false, + provision: types.ControlKnob{ + configapi.ControlKnobNonReclaimedCPURequirement: {Value: 6}, + }, + }, + { + poolName: "share-NUMA1", // ownerPoolName is share-NUMA1, pkg is NUMA1 + poolType: configapi.QoSRegionTypeShare, + numa: machine.NewCPUSet(1), + isNumaBinding: true, + provision: types.ControlKnob{ + configapi.ControlKnobNonReclaimedCPURequirement: {Value: 8}, + }, + }, + { + poolName: "isolation-NUMA1", + poolType: configapi.QoSRegionTypeIsolation, + numa: machine.NewCPUSet(1), + isNumaBinding: true, + provision: types.ControlKnob{ + configapi.ControlKnobNonIsolatedUpperCPUSize: {Value: 8}, + configapi.ControlKnobNonIsolatedLowerCPUSize: {Value: 4}, + }, + }, + }, + 
expectPoolEntries: map[string]map[int]types.CPUResource{ + "share": { + -1: types.CPUResource{Size: 19, Quota: -1}, // allow expand to full size + }, + "share-NUMA1": { + 1: types.CPUResource{Size: 11, Quota: -1}, // NUMA1 total 24, isolation 8, share req 8. allow expand but max is 24-8=16? + }, + "isolation-NUMA1": { + 1: types.CPUResource{Size: 8, Quota: -1}, + }, + "reserve": { + -1: types.CPUResource{Size: 0, Quota: -1}, + }, + "reclaim": { + // NUMA 0: available 24, isolated 0, unused non-reclaimable: pkg1(size 3) - allocated 0 = 3 + // overlapReclaim pool calculation: shareReclaimCoresSize = 24 - 0 - 0 - 6 - 0 - 3 = 15 + // reclaimedCoresSize = 15 + 0 = 15 + // overlapSharePoolSizes = 24, overlapReclaimSize = 15 + -1: types.CPUResource{Size: 2, Quota: -1}, + // NUMA 1: available 24, isolated 8, unused non-reclaimable: pkg1(size 5) - allocated 0 = 5 + // shareReclaimCoresSize = 24 - 8 - 0 - 8 - 0 - 5 = 3 + // reclaimedCoresSize = 3 (but reservedForReclaim is 4, so it should be regulated to 4) + // if regulated to 4, then overlapReclaimSize is 4 + // nonOverlap is 4-4=0 + 1: types.CPUResource{Size: 0, Quota: -1}, + }, + }, + expectPoolOverlapInfo: map[string]map[int]map[string]int{ + "reclaim": { + -1: map[string]int{"share": 13}, // total unused non-reclaimable is 3. share size is 24, req is 6, max reclaim is 15. overlap is 15. 
+ 1: map[string]int{"share-NUMA1": 4}, + }, + }, + }, + { + name: "test-disable-reclaim-pkg", + enableReclaimed: true, + disableReclaimSelector: "disable-reclaim=true", + resourcePackageConfig: types.ResourcePackageConfig{ + 0: map[string]*types.ResourcePackageState{ + "pkg1": { + Attributes: map[string]string{"disable-reclaim": "true"}, + PinnedCPUSet: machine.NewCPUSet(1, 2, 3), // size 3 + }, + "pkg2": { + Attributes: map[string]string{"disable-reclaim": "false"}, + PinnedCPUSet: machine.NewCPUSet(4, 5), // size 2 + }, + }, + }, + poolInfos: []testCasePoolConfig{ + { + poolName: "share", + poolType: configapi.QoSRegionTypeShare, + numa: machine.NewCPUSet(0), + isNumaBinding: false, + provision: types.ControlKnob{ + configapi.ControlKnobNonReclaimedCPURequirement: {Value: 6}, + }, + }, + { + poolName: "share-NUMA1", + poolType: configapi.QoSRegionTypeShare, + numa: machine.NewCPUSet(1), + isNumaBinding: true, + provision: types.ControlKnob{ + configapi.ControlKnobNonReclaimedCPURequirement: {Value: 8}, + }, + }, + }, + expectPoolEntries: map[string]map[int]types.CPUResource{ + "share": { + -1: types.CPUResource{Size: 6, Quota: -1}, + }, + "share-NUMA1": { + 1: types.CPUResource{Size: 8, Quota: -1}, + }, + "reserve": { + -1: types.CPUResource{Size: 0, Quota: -1}, + }, + "reclaim": { + -1: types.CPUResource{Size: 15, Quota: -1}, // Originally 18, but we deducted 3 unused non-reclaimable + 1: types.CPUResource{Size: 16, Quota: -1}, + }, + }, + }, { name: "test1", enableReclaimed: true, @@ -988,6 +1136,11 @@ func TestAssembleProvision(t *testing.T) { "reclaim": {-1: map[string]int{"share-a": 18, "share-b": 16}}, }, }, + { + name: "test with invalid disable-reclaim selector", + disableReclaimSelector: "disable-reclaim=true,,invalid", + wantErr: true, + }, } reservedForReclaim := map[int]int{ @@ -1005,7 +1158,7 @@ func TestAssembleProvision(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - conf := generateTestConf(t, tt.enableReclaimed) + conf := 
generateTestConf(t, tt.enableReclaimed, tt.disableReclaimSelector) genericCtx, err := katalyst_base.GenerateFakeGenericContext([]runtime.Object{}) require.NoError(t, err) @@ -1019,6 +1172,11 @@ func TestAssembleProvision(t *testing.T) { metaCache, err := metacache.NewMetaCacheImp(conf, metricspool.DummyMetricsEmitterPool{}, metric.NewFakeMetricsFetcher(metrics.DummyMetrics{})) require.NoError(t, err) + if tt.resourcePackageConfig != nil { + require.NoError(t, metaCache.SetResourcePackageConfig(tt.resourcePackageConfig)) + } else { + require.NoError(t, metaCache.SetResourcePackageConfig(types.ResourcePackageConfig{0: map[string]*types.ResourcePackageState{}})) + } nonBindingNumas := machine.NewCPUSet() for numaID := range numaAvailable { @@ -1045,6 +1203,10 @@ func TestAssembleProvision(t *testing.T) { common := NewProvisionAssemblerCommon(conf, nil, ®ionMap, &reservedForReclaim, &numaAvailable, &nonBindingNumas, &tt.allowSharedCoresOverlapReclaimedCores, metaCache, metaServer, metrics.DummyMetrics{}) result, err := common.AssembleProvision() + if tt.wantErr { + require.Error(t, err) + return + } require.NoErrorf(t, err, "failed to AssembleProvision: %s", err) require.NotNil(t, result, "invalid assembler result") t.Logf("%v", result) @@ -1056,7 +1218,7 @@ func TestAssembleProvision(t *testing.T) { } } -func generateTestConf(t *testing.T, enableReclaim bool) *config.Configuration { +func generateTestConf(t *testing.T, enableReclaim bool, disableReclaimSelector string) *config.Configuration { conf, err := options.NewOptions().Config() require.NoError(t, err) require.NotNil(t, conf) @@ -1072,5 +1234,8 @@ func generateTestConf(t *testing.T, enableReclaim bool) *config.Configuration { configapi.QoSRegionTypeShare: {types.CPUProvisionPolicyCanonical}, } conf.GetDynamicConfiguration().EnableReclaim = enableReclaim + if disableReclaimSelector != "" { + conf.GetDynamicConfiguration().DisableReclaimPinnedCPUSetResourcePackageSelector = disableReclaimSelector + } return conf } 
diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/isolation/isolator_load.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/isolation/isolator_load.go index 2eb1f7efb8..b237fc20ba 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/isolation/isolator_load.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/isolation/isolator_load.go @@ -69,7 +69,7 @@ type LoadIsolator struct { // map from pod/container pair to containerIsolationState states sync.Map - configTranslator *general.CommonSuffixTranslator + configTranslator general.SuffixTranslator } func NewLoadIsolator(conf *config.Configuration, _ interface{}, emitter metrics.MetricEmitter, @@ -82,7 +82,7 @@ func NewLoadIsolator(conf *config.Configuration, _ interface{}, emitter metrics. metaReader: metaCache, metaServer: metaServer, - configTranslator: general.NewCommonSuffixTranslator(commonstate.NUMAPoolInfix), + configTranslator: commonstate.OwnerPoolNameTranslator, } } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/helper.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/helper.go new file mode 100644 index 0000000000..207e847574 --- /dev/null +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/helper.go @@ -0,0 +1,27 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package region + +import ( + "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/types" + resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" +) + +// GetResourcePackageName returns resource package name from container info +func GetResourcePackageName(ci *types.ContainerInfo) string { + return resourcepackage.GetResourcePackageName(ci.Annotations) +} diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region.go index 2ed48138d4..768afa1a45 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region.go @@ -22,6 +22,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/types" "github.com/kubewharf/katalyst-core/pkg/metrics" "github.com/kubewharf/katalyst-core/pkg/util/machine" + resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) // QoSRegion is internal abstraction, managing a group of containers with similar QoS sensitivity @@ -33,6 +34,10 @@ type QoSRegion interface { Type() configapi.QoSRegionType // OwnerPoolName returns region's owner pool name OwnerPoolName() string + // GetResourcePackageName returns the resource package name of this region. + // It is derived from region ownerPoolName and is used to associate regions with + // resource package pinned cpuset information in sysadvisor. 
+ GetResourcePackageName() string // IsEmpty returns true if no container remains in region IsEmpty() bool @@ -95,8 +100,11 @@ func GetRegionBasicMetricTags(r QoSRegion) []metrics.MetricTag { provisionPolicyPrior, provisionPolicyInUse := r.GetProvisionPolicy() headroomPolicyPrior, headroomPolicyInUse := r.GetHeadRoomPolicy() + // regionName is the name of the region, without the resource package suffix for isolation region + regionName, _ := resourcepackage.UnwrapOwnerPoolName(r.Name()) tags := []metrics.MetricTag{ - {Key: "region_name", Val: r.Name()}, + {Key: "region_name", Val: regionName}, + {Key: "resource_package_name", Val: r.GetResourcePackageName()}, {Key: "region_type", Val: string(r.Type())}, {Key: "owner_pool_name", Val: r.OwnerPoolName()}, {Key: "pool_type", Val: commonstate.GetPoolType(r.OwnerPoolName())}, diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_base.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_base.go index 961f34c4d7..26951caa67 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_base.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_base.go @@ -171,8 +171,10 @@ type QoSRegionBase struct { name string ownerPoolName string - regionType v1alpha1.QoSRegionType - regionStatus types.RegionStatus + // resourcePackageName is derived from ownerPoolName by resourcepackage.UnwrapOwnerPoolName. 
+ resourcePackageName string + regionType v1alpha1.QoSRegionType + regionStatus types.RegionStatus types.ResourceEssentials types.ControlEssentials @@ -228,15 +230,16 @@ type QoSRegionBase struct { } // NewQoSRegionBase returns a base qos region instance with common region methods -func NewQoSRegionBase(name string, ownerPoolName string, regionType v1alpha1.QoSRegionType, +func NewQoSRegionBase(name string, ownerPoolName string, resourcePackageName string, regionType v1alpha1.QoSRegionType, conf *config.Configuration, extraConf interface{}, isNumaBinding bool, isNumaExclusive bool, metaReader metacache.MetaReader, metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter, ) *QoSRegionBase { r := &QoSRegionBase{ - conf: conf, - name: name, - ownerPoolName: ownerPoolName, - regionType: regionType, + conf: conf, + name: name, + ownerPoolName: ownerPoolName, + resourcePackageName: resourcePackageName, + regionType: regionType, bindingNumas: machine.NewCPUSet(), podSet: make(types.PodSet), @@ -311,6 +314,12 @@ func (r *QoSRegionBase) OwnerPoolName() string { return r.ownerPoolName } +func (r *QoSRegionBase) GetResourcePackageName() string { + r.Lock() + defer r.Unlock() + return r.resourcePackageName +} + func (r *QoSRegionBase) IsEmpty() bool { r.Lock() defer r.Unlock() diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_dedicated.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_dedicated.go index c4862279fc..9718d8657b 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_dedicated.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_dedicated.go @@ -64,7 +64,7 @@ func NewQoSRegionDedicated(ci *types.ContainerInfo, conf *config.Configuration, isNumaBinding := numaID != commonstate.FakedNUMAID r := &QoSRegionDedicated{ - QoSRegionBase: NewQoSRegionBase(regionName, ci.OwnerPoolName, configapi.QoSRegionTypeDedicated, conf, extraConf, + QoSRegionBase: 
NewQoSRegionBase(regionName, ci.OwnerPoolName, GetResourcePackageName(ci), configapi.QoSRegionTypeDedicated, conf, extraConf, isNumaBinding, ci.IsDedicatedNumaExclusive(), metaReader, metaServer, emitter), } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_isolation.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_isolation.go index a221666598..5a24b7a660 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_isolation.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_isolation.go @@ -27,6 +27,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/metaserver" "github.com/kubewharf/katalyst-core/pkg/metrics" "github.com/kubewharf/katalyst-core/pkg/util/machine" + resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) const ( @@ -55,8 +56,10 @@ func NewQoSRegionIsolation(ci *types.ContainerInfo, customRegionName string, con if isNumaBinding { ownerPoolName = isolationRegionNUMAOwnerPoolName } + + _, pkgName := resourcepackage.UnwrapOwnerPoolName(ci.OriginOwnerPoolName) r := &QoSRegionIsolation{ - QoSRegionBase: NewQoSRegionBase(regionName, ownerPoolName, configapi.QoSRegionTypeIsolation, conf, extraConf, isNumaBinding, false, metaReader, metaServer, emitter), + QoSRegionBase: NewQoSRegionBase(resourcepackage.WrapOwnerPoolName(regionName, pkgName), ownerPoolName, pkgName, configapi.QoSRegionTypeIsolation, conf, extraConf, isNumaBinding, false, metaReader, metaServer, emitter), } if isNumaBinding { r.bindingNumas = machine.NewCPUSet(numaID) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_share.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_share.go index f0a5206746..b94ee92fbb 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_share.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region/region_share.go @@ -40,7 +40,7 @@ import ( type QoSRegionShare struct { 
*QoSRegionBase - configTranslator *general.CommonSuffixTranslator + configTranslator general.SuffixTranslator } // NewQoSRegionShare returns a region instance for shared pool @@ -59,8 +59,8 @@ func NewQoSRegionShare(ci *types.ContainerInfo, conf *config.Configuration, extr // When put isolation pods back to share pool, advisor should create a new share region with OriginOwnerPoolName (OriginOwnerPoolName != OwnerPoolName). isNumaBinding := numaID != commonstate.FakedNUMAID r := &QoSRegionShare{ - QoSRegionBase: NewQoSRegionBase(regionName, ci.OriginOwnerPoolName, configapi.QoSRegionTypeShare, conf, extraConf, isNumaBinding, false, metaReader, metaServer, emitter), - configTranslator: general.NewCommonSuffixTranslator(commonstate.NUMAPoolInfix), + QoSRegionBase: NewQoSRegionBase(regionName, ci.OriginOwnerPoolName, GetResourcePackageName(ci), configapi.QoSRegionTypeShare, conf, extraConf, isNumaBinding, false, metaReader, metaServer, emitter), + configTranslator: commonstate.OwnerPoolNameTranslator, } if isNumaBinding { diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/helper/memory.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/helper/memory.go index 2b4ad82e72..49589c87b2 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/helper/memory.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/helper/memory.go @@ -35,8 +35,6 @@ import ( "github.com/kubewharf/katalyst-core/pkg/util/native" ) -var configTranslator = general.NewCommonSuffixTranslator(commonstate.NUMAPoolInfix) - func GetAvailableNUMAsAndReclaimedCores(conf *config.Configuration, metaReader metacache.MetaReader, metaServer *metaserver.MetaServer) (machine.CPUSet, []*types.ContainerInfo, error) { var errList []error @@ -111,7 +109,7 @@ func GetAvailableNUMAsAndReclaimedCores(conf *config.Configuration, metaReader m } if containerInfo.IsSharedNumaBinding() && - sets.NewString(dynamicConf.DisableReclaimSharePools...).Has(configTranslator.Translate(containerInfo.OriginOwnerPoolName)) { + 
sets.NewString(dynamicConf.DisableReclaimSharePools...).Has(commonstate.OwnerPoolNameTranslator.Translate(containerInfo.OriginOwnerPoolName)) { bindingResult, err := containerInfo.GetActualNUMABindingResult() if err != nil { errList = append(errList, err) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor.go index 7b6854407a..724f6342d8 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor.go @@ -157,7 +157,7 @@ func (ra *memoryResourceAdvisor) GetHeadroom() (resource.Quantity, map[int]resou return resource.Quantity{}, nil, fmt.Errorf("failed to get valid headroom") } -func (ra *memoryResourceAdvisor) UpdateAndGetAdvice() (interface{}, error) { +func (ra *memoryResourceAdvisor) UpdateAndGetAdvice(ctx context.Context) (interface{}, error) { startTime := time.Now() defer func() { general.InfoS("finished", "duration", time.Since(startTime)) @@ -194,8 +194,9 @@ func (ra *memoryResourceAdvisor) update() (*types.InternalMemoryCalculationResul for _, headroomPolicy := range ra.headroomPolices { // capacity and reserved can both be adjusted dynamically during running process headroomPolicy.SetEssentials(types.ResourceEssentials{ - EnableReclaim: ra.conf.GetDynamicConfiguration().EnableReclaim, - ResourceUpperBound: float64(ra.metaServer.MemoryCapacity), + EnableReclaim: ra.conf.GetDynamicConfiguration().EnableReclaim, + // Use NormalMemoryCapacity which excludes static hugepages for accurate upper bound calculation + ResourceUpperBound: float64(ra.metaServer.MemoryTopology.NormalMemoryCapacity), ReservedForAllocate: reservedForAllocate.AsApproximateFloat64(), }) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor_test.go index 0463ac0697..8ee5a824b0 100644 --- 
a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor_test.go @@ -123,6 +123,7 @@ func newTestMemoryAdvisor(t *testing.T, pods []*v1.Pod, checkpointDir, stateFile require.NoError(t, err) memoryTopology, err := machine.GenerateDummyMemoryTopology(4, 500<<30) require.NoError(t, err) + memoryTopology.NormalMemoryCapacity = 1000 << 30 extraTopology, err := machine.GenerateDummyExtraTopology(4) require.NoError(t, err) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/headroompolicy/policy_canonical.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/headroompolicy/policy_canonical.go index 5abd5a8a1e..043d2895e8 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/headroompolicy/policy_canonical.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/headroompolicy/policy_canonical.go @@ -95,7 +95,8 @@ func (p *PolicyCanonical) estimateNonReclaimedQoSMemoryRequirement() (float64, e if ci.ContainerType == v1alpha1.ContainerType_MAIN { bindingNumas := machine.GetCPUAssignmentNUMAs(ci.TopologyAwareAssignments) for _, numaID := range bindingNumas.ToSliceInt() { - memoryCap, ok := p.metaServer.MemoryDetails[numaID] + // Use NormalMemoryDetails which excludes static hugepages for accurate per-NUMA capacity + memoryCap, ok := p.metaServer.NormalMemoryDetails[numaID] if !ok { errList = append(errList, fmt.Errorf("get memory capacity of numa %v failed", numaID)) return true diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/provisioner/memory_provisioner.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/provisioner/memory_provisioner.go index 49ae8e89a0..80496df43e 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/provisioner/memory_provisioner.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/provisioner/memory_provisioner.go @@ -100,8 +100,9 @@ func (m *memoryProvisioner) 
Reconcile(status *types.MemoryPressureStatus) (err e ReservedResourceForAllocate[v1.ResourceMemory] m.policy.SetEssentials( types.ResourceEssentials{ - EnableReclaim: m.conf.GetDynamicConfiguration().EnableReclaim, - ResourceUpperBound: float64(m.metaServer.MemoryCapacity), + EnableReclaim: m.conf.GetDynamicConfiguration().EnableReclaim, + // Use NormalMemoryCapacity which excludes static hugepages for accurate upper bound calculation + ResourceUpperBound: float64(m.metaServer.MemoryTopology.NormalMemoryCapacity), ReservedForAllocate: reservedForAllocate.AsApproximateFloat64(), }) err = m.policy.Update() diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/provisioner/policy/policy_canonical.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/provisioner/policy/policy_canonical.go index d38e0ad33a..fd9bac3c4e 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/provisioner/policy/policy_canonical.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/provisioner/policy/policy_canonical.go @@ -77,7 +77,8 @@ func (p *PolicyCanonical) Update() error { } memoryProvisions[numaID] += uint64(memFreeNuma.Value) - memoryTotals[numaID] += uint64(memTotalNuma.Value) + // static huge pages should be excluded from total memory + memoryTotals[numaID] += uint64(memTotalNuma.Value - float64(p.metaServer.StaticHugePagesDetails[numaID])) availNUMATotal += memTotalNuma.Value general.InfoS("numa memory free", "numaID", numaID, diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/provisioner/policy/policy_canonical_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/provisioner/policy/policy_canonical_test.go index ccda066168..0a323b6455 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/provisioner/policy/policy_canonical_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/provisioner/policy/policy_canonical_test.go @@ -122,10 +122,11 @@ 
func TestPolicyCanonical(t *testing.T) { now := time.Now() type fields struct { - podList []*v1.Pod - containers []*types.ContainerInfo - essentials types.ResourceEssentials - setFakeMetric func(store *metric.FakeMetricsFetcher) + podList []*v1.Pod + containers []*types.ContainerInfo + essentials types.ResourceEssentials + setFakeMetric func(store *metric.FakeMetricsFetcher) + staticHugePages machine.MemoryDetails } tests := []struct { @@ -397,6 +398,34 @@ func TestPolicyCanonical(t *testing.T) { 1: 0, }, }, + { + name: "normal: with large static huge pages causing clamp", + fields: fields{ + podList: []*v1.Pod{}, + containers: []*types.ContainerInfo{}, + essentials: types.ResourceEssentials{ + EnableReclaim: true, + ResourceUpperBound: 100 << 30, + ReservedForAllocate: 0, + }, + setFakeMetric: func(store *metric.FakeMetricsFetcher) { + store.SetNodeMetric(pkgconsts.MetricMemScaleFactorSystem, utilmetric.MetricData{Value: 0, Time: &now}) + store.SetNumaMetric(0, pkgconsts.MetricMemTotalNuma, utilmetric.MetricData{Value: 100 << 30, Time: &now}) + store.SetNumaMetric(1, pkgconsts.MetricMemTotalNuma, utilmetric.MetricData{Value: 100 << 30, Time: &now}) + store.SetNumaMetric(0, pkgconsts.MetricMemFreeNuma, utilmetric.MetricData{Value: 80 << 30, Time: &now}) + store.SetNumaMetric(1, pkgconsts.MetricMemFreeNuma, utilmetric.MetricData{Value: 80 << 30, Time: &now}) + }, + staticHugePages: machine.MemoryDetails{ + 0: 60 << 30, + 1: 0, + }, + }, + wantErr: false, + want: machine.MemoryDetails{ + 0: 40 << 30, + 1: 80 << 30, + }, + }, } for _, tt := range tests { @@ -425,6 +454,7 @@ func TestPolicyCanonical(t *testing.T) { } metaServer := generateTestMetaServer(t, tt.fields.podList, metricsFetcher) + metaServer.StaticHugePagesDetails = tt.fields.staticHugePages p := NewPolicyCanonical(conf, nil, metaCache, metaServer, metrics.DummyMetrics{}) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/resource.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/resource.go index 
050d7405c2..d31c322ddd 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/resource.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/resource.go @@ -49,7 +49,7 @@ type SubResourceAdvisor interface { Run(ctx context.Context) // UpdateAndGetAdvice triggers resource provision update and returns the latest advice - UpdateAndGetAdvice() (interface{}, error) + UpdateAndGetAdvice(ctx context.Context) (interface{}, error) // GetHeadroom returns the latest resource headroom quantity for resource reporter GetHeadroom() (resource.Quantity, map[int]resource.Quantity, error) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/resource_stub.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/resource_stub.go index dbf2391eac..b90dfae25e 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/resource_stub.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/resource_stub.go @@ -91,7 +91,7 @@ func (s *SubResourceAdvisorStub) Name() string { func (s *SubResourceAdvisorStub) Run(ctx context.Context) { } -func (s *SubResourceAdvisorStub) UpdateAndGetAdvice() (interface{}, error) { +func (s *SubResourceAdvisorStub) UpdateAndGetAdvice(_ context.Context) (interface{}, error) { return nil, nil } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/server/cpu_server.go b/pkg/agent/sysadvisor/plugin/qosaware/server/cpu_server.go index 2642b7b762..4e5adce601 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/server/cpu_server.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/server/cpu_server.go @@ -48,6 +48,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" "github.com/kubewharf/katalyst-core/pkg/util/general" "github.com/kubewharf/katalyst-core/pkg/util/machine" + resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) const ( @@ -130,7 +131,7 @@ func (cs *cpuServer) GetAdvice(ctx context.Context, request *cpuadvisor.GetAdvic } general.InfofV(6, "QRM CPU Plugin wanted feature gates: %v, among them sysadvisor supported 
feature gates: %v", lo.Keys(request.WantedFeatureGates), lo.Keys(supportedWantedFeatureGates)) - result, err := cs.updateAdvisor(supportedWantedFeatureGates) + result, err := cs.updateAdvisor(ctx, supportedWantedFeatureGates) if err != nil { general.Errorf("update advisor failed: %v", err) return nil, fmt.Errorf("update advisor failed: %w", err) @@ -251,7 +252,7 @@ func (cs *cpuServer) getAndPushAdvice(client cpuadvisor.CPUPluginClient, server // old asynchronous communication interface does not support feature gate negotiation. If necessary, upgrade to the synchronization interface. emptyMap := map[string]*advisorsvc.FeatureGate{} - result, err := cs.updateAdvisor(emptyMap) + result, err := cs.updateAdvisor(server.Context(), emptyMap) if err != nil { return err } @@ -273,7 +274,7 @@ func (cs *cpuServer) getAndPushAdvice(client cpuadvisor.CPUPluginClient, server return nil } -func (cs *cpuServer) updateAdvisor(featureGates map[string]*advisorsvc.FeatureGate) (*cpuInternalResult, error) { +func (cs *cpuServer) updateAdvisor(ctx context.Context, featureGates map[string]*advisorsvc.FeatureGate) (*cpuInternalResult, error) { // update feature gates in meta cache err := cs.metaCache.SetSupportedWantedFeatureGates(featureGates) if err != nil { @@ -282,7 +283,7 @@ func (cs *cpuServer) updateAdvisor(featureGates map[string]*advisorsvc.FeatureGa } // trigger advisor update and get latest advice - advisorRespRaw, err := cs.resourceAdvisor.UpdateAndGetAdvice() + advisorRespRaw, err := cs.resourceAdvisor.UpdateAndGetAdvice(ctx) if err != nil { _ = cs.emitter.StoreInt64(cs.genMetricsName(metricServerAdvisorUpdateFailed), int64(cs.period.Seconds()), metrics.MetricTypeNameCount) return nil, fmt.Errorf("get advice failed: %w", err) @@ -428,6 +429,52 @@ func (cs *cpuServer) updateMetaCacheInput(ctx context.Context, req *cpuadvisor.G var errs []error livingPoolNameSet := sets.NewString() + if req.GetResourcePackageConfig() == nil { + general.InfoS("resource package config is nil, 
skip updating meta cache") + _ = cs.metaCache.SetResourcePackageConfig(nil) + } else { + cfg := make(types.ResourcePackageConfig) + for numaID, numaConfig := range req.ResourcePackageConfig.NumaResourcePackages { + if numaConfig == nil { + continue + } + if _, ok := cfg[int(numaID)]; !ok { + cfg[int(numaID)] = make(map[string]*types.ResourcePackageState) + } + for pkgName, pkgConfig := range numaConfig.Packages { + if pkgConfig == nil { + continue + } + pinnedCpusetStr := pkgConfig.PinnedCpuset + var pinnedCpuset machine.CPUSet + if pinnedCpusetStr == "" { + pinnedCpuset = machine.NewCPUSet() + } else { + var err error + pinnedCpuset, err = machine.Parse(pinnedCpusetStr) + if err != nil { + return fmt.Errorf("failed to parse pinned cpuset: %v, numaID %d pkgName %s cpuset %q", err, numaID, pkgName, pinnedCpusetStr) + } + } + + var attributes map[string]string + if len(pkgConfig.Attributes) > 0 { + attributes = make(map[string]string, len(pkgConfig.Attributes)) + for k, v := range pkgConfig.Attributes { + attributes[k] = v + } + } + + cfg[int(numaID)][pkgName] = &types.ResourcePackageState{ + PinnedCPUSet: pinnedCpuset, + Attributes: attributes, + } + } + } + general.InfoS("updated resource package config", "cfg", cfg) + _ = cs.metaCache.SetResourcePackageConfig(cfg) + } + // update pool entries first, which are needed for updating container entries for entryName, entry := range req.Entries { poolInfo, ok := entry.Entries[commonstate.FakedContainerName] @@ -607,11 +654,28 @@ func (cs *cpuServer) setContainerInfoBasedOnContainerAllocationInfo( if info.Metadata.QosLevel == consts.PodAnnotationQoSLevelSharedCores && info.Metadata.Annotations[consts.PodAnnotationMemoryEnhancementNumaBinding] == consts.PodAnnotationMemoryEnhancementNumaBindingEnable { - originOwnerPoolName, err := commonstate.GetSpecifiedNUMABindingPoolName(info.Metadata.QosLevel, info.Metadata.Annotations) + poolName, err := commonstate.GetSpecifiedNUMABindingPoolName(info.Metadata.QosLevel, 
info.Metadata.Annotations) if err != nil { return fmt.Errorf("get specified numa binding pool name failed: %w", err) } - ci.OriginOwnerPoolName = originOwnerPoolName + + targetNUMAID, err := commonstate.GetSpecifiedNUMABindingNUMAID(info.Metadata.Annotations) + if err != nil { + return fmt.Errorf("get specified numa binding numa id failed: %w", err) + } + + pkgName := resourcepackage.GetResourcePackageName(info.Metadata.Annotations) + if pkgName != "" && poolName != commonstate.EmptyOwnerPoolName { + // get resource package config from meta cache, make sure it has been set already before setting owner pool name + cfg := cs.metaCache.GetResourcePackageConfig() + if pinnedSets, ok := cfg[targetNUMAID]; ok { + if state, exists := pinnedSets[pkgName]; exists && state.GetPinnedCPUSet().Size() > 0 { + poolName = resourcepackage.WrapOwnerPoolName(poolName, pkgName) + } + } + } + + ci.OriginOwnerPoolName = poolName } else { ci.OriginOwnerPoolName = commonstate.GetSpecifiedPoolName(info.Metadata.QosLevel, info.Metadata.Annotations[consts.PodAnnotationCPUEnhancementCPUSet]) } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/server/cpu_server_test.go b/pkg/agent/sysadvisor/plugin/qosaware/server/cpu_server_test.go index 07332960a7..91342f249f 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/server/cpu_server_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/server/cpu_server_test.go @@ -297,7 +297,7 @@ type mockCPUResourceAdvisor struct { err error } -func (m *mockCPUResourceAdvisor) UpdateAndGetAdvice() (interface{}, error) { +func (m *mockCPUResourceAdvisor) UpdateAndGetAdvice(_ context.Context) (interface{}, error) { if m.onUpdate != nil { m.onUpdate() } @@ -700,6 +700,15 @@ func TestCPUServerUpdateMetaCacheInput(t *testing.T) { }, }, }, + ResourcePackageConfig: &cpuadvisor.ResourcePackageConfig{ + NumaResourcePackages: map[uint64]*cpuadvisor.NumaResourcePackageConfig{ + 0: { + Packages: map[string]*cpuadvisor.ResourcePackageItemConfig{ + "pkgA": {PinnedCpuset: "2-3"}, 
+ }, + }, + }, + }, } pods := []*v1.Pod{} for podUID, entries := range request.Entries { @@ -777,6 +786,15 @@ func TestCPUServerUpdateMetaCacheInput(t *testing.T) { err := cs.updateMetaCacheInput(context.Background(), request) require.NoError(t, err) + expectedResourcePackageConfig := types.ResourcePackageConfig{ + 0: map[string]*types.ResourcePackageState{ + "pkgA": { + PinnedCPUSet: machine.MustParse("2-3"), + }, + }, + } + require.Equal(t, expectedResourcePackageConfig, cs.metaCache.GetResourcePackageConfig()) + expectedContainerInfo := []*types.ContainerInfo{ { PodUID: "pod2", @@ -871,3 +889,25 @@ func TestCPUServerUpdateMetaCacheInput(t *testing.T) { require.Equal(t, expectedPoolInfo, actualPoolInfo) } } + +func TestCPUServerUpdateMetaCacheInput_InvalidResourcePackageCPUSet(t *testing.T) { + t.Parallel() + + cs := newTestCPUServer(t, nil, nil) + request := &cpuadvisor.GetAdviceRequest{ + Entries: map[string]*cpuadvisor.ContainerAllocationInfoEntries{}, + ResourcePackageConfig: &cpuadvisor.ResourcePackageConfig{ + NumaResourcePackages: map[uint64]*cpuadvisor.NumaResourcePackageConfig{ + 0: { + Packages: map[string]*cpuadvisor.ResourcePackageItemConfig{ + "pkgA": {PinnedCpuset: "bad"}, + }, + }, + }, + }, + } + + err := cs.updateMetaCacheInput(context.Background(), request) + require.Error(t, err) + require.Equal(t, types.ResourcePackageConfig{}, cs.metaCache.GetResourcePackageConfig()) +} diff --git a/pkg/agent/sysadvisor/plugin/qosaware/server/memory_server.go b/pkg/agent/sysadvisor/plugin/qosaware/server/memory_server.go index 04923521c1..b30111ac6b 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/server/memory_server.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/server/memory_server.go @@ -146,7 +146,7 @@ func (ms *memoryServer) GetAdvice(ctx context.Context, request *advisorsvc.GetAd general.InfofV(6, "QRM Memory Plugin wanted feature gates: %v, among them sysadvisor supported feature gates: %v", lo.Keys(request.WantedFeatureGates), 
lo.Keys(supportedWantedFeatureGates)) - result, err := ms.updateAdvisor(supportedWantedFeatureGates) + result, err := ms.updateAdvisor(ctx, supportedWantedFeatureGates) if err != nil { general.Errorf("update advisor failed: %v", err) return nil, fmt.Errorf("update advisor failed: %w", err) @@ -254,8 +254,8 @@ type memoryInternalResult struct { ExtraEntries []*advisorsvc.CalculationInfo } -func (ms *memoryServer) updateAdvisor(supportedWantedFeatureGates map[string]*advisorsvc.FeatureGate) (*memoryInternalResult, error) { - advisorRespRaw, err := ms.resourceAdvisor.UpdateAndGetAdvice() +func (ms *memoryServer) updateAdvisor(ctx context.Context, supportedWantedFeatureGates map[string]*advisorsvc.FeatureGate) (*memoryInternalResult, error) { + advisorRespRaw, err := ms.resourceAdvisor.UpdateAndGetAdvice(ctx) if err != nil { return nil, fmt.Errorf("get memory advice failed: %w", err) } @@ -271,7 +271,7 @@ func (ms *memoryServer) updateAdvisor(supportedWantedFeatureGates map[string]*ad func (ms *memoryServer) getAndPushAdvice(server advisorsvc.AdvisorService_ListAndWatchServer) error { // old asynchronous communication interface does not support feature gate negotiation. If necessary, upgrade to the synchronization interface. 
emptyMap := map[string]*advisorsvc.FeatureGate{} - result, err := ms.updateAdvisor(emptyMap) + result, err := ms.updateAdvisor(server.Context(), emptyMap) if err != nil { _ = ms.emitter.StoreInt64(ms.genMetricsName(metricServerAdvisorUpdateFailed), int64(ms.period.Seconds()), metrics.MetricTypeNameCount) return fmt.Errorf("update advisor failed: %w", err) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/server/memory_server_test.go b/pkg/agent/sysadvisor/plugin/qosaware/server/memory_server_test.go index 89526b4c0f..367562aa91 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/server/memory_server_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/server/memory_server_test.go @@ -200,7 +200,7 @@ type MockMemoryAdvisor struct { err error } -func (a *MockMemoryAdvisor) UpdateAndGetAdvice() (interface{}, error) { +func (a *MockMemoryAdvisor) UpdateAndGetAdvice(_ context.Context) (interface{}, error) { return a.advice, a.err } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/server/server.go b/pkg/agent/sysadvisor/plugin/qosaware/server/server.go index 7724b4832a..a79066861e 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/server/server.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/server/server.go @@ -53,7 +53,7 @@ type subQRMServer interface { } type subResourceAdvisor interface { - UpdateAndGetAdvice() (interface{}, error) + UpdateAndGetAdvice(ctx context.Context) (interface{}, error) } type qrmServerWrapper struct { diff --git a/pkg/agent/sysadvisor/types/helper.go b/pkg/agent/sysadvisor/types/helper.go index 2ddfa8b2c7..0459c4ce02 100644 --- a/pkg/agent/sysadvisor/types/helper.go +++ b/pkg/agent/sysadvisor/types/helper.go @@ -27,6 +27,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/util/general" "github.com/kubewharf/katalyst-core/pkg/util/machine" qosutil "github.com/kubewharf/katalyst-core/pkg/util/qos" + resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" ) func (ci *ContainerInfo) IsNumaBinding() bool { @@ -56,6 +57,17 @@ 
func (ci *ContainerInfo) GetActualNUMABindingResult() (int, error) { return commonstate.GetSpecifiedNUMABindingNUMAID(ci.Annotations) } +// GetResourcePackageName returns the resource package name of the container. +// It retrieves the package name from the container's annotations. +// If the container info is nil or the annotation is missing, it returns an empty string. +func (ci *ContainerInfo) GetResourcePackageName() string { + if ci == nil { + return "" + } + + return resourcepackage.GetResourcePackageName(ci.Annotations) +} + func (ci *ContainerInfo) IsDedicatedNumaExclusive() bool { return ci.IsDedicatedNumaBinding() && ci.IsNumaExclusive() } diff --git a/pkg/agent/sysadvisor/types/helper_test.go b/pkg/agent/sysadvisor/types/helper_test.go index 0f2e248b66..634ce3da27 100644 --- a/pkg/agent/sysadvisor/types/helper_test.go +++ b/pkg/agent/sysadvisor/types/helper_test.go @@ -63,3 +63,52 @@ func TestClonePodEntries(t *testing.T) { assert.True(t, reflect.DeepEqual(copyPodEntries, podEntries)) } + +func TestContainerInfo_GetResourcePackageName(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + ci *ContainerInfo + expected string + }{ + { + name: "normal case", + ci: &ContainerInfo{ + Annotations: map[string]string{ + consts.PodAnnotationResourcePackageKey: "pkg1", + }, + }, + expected: "pkg1", + }, + { + name: "nil container info", + ci: nil, + expected: "", + }, + { + name: "nil annotations", + ci: &ContainerInfo{ + Annotations: nil, + }, + expected: "", + }, + { + name: "no resource package annotation", + ci: &ContainerInfo{ + Annotations: map[string]string{ + "other": "val", + }, + }, + expected: "", + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.expected, tt.ci.GetResourcePackageName()) + }) + } +} diff --git a/pkg/agent/sysadvisor/types/resource_package.go b/pkg/agent/sysadvisor/types/resource_package.go new file mode 100644 index 0000000000..97fec1b4f3 --- 
/dev/null +++ b/pkg/agent/sysadvisor/types/resource_package.go @@ -0,0 +1,88 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package types + +import "github.com/kubewharf/katalyst-core/pkg/util/machine" + +// ResourcePackageState stores the state of a resource package on a specific NUMA node. +type ResourcePackageState struct { + PinnedCPUSet machine.CPUSet + Attributes map[string]string +} + +func (s *ResourcePackageState) GetAttributes() map[string]string { + if s == nil { + return nil + } + return s.Attributes +} + +func (s *ResourcePackageState) GetPinnedCPUSet() machine.CPUSet { + if s == nil { + return machine.NewCPUSet() + } + return s.PinnedCPUSet +} + +// Clone returns a deep copy of ResourcePackageState. +func (s *ResourcePackageState) Clone() *ResourcePackageState { + if s == nil { + return nil + } + clone := &ResourcePackageState{ + PinnedCPUSet: s.PinnedCPUSet.Clone(), + } + if s.Attributes != nil { + clone.Attributes = make(map[string]string, len(s.Attributes)) + for k, v := range s.Attributes { + clone.Attributes[k] = v + } + } + return clone +} + +// ResourcePackageConfig stores resource package related configurations organized by NUMA node. +// It is used as an in-memory snapshot in sysadvisor metacache, and is expected to be deep-copied +// when being read/written across module boundaries. 
+// +// Key format: +// - first key: NUMA id +// - second key: resource package name +// - value: state for the resource package on that NUMA node +type ResourcePackageConfig map[int]map[string]*ResourcePackageState + +// Clone returns a deep copy of ResourcePackageConfig. +func (c ResourcePackageConfig) Clone() ResourcePackageConfig { + if c == nil { + return nil + } + + out := make(ResourcePackageConfig, len(c)) + for numaID, pkgMap := range c { + if pkgMap == nil { + out[numaID] = nil + continue + } + + outPkgMap := make(map[string]*ResourcePackageState, len(pkgMap)) + for pkgName, state := range pkgMap { + outPkgMap[pkgName] = state.Clone() + } + out[numaID] = outPkgMap + } + return out +} diff --git a/pkg/config/agent/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go b/pkg/config/agent/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go index 143ac97633..c72fc8261a 100644 --- a/pkg/config/agent/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go +++ b/pkg/config/agent/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go @@ -25,15 +25,16 @@ import ( ) type ReclaimedResourceConfiguration struct { - EnableReclaim bool - DisableReclaimSharePools []string - ReservedResourceForReport v1.ResourceList - MinReclaimedResourceForReport v1.ResourceList - MinIgnoredReclaimedResourceForReport v1.ResourceList - ReservedResourceForAllocate v1.ResourceList - MinReclaimedResourceForAllocate v1.ResourceList - NumaMinReclaimedResourceRatioForAllocate v1.ResourceList - NumaMinReclaimedResourceForAllocate v1.ResourceList + EnableReclaim bool + DisableReclaimSharePools []string + DisableReclaimPinnedCPUSetResourcePackageSelector string + ReservedResourceForReport v1.ResourceList + MinReclaimedResourceForReport v1.ResourceList + MinIgnoredReclaimedResourceForReport v1.ResourceList + ReservedResourceForAllocate v1.ResourceList + MinReclaimedResourceForAllocate v1.ResourceList + NumaMinReclaimedResourceRatioForAllocate v1.ResourceList + 
NumaMinReclaimedResourceForAllocate v1.ResourceList *cpuheadroom.CPUHeadroomConfiguration *memoryheadroom.MemoryHeadroomConfiguration diff --git a/pkg/config/agent/qrm/cpu_plugin.go b/pkg/config/agent/qrm/cpu_plugin.go index 6d8269b5c9..c213b72d72 100644 --- a/pkg/config/agent/qrm/cpu_plugin.go +++ b/pkg/config/agent/qrm/cpu_plugin.go @@ -19,6 +19,8 @@ package qrm import ( "time" + "k8s.io/apimachinery/pkg/labels" + "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/hintoptimizer" "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/irqtuner" ) @@ -69,6 +71,9 @@ type CPUDynamicPolicyConfig struct { EnableDefaultDedicatedCoresCPUBurst bool // EnableDefaultSharedCoresCPUBurst indicates whether to enable cpu burst for shared cores by default EnableDefaultSharedCoresCPUBurst bool + // IRQForbiddenPinnedResourcePackageAttributeSelector is the selector to filter pinned resource packages that are + // forbidden for irq binding. + IRQForbiddenPinnedResourcePackageAttributeSelector labels.Selector *hintoptimizer.HintOptimizerConfiguration *irqtuner.IRQTunerConfiguration diff --git a/pkg/config/agent/qrm/memory_plugin.go b/pkg/config/agent/qrm/memory_plugin.go index 25d936fa4b..3812f98e32 100644 --- a/pkg/config/agent/qrm/memory_plugin.go +++ b/pkg/config/agent/qrm/memory_plugin.go @@ -50,6 +50,8 @@ type MemoryQRMPluginConfig struct { // NUMABindResultResourceAllocationAnnotationKey: the annotation key for numa bind result resource allocation // it will be used to set cgroup path for numa bind result resource allocation NUMABindResultResourceAllocationAnnotationKey string + // ExtraMemoryResources: the slice of extra memory resources such as hugepages-* + ExtraMemoryResources []string // SockMemQRMPluginConfig: the configuration for sockmem limitation in cgroup and host level SockMemQRMPluginConfig // LogCacheQRMPluginConfig: the configuration for logcache evicting diff --git a/pkg/config/agent/qrm/qrm_base.go b/pkg/config/agent/qrm/qrm_base.go index 
7f86fbfd83..beeb71c980 100644 --- a/pkg/config/agent/qrm/qrm_base.go +++ b/pkg/config/agent/qrm/qrm_base.go @@ -42,6 +42,8 @@ type GenericQRMPluginConfiguration struct { // IsInMemoryStore indicates whether we want to store the state in memory or on disk // if set true, the state will be stored in tmpfs EnableInMemoryState bool + // TopologyAllocationAnnotationKey is the annotation key that indicates the topology-aware allocations of containers. + TopologyAllocationAnnotationKey string *statedirectory.StateDirectoryConfiguration } diff --git a/pkg/consts/cnr.go b/pkg/consts/cnr.go new file mode 100644 index 0000000000..38aa788158 --- /dev/null +++ b/pkg/consts/cnr.go @@ -0,0 +1,40 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package consts + +// File cnr.go defines constant keys for CustomNodeResource (CNR) zone attributes. +// It centralizes the attribute names used by topology collectors and adapters so +// CNR producers and consumers stay consistent across the stack. + +const ( + // ZoneAttributeNameNUMADistance is the attribute key for NUMA distance list data. + // Example: a NUMA zone may expose "20,10" to represent distances to all NUMA nodes. + // Note: the value is derived from sysfs distance files and serialized as a string. + ZoneAttributeNameNUMADistance = "numa_distance" + // ZoneAttributeNameThreadTopologyInfo is the attribute key for thread sibling mapping. 
+ // Example: "0:3,1:2,2:1,3:0" maps each CPU to its hyper-thread sibling. + // Note: this attribute is reported only when thread-topology reporting is enabled. + ZoneAttributeNameThreadTopologyInfo = "thread_topology_info" + // ZoneAttributeNameReservedCPUList is the attribute key for reserved CPUs in a NUMA zone. + // Example: "0-1,8-9" indicates CPUs reserved for system or agent usage on that NUMA node. + // Note: the value is a cpuset string and is emitted only when the set is non-empty. + ZoneAttributeNameReservedCPUList = "reserved_cpu_list" + // ZoneAttributeNameCPULists is the attribute key for CPU lists in a cache group zone. + // Example: a cache group zone may expose "2-5,10-13" to show its CPU membership. + // Note: this is typically reported for L3 cache groups and may be vendor-gated. + ZoneAttributeNameCPULists = "cpu_lists" +) diff --git a/pkg/consts/qrm.go b/pkg/consts/qrm.go index 90fd53fc4e..7fb7518de1 100644 --- a/pkg/consts/qrm.go +++ b/pkg/consts/qrm.go @@ -26,4 +26,8 @@ const ( const ( // QRMResourceAnnotationKeyNUMABindResult is the annotation key for the numa binding result QRMResourceAnnotationKeyNUMABindResult = "qrm.katalyst.kubewharf.io/numa_bind_result" + + // QRMPodAnnotationTopologyAllocationKey is the annotation key for pod annotation about the pod's topology aware allocations. + // It is set during allocation, and kubelet will read the annotation. + QRMPodAnnotationTopologyAllocationKey = "qrm.katalyst.kubewharf.io/topology_allocation" ) diff --git a/pkg/metaserver/resourcepackage/cached.go b/pkg/metaserver/resourcepackage/cached.go new file mode 100644 index 0000000000..9b44256cd9 --- /dev/null +++ b/pkg/metaserver/resourcepackage/cached.go @@ -0,0 +1,76 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourcepackage + +import ( + "context" + "sync" + "time" + + apiequality "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/util/wait" + + "github.com/kubewharf/katalyst-core/pkg/util/general" + resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" +) + +const ( + syncResourcePackageUpdatePeriod = 30 * time.Second +) + +type CachedResourcePackageManager struct { + mux sync.RWMutex + resourcePackageMap resourcepackage.NUMAResourcePackageItems + + ResourcePackageManager +} + +func (m *CachedResourcePackageManager) Run(stopCh <-chan struct{}) error { + m.updateResourcePackageMap() + go wait.Until(m.updateResourcePackageMap, syncResourcePackageUpdatePeriod, stopCh) + return nil +} + +func NewCachedResourcePackageManager(rpm ResourcePackageManager) *CachedResourcePackageManager { + m := &CachedResourcePackageManager{ + ResourcePackageManager: rpm, + } + return m +} + +func (m *CachedResourcePackageManager) NodeResourcePackages(_ context.Context) (resourcepackage.NUMAResourcePackageItems, error) { + m.mux.RLock() + defer m.mux.RUnlock() + return m.resourcePackageMap, nil +} + +func (m *CachedResourcePackageManager) updateResourcePackageMap() { + // Get resource package information from meta server + resourcePackageMap, err := m.ResourcePackageManager.NodeResourcePackages(context.Background()) + if err != nil { + general.Errorf("NodeResourcePackages failed with error: %v", err) + return + } + + m.mux.Lock() + defer m.mux.Unlock() + if apiequality.Semantic.DeepEqual(resourcePackageMap, m.resourcePackageMap) { + return + } + 
general.Infof("update resource package map from %+v to %+v", m.resourcePackageMap, resourcePackageMap) + m.resourcePackageMap = resourcePackageMap +} diff --git a/pkg/metaserver/resourcepackage/cached_test.go b/pkg/metaserver/resourcepackage/cached_test.go new file mode 100644 index 0000000000..b7d40d8aea --- /dev/null +++ b/pkg/metaserver/resourcepackage/cached_test.go @@ -0,0 +1,197 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourcepackage + +import ( + "context" + "fmt" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + + nodev1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package" +) + +// MockResourcePackageManager is a mock implementation of ResourcePackageManager +type MockResourcePackageManager struct { + lock sync.Mutex + returnItems resourcepackage.NUMAResourcePackageItems + returnErr error + called int +} + +func (m *MockResourcePackageManager) NodeResourcePackages(ctx context.Context) (resourcepackage.NUMAResourcePackageItems, error) { + m.lock.Lock() + defer m.lock.Unlock() + m.called++ + return m.returnItems, m.returnErr +} + +func (m *MockResourcePackageManager) ConvertNPDResourcePackages(npd *nodev1alpha1.NodeProfileDescriptor) (resourcepackage.NUMAResourcePackageItems, error) { + return nil, nil +} + +func (m *MockResourcePackageManager) setReturn(items resourcepackage.NUMAResourcePackageItems, 
err error) { + m.lock.Lock() + defer m.lock.Unlock() + m.returnItems = items + m.returnErr = err +} + +func (m *MockResourcePackageManager) getCalled() int { + m.lock.Lock() + defer m.lock.Unlock() + return m.called +} + +func TestNewCachedResourcePackageManager(t *testing.T) { + t.Parallel() + mockRPM := &MockResourcePackageManager{} + cachedMgr := NewCachedResourcePackageManager(mockRPM) + assert.NotNil(t, cachedMgr) + assert.Equal(t, mockRPM, cachedMgr.ResourcePackageManager) +} + +func TestCachedResourcePackageManager_NodeResourcePackages(t *testing.T) { + t.Parallel() + mockRPM := &MockResourcePackageManager{} + cachedMgr := NewCachedResourcePackageManager(mockRPM) + + expectedItems := make(resourcepackage.NUMAResourcePackageItems) + expectedItems[0] = map[string]resourcepackage.ResourcePackageItem{ + "test": {}, + } + + // Directly set the cache since we are in the same package + cachedMgr.resourcePackageMap = expectedItems + + // Test normal retrieval + items, err := cachedMgr.NodeResourcePackages(context.Background()) + assert.NoError(t, err) + assert.Equal(t, expectedItems, items) + + // Verify mock was NOT called (should use cache) + assert.Equal(t, 0, mockRPM.getCalled()) +} + +func TestCachedResourcePackageManager_Run(t *testing.T) { + t.Parallel() + mockRPM := &MockResourcePackageManager{} + cachedMgr := NewCachedResourcePackageManager(mockRPM) + + expectedItems := make(resourcepackage.NUMAResourcePackageItems) + expectedItems[0] = map[string]resourcepackage.ResourcePackageItem{ + "test": {}, + } + mockRPM.setReturn(expectedItems, nil) + + stopCh := make(chan struct{}) + defer close(stopCh) + + // Run calls updateResourcePackageMap synchronously once + err := cachedMgr.Run(stopCh) + assert.NoError(t, err) + + // Check if cache is updated + cachedMgr.mux.RLock() + items := cachedMgr.resourcePackageMap + cachedMgr.mux.RUnlock() + + assert.Equal(t, expectedItems, items) + assert.GreaterOrEqual(t, mockRPM.getCalled(), 1) +} + +func 
TestCachedResourcePackageManager_updateResourcePackageMap(t *testing.T) { + t.Parallel() + mockRPM := &MockResourcePackageManager{} + cachedMgr := NewCachedResourcePackageManager(mockRPM) + + // Case 1: Error from upstream + mockRPM.setReturn(nil, fmt.Errorf("some error")) + cachedMgr.updateResourcePackageMap() + assert.Nil(t, cachedMgr.resourcePackageMap) + + // Case 2: Success update + expectedItems := make(resourcepackage.NUMAResourcePackageItems) + expectedItems[0] = map[string]resourcepackage.ResourcePackageItem{ + "test": {}, + } + mockRPM.setReturn(expectedItems, nil) + cachedMgr.updateResourcePackageMap() + assert.Equal(t, expectedItems, cachedMgr.resourcePackageMap) + + // Case 3: No change (deep equal) + // We call it again. The logic should just return. + // We can't easily verify "return" happened without logs, but we verify state remains same. + cachedMgr.updateResourcePackageMap() + assert.Equal(t, expectedItems, cachedMgr.resourcePackageMap) + + // Case 4: Change + newItems := make(resourcepackage.NUMAResourcePackageItems) + newItems[0] = map[string]resourcepackage.ResourcePackageItem{ + "test2": {}, + } + mockRPM.setReturn(newItems, nil) + cachedMgr.updateResourcePackageMap() + assert.Equal(t, newItems, cachedMgr.resourcePackageMap) +} + +func TestCachedResourcePackageManager_Concurrency(t *testing.T) { + t.Parallel() + mockRPM := &MockResourcePackageManager{} + cachedMgr := NewCachedResourcePackageManager(mockRPM) + + stopCh := make(chan struct{}) + defer close(stopCh) + + // Setup initial data + items1 := make(resourcepackage.NUMAResourcePackageItems) + items1[0] = map[string]resourcepackage.ResourcePackageItem{"v1": {}} + mockRPM.setReturn(items1, nil) + + // Initialize + cachedMgr.Run(stopCh) + + // Concurrently read and update + var wg sync.WaitGroup + wg.Add(2) + + // Writer routine + go func() { + defer wg.Done() + for i := 0; i < 100; i++ { + mockRPM.setReturn(make(resourcepackage.NUMAResourcePackageItems), nil) + 
cachedMgr.updateResourcePackageMap() + time.Sleep(1 * time.Millisecond) + } + }() + + // Reader routine + go func() { + defer wg.Done() + for i := 0; i < 100; i++ { + _, err := cachedMgr.NodeResourcePackages(context.Background()) + assert.NoError(t, err) + time.Sleep(1 * time.Millisecond) + } + }() + + wg.Wait() +} diff --git a/pkg/metaserver/resourcepackage/manager.go b/pkg/metaserver/resourcepackage/manager.go index 6ab53b9fe6..c4c16a8c78 100644 --- a/pkg/metaserver/resourcepackage/manager.go +++ b/pkg/metaserver/resourcepackage/manager.go @@ -21,7 +21,7 @@ import ( "strconv" "github.com/pkg/errors" apierrors "k8s.io/apimachinery/pkg/util/errors" nodev1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" @@ -34,14 +33,14 @@ type ResourcePackageManager interface { // NodeResourcePackages returns the resource package division for the // specified node. The returned map's keys are NUMA IDs (as int) // and the values are slices of ResourcePackageItem (containing ResourcePackage and Config) - // belonging to that NUMA node: map[NUMA ID] -> []resourcepackage.ResourcePackageItem. - NodeResourcePackages(ctx context.Context) (map[int][]resourcepackage.ResourcePackageItem, error) + // belonging to that NUMA node: map[NUMA ID] -> map[package name]ResourcePackageItem. + NodeResourcePackages(ctx context.Context) (resourcepackage.NUMAResourcePackageItems, error) // ConvertNPDResourcePackages converts a given NodeProfileDescriptor to // resource packages. The returned map's keys are NUMA IDs (as int) // and the values are slices of ResourcePackageItem (containing ResourcePackage and Config) - // belonging to that NUMA node: map[NUMA ID] -> []resourcepackage.ResourcePackageItem. - ConvertNPDResourcePackages(npd *nodev1alpha1.NodeProfileDescriptor) (map[int][]resourcepackage.ResourcePackageItem, error) + // belonging to that NUMA node: map[NUMA ID] -> map[package name]ResourcePackageItem. 
+ ConvertNPDResourcePackages(npd *nodev1alpha1.NodeProfileDescriptor) (resourcepackage.NUMAResourcePackageItems, error) } // resourcePackageManager is the default implementation of ResourcePackageManager @@ -53,7 +52,7 @@ type resourcePackageManager struct { // specified node. The returned map's keys are NUMA IDs (as int) // and the values are slices of ResourcePackageItem (containing ResourcePackage and Config) // belonging to that NUMA node: map[NUMA ID] -> []resourcepackage.ResourcePackageItem. -func (m *resourcePackageManager) NodeResourcePackages(ctx context.Context) (map[int][]resourcepackage.ResourcePackageItem, error) { +func (m *resourcePackageManager) NodeResourcePackages(ctx context.Context) (resourcepackage.NUMAResourcePackageItems, error) { npd, err := m.fetcher.GetNPD(ctx) if err != nil { return nil, errors.Wrap(err, "failed to get NPD from fetcher") @@ -65,9 +64,13 @@ func (m *resourcePackageManager) NodeResourcePackages(ctx context.Context) (map[ // resource packages. The returned map's keys are NUMA IDs (as int) // and the values are slices of ResourcePackageItem (containing ResourcePackage and Config) // belonging to that NUMA node: map[NUMA ID] -> []resourcepackage.ResourcePackageItem -func (m *resourcePackageManager) ConvertNPDResourcePackages(npd *nodev1alpha1.NodeProfileDescriptor) (map[int][]resourcepackage.ResourcePackageItem, error) { +func (m *resourcePackageManager) ConvertNPDResourcePackages(npd *nodev1alpha1.NodeProfileDescriptor) (resourcepackage.NUMAResourcePackageItems, error) { + if npd == nil || npd.Status.NodeMetrics == nil { + return nil, errors.New("npd or npd.Status.NodeMetrics is nil") + } + resourcePackageMetrics := resourcepackage.ConvertNPDMetricsToResourcePackages(npd.Status.NodeMetrics) - resourcePackageMap := make(map[int][]resourcepackage.ResourcePackageItem) + resourcePackageMap := make(resourcepackage.NUMAResourcePackageItems) var errList []error for _, metric := range resourcePackageMetrics { @@ -76,7 +79,11 @@ func 
(m *resourcePackageManager) ConvertNPDResourcePackages(npd *nodev1alpha1.No errList = append(errList, errors.Wrap(err, "numa ID invalid")) continue } - resourcePackageMap[numaID] = metric.ResourcePackages + + resourcePackageMap[numaID] = make(map[string]resourcepackage.ResourcePackageItem) + for _, item := range metric.ResourcePackages { + resourcePackageMap[numaID][item.PackageName] = item + } } return resourcePackageMap, apierrors.NewAggregate(errList) } diff --git a/pkg/util/cgroup/common/types.go b/pkg/util/cgroup/common/types.go index d92f4cef10..b7b218426b 100644 --- a/pkg/util/cgroup/common/types.go +++ b/pkg/util/cgroup/common/types.go @@ -129,6 +129,14 @@ const ( IOCostModelLinear IOCostModel = "linear" ) +type CPUSetPartitionFlag string + +const ( + CPUSetPartitionFlagRoot CPUSetPartitionFlag = "root" + CPUSetPartitionFlagMember CPUSetPartitionFlag = "member" + CPUSetPartitionFlagIsolated CPUSetPartitionFlag = "isolated" +) + // IOCostQoSData is the io.cost.qos data supported in cgroupv2 type IOCostQoSData struct { Enable uint32 `json:"enable"` // Weight-based control enable diff --git a/pkg/util/cgroup/manager/cgroup.go b/pkg/util/cgroup/manager/cgroup.go index d54e5ec159..81ca400dd1 100644 --- a/pkg/util/cgroup/manager/cgroup.go +++ b/pkg/util/cgroup/manager/cgroup.go @@ -94,6 +94,15 @@ func ApplyCPUSetWithAbsolutePath(absCgroupPath string, data *common.CPUSetData) return GetManager().ApplyCPUSet(absCgroupPath, data) } +func ApplyCPUSetPartitionWithRelativePath(relCgroupPath string, partitionFlag common.CPUSetPartitionFlag) error { + absCgroupPath := common.GetAbsCgroupPath("cpuset", relCgroupPath) + return GetManager().ApplyCPUSetPartition(absCgroupPath, partitionFlag) +} + +func ApplyCPUSetPartitionWithAbsolutePath(absCgroupPath string, partitionFlag common.CPUSetPartitionFlag) error { + return GetManager().ApplyCPUSetPartition(absCgroupPath, partitionFlag) +} + func ApplyCPUSetForContainer(podUID, containerId string, data *common.CPUSetData) error { 
if data == nil { return fmt.Errorf("ApplyCPUSetForContainer with nil cgroup data") diff --git a/pkg/util/cgroup/manager/fake_manager.go b/pkg/util/cgroup/manager/fake_manager.go index 49241567ea..00c661bc67 100644 --- a/pkg/util/cgroup/manager/fake_manager.go +++ b/pkg/util/cgroup/manager/fake_manager.go @@ -32,6 +32,10 @@ func (f *FakeCgroupManager) ApplyCPUSet(absCgroupPath string, data *common.CPUSe return nil } +func (f *FakeCgroupManager) ApplyCPUSetPartition(absCgroupPath string, partitionFlag common.CPUSetPartitionFlag) error { + return nil +} + func (f *FakeCgroupManager) ApplyNetCls(absCgroupPath string, data *common.NetClsData) error { return nil } diff --git a/pkg/util/cgroup/manager/manager.go b/pkg/util/cgroup/manager/manager.go index 89db3dd2eb..2bf7ef007b 100644 --- a/pkg/util/cgroup/manager/manager.go +++ b/pkg/util/cgroup/manager/manager.go @@ -37,6 +37,7 @@ type Manager interface { ApplyMemory(absCgroupPath string, data *common.MemoryData) error ApplyCPU(absCgroupPath string, data *common.CPUData) error ApplyCPUSet(absCgroupPath string, data *common.CPUSetData) error + ApplyCPUSetPartition(absCgroupPath string, partitionFlag common.CPUSetPartitionFlag) error ApplyNetCls(absCgroupPath string, data *common.NetClsData) error ApplyIOCostQoS(absCgroupPath string, devID string, data *common.IOCostQoSData) error ApplyIOCostModel(absCgroupPath string, devID string, data *common.IOCostModelData) error diff --git a/pkg/util/cgroup/manager/v1/fs_linux.go b/pkg/util/cgroup/manager/v1/fs_linux.go index 949741e910..8220305d3d 100644 --- a/pkg/util/cgroup/manager/v1/fs_linux.go +++ b/pkg/util/cgroup/manager/v1/fs_linux.go @@ -189,6 +189,10 @@ func (m *manager) ApplyCPUSet(absCgroupPath string, data *common.CPUSetData) err return nil } +func (m *manager) ApplyCPUSetPartition(_ string, _ common.CPUSetPartitionFlag) error { + return fmt.Errorf("cgroupv1 does not support cpuset partition feature") +} + func (m *manager) ApplyNetCls(absCgroupPath string, data 
*common.NetClsData) error { if data.ClassID != 0 { classID := fmt.Sprintf("%d", data.ClassID) diff --git a/pkg/util/cgroup/manager/v1/fs_unsupported.go b/pkg/util/cgroup/manager/v1/fs_unsupported.go index 347ebd5141..d0f105c897 100644 --- a/pkg/util/cgroup/manager/v1/fs_unsupported.go +++ b/pkg/util/cgroup/manager/v1/fs_unsupported.go @@ -44,6 +44,10 @@ func (m *unsupportedManager) ApplyCPUSet(_ string, _ *common.CPUSetData) error { return fmt.Errorf("unsupported manager v1") } +func (m *unsupportedManager) ApplyCPUSetPartition(_ string, _ common.CPUSetPartitionFlag) error { + return fmt.Errorf("unsupported manager v1") +} + func (m *unsupportedManager) ApplyNetCls(_ string, _ *common.NetClsData) error { return fmt.Errorf("unsupported manager v1") } diff --git a/pkg/util/cgroup/manager/v2/fs_linux.go b/pkg/util/cgroup/manager/v2/fs_linux.go index 0ac25b2831..dac657cb4f 100644 --- a/pkg/util/cgroup/manager/v2/fs_linux.go +++ b/pkg/util/cgroup/manager/v2/fs_linux.go @@ -193,6 +193,16 @@ func (m *manager) ApplyCPUSet(absCgroupPath string, data *common.CPUSetData) err return nil } +func (m *manager) ApplyCPUSetPartition(absCgroupPath string, partitionFlag common.CPUSetPartitionFlag) error { + if err, applied, oldData := common.InstrumentedWriteFileIfChange(absCgroupPath, "cpuset.cpus.partition", string(partitionFlag)); err != nil { + return err + } else if applied { + klog.Infof("[CgroupV2] apply cpuset.cpus.partition successfully, cgroupPath: %s, data: %v, old data: %v\n", absCgroupPath, partitionFlag, oldData) + } + + return nil +} + func (m *manager) ApplyNetCls(_ string, _ *common.NetClsData) error { return errors.New("cgroups v2 does not support net_cls cgroup, please use eBPF via external manager") } diff --git a/pkg/util/cgroup/manager/v2/fs_linux_test.go b/pkg/util/cgroup/manager/v2/fs_linux_test.go index 062beb4199..353e533105 100644 --- a/pkg/util/cgroup/manager/v2/fs_linux_test.go +++ b/pkg/util/cgroup/manager/v2/fs_linux_test.go @@ -187,6 +187,42 @@ func 
Test_manager_ApplyCPUSet(t *testing.T) { } } +func Test_manager_ApplyCPUSetPartition(t *testing.T) { + t.Parallel() + + type args struct { + absCgroupPath string + partitionFlag common.CPUSetPartitionFlag + } + tests := []struct { + name string + m *manager + args args + wantErr bool + }{ + { + name: "test apply cpuset partition", + m: NewManager(), + args: args{ + absCgroupPath: "test-fake-path", + partitionFlag: common.CPUSetPartitionFlagRoot, + }, + wantErr: true, + }, + } + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + m := &manager{} + if err := m.ApplyCPUSetPartition(tt.args.absCgroupPath, tt.args.partitionFlag); (err != nil) != tt.wantErr { + t.Errorf("manager.ApplyCPUSetPartition() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + func Test_manager_ApplyNetCls(t *testing.T) { t.Parallel() diff --git a/pkg/util/cgroup/manager/v2/fs_unsupported.go b/pkg/util/cgroup/manager/v2/fs_unsupported.go index d113c484e0..ef6c0deaab 100644 --- a/pkg/util/cgroup/manager/v2/fs_unsupported.go +++ b/pkg/util/cgroup/manager/v2/fs_unsupported.go @@ -44,6 +44,10 @@ func (m *unsupportedManager) ApplyCPUSet(_ string, _ *common.CPUSetData) error { return fmt.Errorf("unsupported manager v2") } +func (m *unsupportedManager) ApplyCPUSetPartition(_ string, _ common.CPUSetPartitionFlag) error { + return fmt.Errorf("unsupported manager v2") +} + func (m *unsupportedManager) ApplyNetCls(_ string, _ *common.NetClsData) error { return fmt.Errorf("unsupported manager v2") } diff --git a/pkg/util/cnr.go b/pkg/util/cnr.go index f69623cef5..e2eca74ed5 100644 --- a/pkg/util/cnr.go +++ b/pkg/util/cnr.go @@ -296,6 +296,17 @@ func MergeAttributes(dst, src []apis.Attribute) []apis.Attribute { return attrs } +// AttributesToStringMap converts a slice of CNR attributes to a map[string]string. +// It iterates over the attributes slice and populates the map with Name as key and Value as value. 
+// This helper is useful for efficient attribute lookup and label selection. +func AttributesToStringMap(attrs []apis.Attribute) map[string]string { + res := make(map[string]string, len(attrs)) + for _, attr := range attrs { + res[attr.Name] = attr.Value + } + return res +} + // MergeAllocations merges two allocations, returns the merged result. // If the same allocation exists in both dst and src, the one in dst // will be kept. diff --git a/pkg/util/cnr_test.go b/pkg/util/cnr_test.go index 139d8818e0..01b9bcd101 100644 --- a/pkg/util/cnr_test.go +++ b/pkg/util/cnr_test.go @@ -1786,3 +1786,68 @@ func TestMergeTopologyZone(t *testing.T) { }) } } + +func TestAttributesToStringMap(t *testing.T) { + t.Parallel() + + type args struct { + attrs []nodeapis.Attribute + } + tests := []struct { + name string + args args + want map[string]string + }{ + { + name: "nil input", + args: args{ + attrs: nil, + }, + want: map[string]string{}, + }, + { + name: "empty input", + args: args{ + attrs: []nodeapis.Attribute{}, + }, + want: map[string]string{}, + }, + { + name: "single attribute", + args: args{ + attrs: []nodeapis.Attribute{ + {Name: "key1", Value: "val1"}, + }, + }, + want: map[string]string{"key1": "val1"}, + }, + { + name: "multiple attributes", + args: args{ + attrs: []nodeapis.Attribute{ + {Name: "key1", Value: "val1"}, + {Name: "key2", Value: "val2"}, + }, + }, + want: map[string]string{"key1": "val1", "key2": "val2"}, + }, + { + name: "duplicate attributes (last one wins)", + args: args{ + attrs: []nodeapis.Attribute{ + {Name: "key1", Value: "val1"}, + {Name: "key1", Value: "val2"}, + }, + }, + want: map[string]string{"key1": "val2"}, + }, + } + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := AttributesToStringMap(tt.args.attrs) + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/pkg/util/general/common.go b/pkg/util/general/common.go index 90095b632c..8e01efd4f7 100644 --- a/pkg/util/general/common.go +++ 
b/pkg/util/general/common.go @@ -29,6 +29,7 @@ import ( "time" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/sets" ) @@ -516,3 +517,37 @@ func ConvertLinuxListToString(numbers []int64) string { } return strings.TrimRight(result.String(), ",") } + +// ParseSelector returns a labels.Selector from the given string. +// If the string is empty, it returns labels.Nothing() and nil error. +func ParseSelector(selectorStr string) (labels.Selector, error) { + if selectorStr == "" { + return labels.Nothing(), nil + } + return labels.Parse(selectorStr) +} + +func MergeAnnotations(annotations ...map[string]string) map[string]string { + // For compatibility, no annotations returns nil map + if len(annotations) == 0 { + return nil + } + + var mergedAnnotations map[string]string + for _, annotation := range annotations { + if len(annotation) == 0 { + continue + } + + // Only allocate when there is a non-empty allocation + if mergedAnnotations == nil { + mergedAnnotations = make(map[string]string) + } + + for k, v := range annotation { + mergedAnnotations[k] = v + } + } + + return mergedAnnotations +} diff --git a/pkg/util/general/common_suffix_translator.go b/pkg/util/general/common_suffix_translator.go index 77c65c7814..326dc02e11 100644 --- a/pkg/util/general/common_suffix_translator.go +++ b/pkg/util/general/common_suffix_translator.go @@ -18,17 +18,21 @@ package general import "strings" -type CommonSuffixTranslator struct { +type SuffixTranslator interface { + Translate(s string) string +} + +type commonSuffixTranslator struct { suffix string } -func NewCommonSuffixTranslator(suffix string) *CommonSuffixTranslator { - return &CommonSuffixTranslator{ +func NewCommonSuffixTranslator(suffix string) SuffixTranslator { + return &commonSuffixTranslator{ suffix: suffix, } } -func (cs *CommonSuffixTranslator) Translate(s string) string { +func (cs *commonSuffixTranslator) Translate(s string) string { if strings.Contains(s, 
cs.suffix) { return strings.SplitN(s, cs.suffix, 2)[0] + cs.suffix } diff --git a/pkg/util/general/common_test.go b/pkg/util/general/common_test.go index 49400832b9..b0aa0338ca 100644 --- a/pkg/util/general/common_test.go +++ b/pkg/util/general/common_test.go @@ -302,3 +302,53 @@ func TestConvertLinuxListToString(t *testing.T) { }) } } + +func TestParseSelector(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + selectorStr string + wantEmpty bool + wantErr bool + }{ + { + name: "empty string", + selectorStr: "", + wantEmpty: true, + wantErr: false, + }, + { + name: "valid selector", + selectorStr: "foo=bar", + wantEmpty: false, + wantErr: false, + }, + { + name: "invalid selector", + selectorStr: "foo=bar,,baz", + wantEmpty: false, + wantErr: true, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got, err := ParseSelector(tt.selectorStr) + if (err != nil) != tt.wantErr { + t.Errorf("ParseSelector() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !tt.wantErr { + if tt.wantEmpty && got != nil && got.String() != "" { + t.Errorf("ParseSelector() got = %v, want empty selector", got) + } + if !tt.wantEmpty && got != nil && got.String() == "" { + t.Errorf("ParseSelector() got empty selector, want non-empty") + } + } + }) + } +} diff --git a/pkg/util/machine/topology.go b/pkg/util/machine/topology.go index cfe568bb78..0ad52e58df 100644 --- a/pkg/util/machine/topology.go +++ b/pkg/util/machine/topology.go @@ -122,6 +122,14 @@ func (d MemoryDetails) FillNUMANodesWithZero(allNUMAs CPUSet) MemoryDetails { type MemoryTopology struct { MemoryDetails MemoryDetails PageSize int + // NormalMemoryCapacity is the total memory capacity in bytes, excluding static hugepages + NormalMemoryCapacity uint64 + // NormalMemoryDetails is the memory capacity details by NUMA node, excluding static hugepages + NormalMemoryDetails MemoryDetails + // StaticHugePagesCapacity is the total static hugepages 
capacity in bytes + StaticHugePagesCapacity uint64 + // StaticHugePagesDetails is the static hugepages capacity details by NUMA node + StaticHugePagesDetails MemoryDetails } // AlignToPageSize returns the page numbers from mem numbers. @@ -190,6 +198,16 @@ func GenerateDummyMachineInfo(numaNum int, memoryCapacityGB int) (*info.MachineI machineInfo.Topology = append(machineInfo.Topology, info.Node{ Id: i, Memory: uint64(perNumaCapacityQuantity.Value()), + HugePages: []info.HugePagesInfo{ + { + PageSize: 2 * 1024, // 2Mi + NumPages: 1024, + }, + { + PageSize: 1 * 1024 * 1024, // 1Gi + NumPages: 8, + }, + }, }) } @@ -249,9 +267,18 @@ func GenerateDummyCPUTopology(cpuNum, socketNum, numaNum int) (*CPUTopology, err } func GenerateDummyMemoryTopology(numaNum int, memoryCapacity uint64) (*MemoryTopology, error) { - memoryTopology := &MemoryTopology{map[int]uint64{}, 4096} + memoryTopology := &MemoryTopology{ + MemoryDetails: map[int]uint64{}, + PageSize: 4096, + NormalMemoryDetails: map[int]uint64{}, + NormalMemoryCapacity: memoryCapacity, + StaticHugePagesDetails: map[int]uint64{}, + StaticHugePagesCapacity: 0, + } for i := 0; i < numaNum; i++ { memoryTopology.MemoryDetails[i] = memoryCapacity / uint64(numaNum) + memoryTopology.NormalMemoryDetails[i] = memoryCapacity / uint64(numaNum) + memoryTopology.StaticHugePagesDetails[i] = 0 } return memoryTopology, nil } @@ -472,6 +499,44 @@ func (d CPUDetails) CPUsInCores(ids ...int) CPUSet { return b } +// DiscoverMemoryTopology returns MemoryTopology based on cadvisor node info +func DiscoverMemoryTopology(machineInfo *info.MachineInfo) (*MemoryTopology, error) { + if machineInfo == nil { + return nil, fmt.Errorf("machineInfo is nil") + } + + memoryTopology := MemoryTopology{ + MemoryDetails: map[int]uint64{}, + PageSize: unix.Getpagesize(), + NormalMemoryDetails: map[int]uint64{}, + StaticHugePagesDetails: map[int]uint64{}, + StaticHugePagesCapacity: 0, + } + + for _, node := range machineInfo.Topology { + 
memoryTopology.MemoryDetails[node.Id] = node.Memory + + staticHugePagesInBytes := uint64(0) + for _, page := range node.HugePages { + staticHugePagesInBytes += page.NumPages * page.PageSize * 1024 + } + + memoryTopology.StaticHugePagesDetails[node.Id] = staticHugePagesInBytes + memoryTopology.StaticHugePagesCapacity += staticHugePagesInBytes + + normalMemory := node.Memory + if normalMemory > staticHugePagesInBytes { + normalMemory -= staticHugePagesInBytes + } else { + normalMemory = 0 + } + memoryTopology.NormalMemoryDetails[node.Id] = normalMemory + memoryTopology.NormalMemoryCapacity += normalMemory + } + + return &memoryTopology, nil +} + // Discover returns CPUTopology based on cadvisor node info func Discover(machineInfo *info.MachineInfo) (*CPUTopology, *MemoryTopology, error) { if machineInfo.NumCores == 0 { @@ -482,14 +547,12 @@ func Discover(machineInfo *info.MachineInfo) (*CPUTopology, *MemoryTopology, err numaNodeIDToSocketID := make(map[int]int, len(machineInfo.Topology)) numPhysicalCores := 0 - memoryTopology := MemoryTopology{ - MemoryDetails: map[int]uint64{}, - PageSize: unix.Getpagesize(), + memoryTopology, err := DiscoverMemoryTopology(machineInfo) + if err != nil { + return nil, nil, err } for _, node := range machineInfo.Topology { - memoryTopology.MemoryDetails[node.Id] = node.Memory - numPhysicalCores += len(node.Cores) for _, core := range node.Cores { l3CacheID := getUniqueL3CacheID(core) @@ -532,7 +595,7 @@ func Discover(machineInfo *info.MachineInfo) (*CPUTopology, *MemoryTopology, err NUMAToCPUs: numaToCPUs, CPUDetails: cpuDetails, CPUInfo: cpuInfo, - }, &memoryTopology, nil + }, memoryTopology, nil } // getUniqueL3CacheID returns the unique L3 cache ID for the given core. 
diff --git a/pkg/util/machine/topology_test.go b/pkg/util/machine/topology_test.go index 41da5ef468..c47b4f2632 100644 --- a/pkg/util/machine/topology_test.go +++ b/pkg/util/machine/topology_test.go @@ -21,6 +21,7 @@ import ( "sync" "testing" + info "github.com/google/cadvisor/info/v1" "github.com/stretchr/testify/assert" "k8s.io/apimachinery/pkg/util/sets" @@ -76,6 +77,128 @@ func TestMemoryDetailsEqual(t *testing.T) { } } +func TestDiscoverMemoryTopology(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + machineInfo *info.MachineInfo + wantMemoryTopology *MemoryTopology + wantErr bool + }{ + { + name: "Nil MachineInfo", + machineInfo: nil, + wantErr: true, + }, + { + name: "Single NUMA Node, No HugePages", + machineInfo: &info.MachineInfo{ + Topology: []info.Node{ + { + Id: 0, + Memory: 1024 * 1024 * 1024, // 1GB + }, + }, + }, + wantMemoryTopology: &MemoryTopology{ + MemoryDetails: map[int]uint64{0: 1024 * 1024 * 1024}, + NormalMemoryDetails: map[int]uint64{0: 1024 * 1024 * 1024}, + NormalMemoryCapacity: 1024 * 1024 * 1024, + StaticHugePagesDetails: map[int]uint64{0: 0}, + StaticHugePagesCapacity: 0, + }, + }, + { + name: "Single NUMA Node, With HugePages", + machineInfo: &info.MachineInfo{ + Topology: []info.Node{ + { + Id: 0, + Memory: 2 * 1024 * 1024 * 1024, // 2GB + HugePages: []info.HugePagesInfo{ + { + PageSize: 1024 * 1024, // 1GB + NumPages: 1, + }, + }, + }, + }, + }, + wantMemoryTopology: &MemoryTopology{ + MemoryDetails: map[int]uint64{0: 2 * 1024 * 1024 * 1024}, + NormalMemoryDetails: map[int]uint64{0: 1 * 1024 * 1024 * 1024}, + NormalMemoryCapacity: 1 * 1024 * 1024 * 1024, + StaticHugePagesDetails: map[int]uint64{0: 1 * 1024 * 1024 * 1024}, + StaticHugePagesCapacity: 1 * 1024 * 1024 * 1024, + }, + }, + { + name: "Multiple NUMA Nodes, Mixed HugePages", + machineInfo: &info.MachineInfo{ + Topology: []info.Node{ + { + Id: 0, + Memory: 4 * 1024 * 1024 * 1024, // 4GB + HugePages: []info.HugePagesInfo{ + { + PageSize: 1 * 1024 * 1024, 
// 1GB + NumPages: 2, + }, + }, + }, + { + Id: 1, + Memory: 4 * 1024 * 1024 * 1024, // 4GB + HugePages: []info.HugePagesInfo{ + { + PageSize: 2 * 1024, // 2MB + NumPages: 512, // 1GB + }, + }, + }, + }, + }, + wantMemoryTopology: &MemoryTopology{ + MemoryDetails: map[int]uint64{ + 0: 4 * 1024 * 1024 * 1024, + 1: 4 * 1024 * 1024 * 1024, + }, + NormalMemoryDetails: map[int]uint64{ + 0: 2 * 1024 * 1024 * 1024, + 1: 3 * 1024 * 1024 * 1024, + }, + NormalMemoryCapacity: 5 * 1024 * 1024 * 1024, + StaticHugePagesDetails: map[int]uint64{ + 0: 2 * 1024 * 1024 * 1024, + 1: 1 * 1024 * 1024 * 1024, + }, + StaticHugePagesCapacity: 3 * 1024 * 1024 * 1024, + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got, err := DiscoverMemoryTopology(tt.machineInfo) + if (err != nil) != tt.wantErr { + t.Errorf("DiscoverMemoryTopology() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !tt.wantErr { + // Ignore PageSize for comparison as it depends on system + got.PageSize = 0 + tt.wantMemoryTopology.PageSize = 0 + + assert.Equal(t, tt.wantMemoryTopology, got) + } + }) + } +} + func TestMemoryDetailsClone(t *testing.T) { t.Parallel() diff --git a/pkg/util/qos/mem_enhancement.go b/pkg/util/qos/mem_enhancement.go index 434e660ce6..793f0424c0 100644 --- a/pkg/util/qos/mem_enhancement.go +++ b/pkg/util/qos/mem_enhancement.go @@ -66,6 +66,11 @@ func AnnotationsIndicateNUMAExclusive(annotations map[string]string) bool { apiconsts.PodAnnotationMemoryEnhancementNumaExclusiveEnable } +func AnnotationsIndicateSharedCores(annotations map[string]string) bool { + return annotations[apiconsts.PodAnnotationQoSLevelKey] == + apiconsts.PodAnnotationQoSLevelSharedCores +} + // GetRSSOverUseEvictThreshold parse the user specified threshold and checks if it's valid func GetRSSOverUseEvictThreshold(qosConf *generic.QoSConfiguration, pod *v1.Pod) (threshold *float64, invalid bool) { memoryEnhancement := ParseMemoryEnhancement(qosConf, pod) 
diff --git a/pkg/util/resource-package/util.go b/pkg/util/resource-package/util.go index 2542e4a84a..7326fea9dd 100644 --- a/pkg/util/resource-package/util.go +++ b/pkg/util/resource-package/util.go @@ -16,7 +16,69 @@ limitations under the License. package resourcepackage -import "github.com/kubewharf/katalyst-api/pkg/consts" +import ( + "fmt" + "strings" + + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/sets" + + "github.com/kubewharf/katalyst-api/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/util/general" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +const ( + OwnerPoolNameSeparator = "/" +) + +// ResourcePackageState is an interface that provides methods to get the attributes and pinned CPUSet of a resource package. +// It is implemented by both qrm state.ResourcePackageState and sysadvisor types.ResourcePackageState, +// allowing generic utilities to work with both representations. +type ResourcePackageState interface { + GetAttributes() map[string]string + GetPinnedCPUSet() machine.CPUSet +} + +// GetMatchedPinnedCPUSet returns the union of PinnedCPUSets from resource packages that match the given selector. +func GetMatchedPinnedCPUSet[T ResourcePackageState](states map[string]T, selector labels.Selector) machine.CPUSet { + res := machine.NewCPUSet() + if selector == nil || selector.Empty() { + return res + } + for _, state := range states { + if selector.Matches(labels.Set(state.GetAttributes())) { + res = res.Union(state.GetPinnedCPUSet()) + } + } + return res +} + +// GetNUMAMatchedPinnedCPUSet returns a map of NUMA ID to the union of PinnedCPUSets from resource packages that match the selector. 
+func GetNUMAMatchedPinnedCPUSet[T ResourcePackageState](numaStates map[int]map[string]T, selector labels.Selector) map[int]machine.CPUSet { + res := make(map[int]machine.CPUSet) + if selector == nil || selector.Empty() { + return res + } + for numaID, pkgStates := range numaStates { + res[numaID] = GetMatchedPinnedCPUSet(pkgStates, selector) + } + return res +} + +// GetMatchedPackages returns a set of package names that match the given selector. +func GetMatchedPackages[T ResourcePackageState](states map[string]T, selector labels.Selector) sets.String { + res := sets.NewString() + if selector == nil || selector.Empty() { + return res + } + for pkgName, state := range states { + if selector.Matches(labels.Set(state.GetAttributes())) { + res.Insert(pkgName) + } + } + return res +} // GetResourcePackageName retrieves the resource package name from pod annotations. // It looks for the key "katalyst.kubewharf.io/resource_package" in the annotations map. @@ -34,3 +96,195 @@ func GetResourcePackageName(annotations map[string]string) string { return packageName } + +// WrapOwnerPoolName wraps the owner pool name with the package name. +// If the package name is empty, it returns the owner pool name as is. +// Otherwise, it prepends the package name to the owner pool name with a separator. +// Format: / +func WrapOwnerPoolName(ownerPoolName, pkgName string) string { + if pkgName == "" { + return ownerPoolName + } + return pkgName + OwnerPoolNameSeparator + ownerPoolName +} + +// UnwrapOwnerPoolName unwraps the owner pool name to get the original owner pool name and the package name. +// It splits the string by the last occurrence of the separator. +// +// Returns: +// - string: The original owner pool name (suffix). +// - string: The package name (prefix). +func UnwrapOwnerPoolName(ownerPoolName string) (string, string) { + // Find the last index of the separator to split the name correctly. 
+ // This handles cases where the package name itself might contain the separator. + // We assume the owner pool name does not contain the separator or we split by the last one. + idx := strings.LastIndex(ownerPoolName, OwnerPoolNameSeparator) + if idx == -1 { + return ownerPoolName, "" + } + + return ownerPoolName[idx+1:], ownerPoolName[:idx] +} + +type suffixTranslatorWrapper struct { + general.SuffixTranslator +} + +// ResourcePackageSuffixTranslatorWrapper wraps a SuffixTranslator to handle resource package names in owner pool names. +// It ensures that the translation logic is applied to the base owner pool name, stripping the package name prefix first. +func ResourcePackageSuffixTranslatorWrapper(translator general.SuffixTranslator) general.SuffixTranslator { + return &suffixTranslatorWrapper{translator} +} + +// Translate implements the SuffixTranslator interface. +// It extracts the base owner pool name using GetOwnerPoolName and delegates the translation to the wrapped translator. +func (s *suffixTranslatorWrapper) Translate(ownerPoolName string) string { + return s.SuffixTranslator.Translate(GetOwnerPoolName(ownerPoolName)) +} + +// GetOwnerPoolName extracts the base owner pool name from a potentially wrapped name. +// It ignores the package name prefix if present. +func GetOwnerPoolName(ownerPoolName string) string { + ownerPoolName, _ = UnwrapOwnerPoolName(ownerPoolName) + return ownerPoolName +} + +// GetResourcePackageConfig retrieves the configuration for a specific resource package on a NUMA node. +// +// Parameters: +// - numaID: The ID of the NUMA node. +// - pkgName: The name of the resource package. +// +// Returns: +// - *ResourcePackageConfig: The configuration if found. +// - error: An error if the receiver is nil, or if the NUMA ID or package name is not found. 
+func (r NUMAResourcePackageItems) GetResourcePackageConfig(numaID int, pkgName string) (*ResourcePackageConfig, error) {
+	if r == nil {
+		return nil, fmt.Errorf("numaResourcePackageItems is nil")
+	}
+
+	items, ok := r[numaID]
+	if !ok {
+		return nil, fmt.Errorf("numaID %d not found", numaID)
+	}
+
+	item, ok := items[pkgName]
+	if !ok {
+		return nil, fmt.Errorf("item not found for package %s on numa %d", pkgName, numaID)
+	}
+
+	return item.Config, nil
+}
+
+// GetPinnedCPUSetSize returns the size of the pinned CPU set for a specific resource package.
+// It checks if the package is configured to use a pinned CPU set and if it has allocatable resources.
+//
+// Returns:
+// - *int: The size of the pinned CPU set (number of CPUs), or nil if not pinned/configured.
+// - error: An error if the item is not found or not properly configured.
+func (r NUMAResourcePackageItems) GetPinnedCPUSetSize(numaID int, pkgName string) (*int, error) {
+	if r == nil {
+		return nil, fmt.Errorf("numaResourcePackageItems is nil")
+	}
+
+	items, ok := r[numaID]
+	if !ok {
+		return nil, fmt.Errorf("numaID %d not found", numaID)
+	}
+
+	item, ok := items[pkgName]
+	if !ok {
+		return nil, fmt.Errorf("item not found for package %s on numa %d", pkgName, numaID)
+	}
+
+	// Check if PinnedCPUSet is enabled in the config
+	if item.Config == nil || item.Config.PinnedCPUSet == nil || !*item.Config.PinnedCPUSet {
+		return nil, nil // not configured as pinned: report no size, and no error
+	}
+
+	if item.Allocatable == nil {
+		return nil, fmt.Errorf("item not allocatable for package %s on numa %d", pkgName, numaID)
+	}
+
+	// Calculate size from allocatable CPU resources
+	size := int(item.Allocatable.Cpu().Value())
+	return &size, nil
+}
+
+// ListAllPinnedCPUSetSize lists the sizes of pinned CPU sets for all resource packages across all NUMA nodes.
+// It filters out packages that do not have PinnedCPUSet enabled.
+func (r NUMAResourcePackageItems) ListAllPinnedCPUSetSize() (map[int]map[string]int, error) {
+	if r == nil {
+		return nil, fmt.Errorf("numaResourcePackageItems is nil")
+	}
+
+	pinnedCPUSets := make(map[int]map[string]int)
+	for numaID, items := range r {
+		pinnedCPUSets[numaID] = make(map[string]int)
+		for pkgName, item := range items {
+			if item.Config == nil || item.Config.PinnedCPUSet == nil || !*item.Config.PinnedCPUSet {
+				continue // not a pinned package: excluded from the result
+			}
+
+			if item.Allocatable == nil {
+				continue // unlike GetPinnedCPUSetSize, nil Allocatable is skipped here, not an error
+			}
+
+			pinnedCPUSets[numaID][pkgName] = int(item.Allocatable.Cpu().Value())
+		}
+	}
+
+	return pinnedCPUSets, nil
+}
+
+// GetAttributesMap retrieves the attributes map for a specific resource package on a NUMA node.
+// It returns a map of string to string, or nil if no attributes are found.
+func (r NUMAResourcePackageItems) GetAttributesMap(numaID int, pkgName string) map[string]string {
+	if r == nil {
+		return nil
+	}
+
+	pkgs, ok := r[numaID]
+	if !ok {
+		return nil
+	}
+
+	pkgItem, ok := pkgs[pkgName]
+	if !ok || len(pkgItem.Attributes) == 0 {
+		return nil
+	}
+
+	// Convert the attribute list into a name -> value map.
+	attributes := make(map[string]string, len(pkgItem.Attributes))
+	for _, attr := range pkgItem.Attributes {
+		attributes[attr.Name] = attr.Value
+	}
+
+	return attributes
+}
+
+// GetAllPinnedCPUSetSizeSum calculates the total size of pinned CPU sets for each NUMA node.
+// It sums up the CPU values of all pinned packages per NUMA node.
+func (r NUMAResourcePackageItems) GetAllPinnedCPUSetSizeSum() (map[int]int, error) {
+	if r == nil {
+		return nil, fmt.Errorf("numaResourcePackageItems is nil")
+	}
+
+	pinnedCPUSets := make(map[int]int)
+	for numaID, items := range r {
+		size := int64(0) // accumulate in int64 (matches Value()'s return type); converted to int per NUMA below
+		for _, item := range items {
+			if item.Config == nil || item.Config.PinnedCPUSet == nil || !*item.Config.PinnedCPUSet {
+				continue // not a pinned package: excluded from the sum
+			}
+
+			if item.Allocatable == nil {
+				continue // packages without allocatable resources contribute nothing
+			}
+
+			size += item.Allocatable.Cpu().Value()
+		}
+		pinnedCPUSets[numaID] = int(size)
+	}
+
+	return pinnedCPUSets, nil
+}
diff --git a/pkg/util/resource-package/util_test.go b/pkg/util/resource-package/util_test.go
index 8928d13460..26e74c50e4 100644
--- a/pkg/util/resource-package/util_test.go
+++ b/pkg/util/resource-package/util_test.go
@@ -17,59 +17,106 @@ limitations under the License.
 package resourcepackage
 
 import (
+	"reflect"
 	"testing"
 
-	"github.com/kubewharf/katalyst-api/pkg/consts"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/util/sets"
+
+	"github.com/kubewharf/katalyst-core/pkg/util/machine"
 )
 
-func TestGetResourcePackageName(t *testing.T) {
-	t.Parallel()
+type mockResourcePackageState struct {
+	attributes   map[string]string
+	pinnedCPUSet machine.CPUSet
+}
 
-	tests := []struct {
-		name        string
-		annotations map[string]string
-		want        string
-	}{
-		{
-			name: "normal case with resource package annotation",
-			annotations: map[string]string{
-				consts.PodAnnotationResourcePackageKey: "test-resource-package",
-			},
-			want: "test-resource-package",
-		},
-		{
-			name:        "empty annotations map",
-			annotations: map[string]string{},
-			want:        "",
+func (m *mockResourcePackageState) GetAttributes() map[string]string {
+	if m == nil {
+		return nil
+	}
+	return m.attributes
+}
+
+func (m *mockResourcePackageState) GetPinnedCPUSet() machine.CPUSet {
+	if m == nil {
+		return machine.NewCPUSet()
+	}
+	return m.pinnedCPUSet
+}
+
+func TestGetMatchedPinnedCPUSet(t *testing.T) {
+	t.Parallel()
+	states := map[string]*mockResourcePackageState{
+		"pkg1": {
+			attributes:   map[string]string{"disable-reclaim": "true"},
+			pinnedCPUSet: machine.NewCPUSet(1, 2),
 		},
-		{
-			name:        "nil annotations map",
-			annotations: nil,
-			want:        "",
+		"pkg2": {
+			attributes:   map[string]string{"disable-reclaim": "false"},
+			pinnedCPUSet: machine.NewCPUSet(3, 4),
 		},
-		{
-			name: "annotations map without resource package key",
-			annotations: map[string]string{
-				"other-key": "other-value",
+		"pkg3": nil, // nil entry exercises the mock's nil-receiver guards
+	}
+
+	selector, _ := labels.Parse("disable-reclaim=true")
+
+	res := GetMatchedPinnedCPUSet(states, selector)
+	if !reflect.DeepEqual(res.ToSliceInt(), []int{1, 2}) {
+		t.Errorf("expected [1, 2], got %v", res.ToSliceInt())
+	}
+
+	resEmpty := GetMatchedPinnedCPUSet(states, nil)
+	if resEmpty.Size() != 0 {
+		t.Errorf("expected empty cpuset, got %v", resEmpty.ToSliceInt())
+	}
+}
+
+func TestGetNUMAMatchedPinnedCPUSet(t *testing.T) {
+	t.Parallel()
+	numaStates := map[int]map[string]*mockResourcePackageState{
+		0: {
+			"pkg1": {
+				attributes:   map[string]string{"disable-reclaim": "true"},
+				pinnedCPUSet: machine.NewCPUSet(1, 2),
 			},
-			want: "",
 		},
-		{
-			name: "resource package key with empty value",
-			annotations: map[string]string{
-				consts.PodAnnotationResourcePackageKey: "",
+		1: {
+			"pkg2": {
+				attributes:   map[string]string{"disable-reclaim": "true"},
+				pinnedCPUSet: machine.NewCPUSet(3, 4),
 			},
-			want: "",
 		},
 	}
 
-	for _, tt := range tests {
-		tt := tt
-		t.Run(tt.name, func(t *testing.T) {
-			t.Parallel()
-			if got := GetResourcePackageName(tt.annotations); got != tt.want {
-				t.Errorf("GetResourcePackageName() = %v, want %v", got, tt.want)
-			}
-		})
+	selector, _ := labels.Parse("disable-reclaim=true")
+	res := GetNUMAMatchedPinnedCPUSet(numaStates, selector)
+
+	if !reflect.DeepEqual(res[0].ToSliceInt(), []int{1, 2}) {
+		t.Errorf("expected [1, 2] for NUMA 0, got %v", res[0].ToSliceInt())
+	}
+	if !reflect.DeepEqual(res[1].ToSliceInt(), []int{3, 4}) {
+		t.Errorf("expected [3, 4] for NUMA 1, got %v", res[1].ToSliceInt())
+	}
+}
+
+func TestGetMatchedPackages(t *testing.T) {
+	t.Parallel()
+	states := map[string]*mockResourcePackageState{
+		"pkg1": {
+			attributes: map[string]string{"disable-reclaim": "true"},
+		},
+		"pkg2": {
+			attributes: map[string]string{"disable-reclaim": "false"},
+		},
+		"pkg3": nil,
+	}
+
+	selector, _ := labels.Parse("disable-reclaim=true")
+	res := GetMatchedPackages(states, selector)
+
+	expected := sets.NewString("pkg1")
+	if !res.Equal(expected) {
+		t.Errorf("expected %v, got %v", expected, res)
 	}
 }