Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cmd/katalyst-agent/app/options/qrm/memory_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ type MemoryOptions struct {
EnableNonBindingShareCoresMemoryResourceCheck bool
EnableNUMAAllocationReactor bool
NUMABindResultResourceAllocationAnnotationKey string
ExtraMemoryResources []string

SockMemOptions
LogCacheOptions
Expand Down Expand Up @@ -157,6 +158,7 @@ func NewMemoryOptions() *MemoryOptions {
EnabledQoS: []string{apiconsts.PodAnnotationQoSLevelSharedCores},
MonGroupEnabledClosIDs: []string{},
},
ExtraMemoryResources: []string{},
}
}

Expand Down Expand Up @@ -235,6 +237,8 @@ func (o *MemoryOptions) AddFlags(fss *cliflag.NamedFlagSets) {
o.MonGroupEnabledClosIDs, "enabled-closid mon-groups")
fs.Float64Var(&o.MonGroupMaxCountRatio, "resctrl-mon-groups-max-count-ratio",
o.MonGroupMaxCountRatio, "ratio of mon_groups max count")
fs.StringSliceVar(&o.ExtraMemoryResources, "extra-memory-resources", o.ExtraMemoryResources,
"extra memory resources such as hugepages-*")
}

func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error {
Expand Down Expand Up @@ -273,6 +277,7 @@ func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error {
conf.EnabledQoS = o.EnabledQoS
conf.MonGroupEnabledClosIDs = o.MonGroupEnabledClosIDs
conf.MonGroupMaxCountRatio = o.MonGroupMaxCountRatio
conf.ExtraMemoryResources = o.ExtraMemoryResources

for _, reservation := range o.ReservedNumaMemory {
conf.ReservedNumaMemory[reservation.NumaNode] = reservation.Limits
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ replace (
k8s.io/kube-proxy => k8s.io/kube-proxy v0.24.6
k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.24.6
k8s.io/kubectl => k8s.io/kubectl v0.24.6
k8s.io/kubelet => github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2
k8s.io/kubelet => github.com/luomingmeng/kubelet v0.0.0-20260306101749-66566cd8838b
k8s.io/kubernetes => k8s.io/kubernetes v1.24.6
k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.24.6
k8s.io/metrics => k8s.io/metrics v0.24.6
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -576,8 +576,6 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kubewharf/katalyst-api v0.5.11-0.20260324091059-cae1d07d9882 h1:4KYYk/mAJAOIYDW5V+43wnjnP8p3bwHXAkAcw/AbzuQ=
github.com/kubewharf/katalyst-api v0.5.11-0.20260324091059-cae1d07d9882/go.mod h1:BZMVGVl3EP0eCn5xsDgV41/gjYkoh43abIYxrB10e3k=
github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2 h1:2KLMzgntDypiFJRX4fSQJCD+a6zIgHuhcAzd/7nAGmU=
github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8=
github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/libopenstorage/openstorage v1.0.0/go.mod h1:Sp1sIObHjat1BeXhfMqLZ14wnOzEhNx2YQedreMcUyc=
Expand All @@ -587,6 +585,8 @@ github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0U
github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc=
github.com/logrusorgru/aurora v0.0.0-20181002194514-a7b3b318ed4e/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4=
github.com/lpabon/godbc v0.1.1/go.mod h1:Jo9QV0cf3U6jZABgiJ2skINAXb9j8m51r07g4KI92ZA=
github.com/luomingmeng/kubelet v0.0.0-20260306101749-66566cd8838b h1:4fQ2SJiAbt+RMD/RCN/8iN8LevcHnLxXaFY5z2cuQVI=
github.com/luomingmeng/kubelet v0.0.0-20260306101749-66566cd8838b/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
Expand Down
22 changes: 19 additions & 3 deletions pkg/agent/qrm-plugins/commonstate/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ func (am *AllocationMeta) GetSpecifiedNUMABindingNUMAID() (int, error) {
return GetSpecifiedNUMABindingNUMAID(am.Annotations)
}

// SetSpecifiedNUMABindingNUMAID set the numa id for AllocationInfo
func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaID uint64) {
// SetSpecifiedNUMABindingNUMAID sets the NUMA IDs for AllocationInfo
func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaIDs []uint64) {
if am == nil {
return
}
Expand All @@ -155,7 +155,12 @@ func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaID uint64) {
am.Annotations = make(map[string]string)
}

am.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint] = machine.NewCPUSet(int(numaID)).String()
intIDs := make([]int, len(numaIDs))
for i, id := range numaIDs {
intIDs[i] = int(id)
}

am.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint] = machine.NewCPUSet(intIDs...).String()
}

// GetSpecifiedNUMABindingPoolName get numa_binding pool name
Expand Down Expand Up @@ -316,3 +321,14 @@ func (am *AllocationMeta) CheckDedicatedPool() bool {
}
return am.OwnerPoolName == PoolNameDedicated
}

// CheckDistributeEvenlyAcrossNuma reports whether the pod annotations carried by
// this AllocationMeta set the distribute-evenly-across-NUMA CPU enhancement to its
// enabled value. A nil receiver always yields false.
func (am *AllocationMeta) CheckDistributeEvenlyAcrossNuma() bool {
	if am == nil {
		return false
	}

	// Indexing a nil or empty annotation map yields the empty string, so no
	// separate presence check is needed before comparing.
	annotationValue := am.Annotations[consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma]
	return annotationValue == consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable
}
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ func (p *DynamicPolicy) reclaimedCoresAllocationHandler(ctx context.Context,
// set reclaimed numa_binding NUMA ID to allocationInfo
if req.Hint != nil && len(req.Hint.Nodes) == 1 && (reclaimActualBindingNUMAs.Contains(int(req.Hint.Nodes[0])) ||
!nonReclaimActualBindingNUMAs.Equals(machine.NewCPUSet(int(req.Hint.Nodes[0])))) {
allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0])
allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes)
}
}

Expand Down Expand Up @@ -470,7 +470,7 @@ func (p *DynamicPolicy) dedicatedCoresWithNUMABindingAllocationHandler(ctx conte
return nil, fmt.Errorf("numa binding without numa exclusive allocation result numa node size is %d, "+
"not equal to 1", len(req.Hint.Nodes))
}
allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0])
allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes)
}

// update pod entries directly.
Expand Down Expand Up @@ -741,7 +741,7 @@ func (p *DynamicPolicy) allocateSharedNumaBindingCPUs(req *pluginapi.ResourceReq
InitTimestamp: time.Now().Format(util.QRMTimeFormat),
RequestQuantity: reqFloat64,
}
allocationInfo.SetSpecifiedNUMABindingNUMAID(hint.Nodes[0])
allocationInfo.SetSpecifiedNUMABindingNUMAID(hint.Nodes)

if util.PodInplaceUpdateResizing(req) {
originAllocationInfo := p.state.GetAllocationInfo(allocationInfo.PodUid, allocationInfo.ContainerName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ func TestAllocateSharedNumaBindingCPUs(t *testing.T) {
0: machine.NewCPUSet(0, 1),
},
}
originAllocationInfo.SetSpecifiedNUMABindingNUMAID(0)
originAllocationInfo.SetSpecifiedNUMABindingNUMAID([]uint64{0})

policy.state.SetAllocationInfo(podUID, containerName, originAllocationInfo, false)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@ func (p *DynamicPolicy) dedicatedCoresWithNUMABindingHintHandler(_ context.Conte
(*commonstate.AllocationMeta).CheckDedicatedNUMABindingNUMAExclusive))

var extraErr error
hints, extraErr = util.GetHintsFromExtraStateFile(req.PodName, string(v1.ResourceCPU), p.extraStateFileAbsPath, availableNUMAs)
hints, extraErr = util.GetHintsFromExtraStateFile(req.PodName, p.extraStateFileAbsPath, availableNUMAs, []v1.ResourceName{
v1.ResourceCPU,
})
if extraErr != nil {
general.Infof("pod: %s/%s, container: %s GetHintsFromExtraStateFile failed with error: %v",
req.PodNamespace, req.PodName, req.ContainerName, extraErr)
Expand Down Expand Up @@ -303,10 +305,10 @@ func (p *DynamicPolicy) calculateHints(
maskCount := mask.Count()
if maskCount < minNUMAsCountNeeded {
return
} else if numaBinding && !numaExclusive && numaNumber <= 1 && maskCount > 1 {
} else if numaBinding && !numaExclusive && maskCount > 1 && numaNumber <= 1 {
// because it's hard to control memory allocation accurately,
// we only support numa_binding but not exclusive container with request smaller than 1 NUMA
// pods with distribute evenly across numa annotation can occupy more than 1 NUMA
// pods with numa number more than 1 can occupy more than 1 NUMA
return
}

Expand Down Expand Up @@ -371,7 +373,6 @@ func (p *DynamicPolicy) calculateHints(
if numaNumber != 0 {
minAffinitySize = numaNumber
}

// Update hint to be preferred if they have minimum number of NUMA nodes
for _, hint := range availableNumaHints {
if len(hint.Nodes) == minAffinitySize {
Expand Down
Loading
Loading