Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
3c6ee54
refactor(resource-package): change pinned cpuset representation from …
luomingmeng Feb 9, 2026
6cdd516
fix(cpu/dynamicpolicy): improve synchronization and test reliability
luomingmeng Feb 14, 2026
0855696
refactor(sysadvisor): restructure resource package handling and remov…
luomingmeng Feb 24, 2026
669c4ba
refactor(topology): centralize zone attribute names in consts package
luomingmeng Feb 25, 2026
0ee2dea
refactor(cpu): restructure pool allocation logic for pinned and commo…
luomingmeng Feb 25, 2026
8198b08
feat(resource-package): enhance CPU allocation with resource package …
luomingmeng Feb 25, 2026
58a2627
fix: check reclaim enable before updating min reclaimed cores quota
luomingmeng Feb 26, 2026
7fb6d6f
feat(qosaware): enhance numa binding pool name handling with resource…
luomingmeng Feb 26, 2026
400c04e
refactor(cpu): simplify resource package name retrieval and improve c…
luomingmeng Feb 27, 2026
46c5eb5
feat(cpu): add IRQ forbidden CPU selection by resource package attrib…
luomingmeng Mar 4, 2026
07ee22a
feat(cpu): add attributes to ResourcePackageItemConfig and update rel…
luomingmeng Mar 20, 2026
d5d85ca
refactor(resource): enhance resource package state handling with attr…
luomingmeng Mar 22, 2026
27b4c4c
feat(resource-package): add support for non-reclaimable pinned CPUs
luomingmeng Mar 22, 2026
10dad55
feat(cpu): add debug logs for resource package state changes
luomingmeng Mar 22, 2026
5f214fd
fix(cpu/dynamicpolicy): handle non-pinned resource packages and attri…
luomingmeng Mar 22, 2026
dbd720e
feat(qosaware): add debug logs for cpu resource allocation
luomingmeng Mar 22, 2026
3869444
feat(cpu): add debug logs for CPU allocation tracking
luomingmeng Mar 22, 2026
bc7d214
feat(util): add ParseSelector function for label parsing
luomingmeng Mar 22, 2026
f061424
fix(cpu): check cpu set size before wrapping owner pool name
luomingmeng Mar 22, 2026
34f9e86
feat(cgroup): add cpuset partition support for cgroupv2
luomingmeng Mar 26, 2026
59f3092
feat(memory): support hugepages and distribute evenly across numa
luomingmeng Apr 2, 2026
d3ffbb8
chore: rebase hugepages feature
JustinChengLZ Apr 8, 2026
12e614a
refactor(cpu): simplify topology allocation annotations logic
luomingmeng Apr 8, 2026
21697e3
fix(cpu): improve numa-aware cpu allocation with resource packages
luomingmeng Apr 9, 2026
4fc2194
fix: panic when nil
JustinChengLZ Apr 9, 2026
8f202c1
refactor(cpu): improve NUMA-aware allocation logic and block classifi…
luomingmeng Apr 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,16 @@ import (
)

type ReclaimedResourceOptions struct {
EnableReclaim bool
DisableReclaimSharePools []string
ReservedResourceForReport general.ResourceList
MinReclaimedResourceForReport general.ResourceList
MinIgnoredReclaimedResourceForReport general.ResourceList
ReservedResourceForAllocate general.ResourceList
ReservedResourceForReclaimedCores general.ResourceList
NumaMinReservedResourceRatioForAllocate general.ResourceList
NumaMinReservedResourceForAllocate general.ResourceList
EnableReclaim bool
DisableReclaimSharePools []string
DisableReclaimPinnedCPUSetResourcePackageSelector string
ReservedResourceForReport general.ResourceList
MinReclaimedResourceForReport general.ResourceList
MinIgnoredReclaimedResourceForReport general.ResourceList
ReservedResourceForAllocate general.ResourceList
ReservedResourceForReclaimedCores general.ResourceList
NumaMinReservedResourceRatioForAllocate general.ResourceList
NumaMinReservedResourceForAllocate general.ResourceList
*cpuheadroom.CPUHeadroomOptions
*memoryheadroom.MemoryHeadroomOptions
}
Expand Down Expand Up @@ -86,6 +87,8 @@ func (o *ReclaimedResourceOptions) AddFlags(fss *cliflag.NamedFlagSets) {
"show whether enable reclaim resource from shared and agent resource")
fs.StringSliceVar(&o.DisableReclaimSharePools, "disable-reclaim-share-pools", o.DisableReclaimSharePools,
"disable reclaim resource from shared pools")
fs.StringVar(&o.DisableReclaimPinnedCPUSetResourcePackageSelector, "disable-reclaim-pinned-cpuset-resource-package-selector", o.DisableReclaimPinnedCPUSetResourcePackageSelector,
"disable reclaim pinned cpuset resource package selector")
fs.Var(&o.ReservedResourceForReport, "reserved-resource-for-report",
"reserved reclaimed resource report to cnr")
fs.Var(&o.MinReclaimedResourceForReport, "min-reclaimed-resource-for-report",
Expand All @@ -110,6 +113,8 @@ func (o *ReclaimedResourceOptions) ApplyTo(c *reclaimedresource.ReclaimedResourc
var errList []error
c.EnableReclaim = o.EnableReclaim
c.DisableReclaimSharePools = o.DisableReclaimSharePools

c.DisableReclaimPinnedCPUSetResourcePackageSelector = o.DisableReclaimPinnedCPUSetResourcePackageSelector
c.ReservedResourceForReport = v1.ResourceList(o.ReservedResourceForReport)
c.MinReclaimedResourceForReport = v1.ResourceList(o.MinReclaimedResourceForReport)
c.MinIgnoredReclaimedResourceForReport = v1.ResourceList(o.MinIgnoredReclaimedResourceForReport)
Expand Down
40 changes: 25 additions & 15 deletions cmd/katalyst-agent/app/options/qrm/cpu_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package qrm
import (
"time"

"k8s.io/apimachinery/pkg/labels"
cliflag "k8s.io/component-base/cli/flag"

"github.com/kubewharf/katalyst-api/pkg/consts"
Expand All @@ -39,21 +40,22 @@ type CPUOptions struct {
}

type CPUDynamicPolicyOptions struct {
EnableCPUAdvisor bool
AdvisorGetAdviceInterval time.Duration
EnableCPUPressureEviction bool
LoadPressureEvictionSkipPools []string
EnableSyncingCPUIdle bool
EnableCPUIdle bool
CPUNUMAHintPreferPolicy string
CPUNUMAHintPreferLowThreshold float64
NUMABindingResultAnnotationKey string
NUMANumberAnnotationKey string
NUMAIDsAnnotationKey string
EnableReserveCPUReversely bool
EnableCPUBurst bool
EnableDefaultDedicatedCoresCPUBurst bool
EnableDefaultSharedCoresCPUBurst bool
EnableCPUAdvisor bool
AdvisorGetAdviceInterval time.Duration
EnableCPUPressureEviction bool
LoadPressureEvictionSkipPools []string
EnableSyncingCPUIdle bool
EnableCPUIdle bool
CPUNUMAHintPreferPolicy string
CPUNUMAHintPreferLowThreshold float64
NUMABindingResultAnnotationKey string
NUMANumberAnnotationKey string
NUMAIDsAnnotationKey string
EnableReserveCPUReversely bool
EnableCPUBurst bool
EnableDefaultDedicatedCoresCPUBurst bool
EnableDefaultSharedCoresCPUBurst bool
IRQForbiddenPinnedResourcePackageAttributeSelector string
*irqtuner.IRQTunerOptions
*hintoptimizer.HintOptimizerOptions
}
Expand Down Expand Up @@ -140,6 +142,9 @@ func (o *CPUOptions) AddFlags(fss *cliflag.NamedFlagSets) {
o.EnableDefaultSharedCoresCPUBurst, "if set true, it will enable cpu burst for shared cores by default")
fs.BoolVar(&o.EnableDefaultDedicatedCoresCPUBurst, "enable-default-dedicated-cores-cpu-burst",
o.EnableDefaultDedicatedCoresCPUBurst, "if set true, it will enable cpu burst for dedicated cores by default")
// Register the selector that marks pinned resource packages as off-limits for
// IRQ binding. Adjacent string literals are joined verbatim, so the first
// fragment must end with a space to keep the usage text readable.
fs.StringVar(&o.IRQForbiddenPinnedResourcePackageAttributeSelector, "irq-forbidden-pinned-resource-package-attribute-selector",
	o.IRQForbiddenPinnedResourcePackageAttributeSelector, "The selector to filter pinned resource packages that are "+
		"forbidden for irq binding.")
o.HintOptimizerOptions.AddFlags(fss)
o.IRQTunerOptions.AddFlags(fss)
}
Expand All @@ -164,6 +169,11 @@ func (o *CPUOptions) ApplyTo(conf *qrmconfig.CPUQRMPluginConfig) error {
conf.EnableCPUBurst = o.EnableCPUBurst
conf.EnableDefaultDedicatedCoresCPUBurst = o.EnableDefaultDedicatedCoresCPUBurst
conf.EnableDefaultSharedCoresCPUBurst = o.EnableDefaultSharedCoresCPUBurst
selector, err := labels.Parse(o.IRQForbiddenPinnedResourcePackageAttributeSelector)
if err != nil {
return err
}
conf.IRQForbiddenPinnedResourcePackageAttributeSelector = selector
if err := o.HintOptimizerOptions.ApplyTo(conf.HintOptimizerConfiguration); err != nil {
return err
}
Expand Down
5 changes: 5 additions & 0 deletions cmd/katalyst-agent/app/options/qrm/memory_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ type MemoryOptions struct {
EnableNonBindingShareCoresMemoryResourceCheck bool
EnableNUMAAllocationReactor bool
NUMABindResultResourceAllocationAnnotationKey string
ExtraMemoryResources []string

SockMemOptions
LogCacheOptions
Expand Down Expand Up @@ -157,6 +158,7 @@ func NewMemoryOptions() *MemoryOptions {
EnabledQoS: []string{apiconsts.PodAnnotationQoSLevelSharedCores},
MonGroupEnabledClosIDs: []string{},
},
ExtraMemoryResources: []string{},
}
}

Expand Down Expand Up @@ -235,6 +237,8 @@ func (o *MemoryOptions) AddFlags(fss *cliflag.NamedFlagSets) {
o.MonGroupEnabledClosIDs, "enabled-closid mon-groups")
fs.Float64Var(&o.MonGroupMaxCountRatio, "resctrl-mon-groups-max-count-ratio",
o.MonGroupMaxCountRatio, "ratio of mon_groups max count")
fs.StringSliceVar(&o.ExtraMemoryResources, "extra-memory-resources", o.ExtraMemoryResources,
"extra memory resources such as hugepages-*")
}

func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error {
Expand Down Expand Up @@ -273,6 +277,7 @@ func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error {
conf.EnabledQoS = o.EnabledQoS
conf.MonGroupEnabledClosIDs = o.MonGroupEnabledClosIDs
conf.MonGroupMaxCountRatio = o.MonGroupMaxCountRatio
conf.ExtraMemoryResources = o.ExtraMemoryResources

for _, reservation := range o.ReservedNumaMemory {
conf.ReservedNumaMemory[reservation.NumaNode] = reservation.Limits
Expand Down
35 changes: 20 additions & 15 deletions cmd/katalyst-agent/app/options/qrm/qrm_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,28 @@ import (
)

type GenericQRMPluginOptions struct {
QRMPluginSocketDirs []string
ExtraStateFileAbsPath string
PodDebugAnnoKeys []string
UseKubeletReservedConfig bool
PodAnnotationKeptKeys []string
PodLabelKeptKeys []string
MainContainerAnnotationKey string
EnableReclaimNUMABinding bool
EnableSNBHighNumaPreference bool
QRMPluginSocketDirs []string
ExtraStateFileAbsPath string
PodDebugAnnoKeys []string
UseKubeletReservedConfig bool
PodAnnotationKeptKeys []string
PodLabelKeptKeys []string
MainContainerAnnotationKey string
EnableReclaimNUMABinding bool
EnableSNBHighNumaPreference bool
TopologyAllocationAnnotationKey string
*statedirectory.StateDirectoryOptions
}

func NewGenericQRMPluginOptions() *GenericQRMPluginOptions {
return &GenericQRMPluginOptions{
QRMPluginSocketDirs: []string{"/var/lib/kubelet/plugins_registry"},
PodDebugAnnoKeys: []string{},
PodAnnotationKeptKeys: []string{},
PodLabelKeptKeys: []string{},
MainContainerAnnotationKey: consts.MainContainerNameAnnotationKey,
StateDirectoryOptions: statedirectory.NewStateDirectoryOptions(),
QRMPluginSocketDirs: []string{"/var/lib/kubelet/plugins_registry"},
PodDebugAnnoKeys: []string{},
PodAnnotationKeptKeys: []string{},
PodLabelKeptKeys: []string{},
MainContainerAnnotationKey: consts.MainContainerNameAnnotationKey,
TopologyAllocationAnnotationKey: consts.QRMPodAnnotationTopologyAllocationKey,
StateDirectoryOptions: statedirectory.NewStateDirectoryOptions(),
}
}

Expand All @@ -68,6 +70,8 @@ func (o *GenericQRMPluginOptions) AddFlags(fss *cliflag.NamedFlagSets) {
o.EnableReclaimNUMABinding, "if set true, reclaim pod will be allocated on a specific NUMA node best-effort, otherwise, reclaim pod will be allocated on multi NUMA nodes")
fs.BoolVar(&o.EnableSNBHighNumaPreference, "enable-snb-high-numa-preference",
o.EnableSNBHighNumaPreference, "default false,if set true, snb pod will be preferentially allocated on high numa node")
fs.StringVar(&o.TopologyAllocationAnnotationKey, "topology-allocation-annotation-key",
o.TopologyAllocationAnnotationKey, "the annotation key used to describe a topology aware allocation of a container")
o.StateDirectoryOptions.AddFlags(fss)
}

Expand All @@ -81,6 +85,7 @@ func (o *GenericQRMPluginOptions) ApplyTo(conf *qrmconfig.GenericQRMPluginConfig
conf.MainContainerAnnotationKey = o.MainContainerAnnotationKey
conf.EnableReclaimNUMABinding = o.EnableReclaimNUMABinding
conf.EnableSNBHighNumaPreference = o.EnableSNBHighNumaPreference
conf.TopologyAllocationAnnotationKey = o.TopologyAllocationAnnotationKey

if err := o.StateDirectoryOptions.ApplyTo(conf.StateDirectoryConfiguration); err != nil {
return err
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ require (
github.com/google/uuid v1.3.0
github.com/h2non/gock v1.2.0
github.com/klauspost/cpuid/v2 v2.2.6
github.com/kubewharf/katalyst-api v0.5.11-0.20260324091059-cae1d07d9882
github.com/kubewharf/katalyst-api v0.5.11-0.20260407100730-9a71452c00b1
github.com/moby/sys/mountinfo v0.6.2
github.com/montanaflynn/stats v0.7.1
github.com/opencontainers/runc v1.1.6
Expand Down Expand Up @@ -197,7 +197,7 @@ replace (
k8s.io/kube-proxy => k8s.io/kube-proxy v0.24.6
k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.24.6
k8s.io/kubectl => k8s.io/kubectl v0.24.6
k8s.io/kubelet => github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2
k8s.io/kubelet => github.com/luomingmeng/kubelet v0.0.0-20260306101749-66566cd8838b
k8s.io/kubernetes => k8s.io/kubernetes v1.24.6
k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.24.6
k8s.io/metrics => k8s.io/metrics v0.24.6
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -574,10 +574,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kubewharf/katalyst-api v0.5.11-0.20260324091059-cae1d07d9882 h1:4KYYk/mAJAOIYDW5V+43wnjnP8p3bwHXAkAcw/AbzuQ=
github.com/kubewharf/katalyst-api v0.5.11-0.20260324091059-cae1d07d9882/go.mod h1:BZMVGVl3EP0eCn5xsDgV41/gjYkoh43abIYxrB10e3k=
github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2 h1:2KLMzgntDypiFJRX4fSQJCD+a6zIgHuhcAzd/7nAGmU=
github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
github.com/kubewharf/katalyst-api v0.5.11-0.20260407100730-9a71452c00b1 h1:JQn9/QjjsmS6dZNurits7d3YUU6qKhHVjFyTHqdMoT4=
github.com/kubewharf/katalyst-api v0.5.11-0.20260407100730-9a71452c00b1/go.mod h1:BZMVGVl3EP0eCn5xsDgV41/gjYkoh43abIYxrB10e3k=
github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8=
github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/libopenstorage/openstorage v1.0.0/go.mod h1:Sp1sIObHjat1BeXhfMqLZ14wnOzEhNx2YQedreMcUyc=
Expand All @@ -587,6 +585,8 @@ github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0U
github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc=
github.com/logrusorgru/aurora v0.0.0-20181002194514-a7b3b318ed4e/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4=
github.com/lpabon/godbc v0.1.1/go.mod h1:Jo9QV0cf3U6jZABgiJ2skINAXb9j8m51r07g4KI92ZA=
github.com/luomingmeng/kubelet v0.0.0-20260306101749-66566cd8838b h1:4fQ2SJiAbt+RMD/RCN/8iN8LevcHnLxXaFY5z2cuQVI=
github.com/luomingmeng/kubelet v0.0.0-20260306101749-66566cd8838b/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
Expand Down
4 changes: 4 additions & 0 deletions pkg/agent/qrm-plugins/commonstate/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ import (

apiconsts "github.com/kubewharf/katalyst-api/pkg/consts"
cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts"
"github.com/kubewharf/katalyst-core/pkg/util/general"
"github.com/kubewharf/katalyst-core/pkg/util/machine"
resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package"
)

// notice that pool-name may not have direct mapping relations with qos-level, for instance
Expand Down Expand Up @@ -56,6 +58,8 @@ const (
PoolNotFoundErrMsg = "pool not found"
)

// OwnerPoolNameTranslator is the package-level translator obtained by wrapping
// general.NewCommonSuffixTranslator(NUMAPoolInfix) with
// resourcepackage.ResourcePackageSuffixTranslatorWrapper.
// NOTE(review): presumably used to normalize owner pool names that carry a
// NUMA infix and/or a resource-package suffix — confirm against callers.
var OwnerPoolNameTranslator = resourcepackage.ResourcePackageSuffixTranslatorWrapper(general.NewCommonSuffixTranslator(NUMAPoolInfix))

// IsIsolationPool reports whether poolName begins with PoolNamePrefixIsolation.
func IsIsolationPool(poolName string) bool {
	hasIsolationPrefix := strings.HasPrefix(poolName, PoolNamePrefixIsolation)
	return hasIsolationPrefix
}
Expand Down
31 changes: 28 additions & 3 deletions pkg/agent/qrm-plugins/commonstate/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts"
"github.com/kubewharf/katalyst-core/pkg/util/general"
"github.com/kubewharf/katalyst-core/pkg/util/machine"
resourcepackage "github.com/kubewharf/katalyst-core/pkg/util/resource-package"
)

type AllocationMeta struct {
Expand Down Expand Up @@ -127,6 +128,14 @@ func (am *AllocationMeta) GetOwnerPoolName() string {
return am.OwnerPoolName
}

// GetResourcePackageName returns the result of
// resourcepackage.GetResourcePackageName applied to the allocation's
// annotations. A nil receiver yields the empty string.
func (am *AllocationMeta) GetResourcePackageName() string {
	if am != nil {
		return resourcepackage.GetResourcePackageName(am.Annotations)
	}

	return ""
}

// GetSpecifiedPoolName parses the owner pool name for AllocationInfo from qos-level
func (am *AllocationMeta) GetSpecifiedPoolName() string {
if am == nil {
Expand All @@ -145,8 +154,8 @@ func (am *AllocationMeta) GetSpecifiedNUMABindingNUMAID() (int, error) {
return GetSpecifiedNUMABindingNUMAID(am.Annotations)
}

// SetSpecifiedNUMABindingNUMAID set the numa id for AllocationInfo
func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaID uint64) {
// SetSpecifiedNUMABindingNUMAID set the numa ids for AllocationInfo
func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaIDs []uint64) {
if am == nil {
return
}
Expand All @@ -155,7 +164,12 @@ func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaID uint64) {
am.Annotations = make(map[string]string)
}

am.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint] = machine.NewCPUSet(int(numaID)).String()
intIDs := make([]int, len(numaIDs))
for i, id := range numaIDs {
intIDs[i] = int(id)
}

am.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint] = machine.NewCPUSet(intIDs...).String()
}

// GetSpecifiedNUMABindingPoolName get numa_binding pool name
Expand Down Expand Up @@ -316,3 +330,14 @@ func (am *AllocationMeta) CheckDedicatedPool() bool {
}
return am.OwnerPoolName == PoolNameDedicated
}

// CheckDistributeEvenlyAcrossNuma reports whether the allocation's annotations
// carry the distribute-evenly-across-NUMA CPU enhancement key set to its
// "enable" value. A nil receiver is treated as not enabled.
func (am *AllocationMeta) CheckDistributeEvenlyAcrossNuma() bool {
	if am == nil {
		return false
	}

	// Indexing a nil or empty annotation map yields "", which simply fails the comparison.
	annotationValue := am.Annotations[consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma]
	return annotationValue == consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable
}
Loading
Loading