Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 48 additions & 2 deletions cmd/katalyst-agent/app/options/qrm/gpu_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
cliflag "k8s.io/component-base/cli/flag"

"github.com/kubewharf/katalyst-api/pkg/consts"
"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options/qrm/gpustrategy"
qrmconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm"
)
Expand All @@ -28,10 +29,19 @@ type GPUOptions struct {
PolicyName string
GPUDeviceNames []string
GPUMemoryAllocatablePerGPU string
MilliGPUAllocatablePerGPU string
SkipGPUStateCorruption bool
RDMADeviceNames []string
RequiredDeviceAffinity bool
EnableKubeletCheckpointFallback bool
VirtualGPUPrefersSpreading bool
VirtualGPUMemoryWeightEnvName string
VirtualGPUComputeWeightEnvName string
VirtualGPUTimesliceEnvName string
VirtualGPUComputePolicyEnvName string
GPUSelectionResultAnnotationKey string
VirtualGPUTimesliceEnvValue int
VirtualGPUComputePolicyEnvValue int

GPUStrategyOptions *gpustrategy.GPUStrategyOptions
}
Expand All @@ -41,10 +51,15 @@ func NewGPUOptions() *GPUOptions {
PolicyName: "static",
GPUDeviceNames: []string{"nvidia.com/gpu"},
GPUMemoryAllocatablePerGPU: "100",
MilliGPUAllocatablePerGPU: "1000",
RDMADeviceNames: []string{},
GPUStrategyOptions: gpustrategy.NewGPUStrategyOptions(),
RequiredDeviceAffinity: true,
EnableKubeletCheckpointFallback: true,
VirtualGPUPrefersSpreading: false,
GPUSelectionResultAnnotationKey: consts.PodAnnotationGPUSelectionResultKey,
VirtualGPUTimesliceEnvValue: 300,
VirtualGPUComputePolicyEnvValue: 0,
}
}

Expand All @@ -56,13 +71,31 @@ func (o *GPUOptions) AddFlags(fss *cliflag.NamedFlagSets) {
fs.StringSliceVar(&o.GPUDeviceNames, "gpu-resource-names", o.GPUDeviceNames, "The name of the GPU resource")
fs.StringVar(&o.GPUMemoryAllocatablePerGPU, "gpu-memory-allocatable-per-gpu",
o.GPUMemoryAllocatablePerGPU, "The total memory allocatable for each GPU, e.g. 100")
fs.StringVar(&o.MilliGPUAllocatablePerGPU, "gpu-milligpu-allocatable-per-gpu",
o.MilliGPUAllocatablePerGPU, "The total milliGPU allocatable for each GPU, e.g. 1000")
fs.BoolVar(&o.SkipGPUStateCorruption, "skip-gpu-state-corruption",
o.SkipGPUStateCorruption, "skip gpu state corruption, and it will be used after updating state properties")
fs.StringSliceVar(&o.RDMADeviceNames, "rdma-resource-names", o.RDMADeviceNames, "The name of the RDMA resource")
fs.BoolVar(&o.RequiredDeviceAffinity, "gpu-required-device-affinity", o.RequiredDeviceAffinity,
"required device affinity, and when true it will cause pods to admit fail if unable to meet device affinity")
fs.BoolVar(&o.EnableKubeletCheckpointFallback, "enable-kubelet-checkpoint-fallback", o.EnableKubeletCheckpointFallback,
"enable fallback to kubelet device plugin checkpoint for device allocation.")
fs.BoolVar(&o.VirtualGPUPrefersSpreading, "virtual-gpu-prefers-spreading",
o.VirtualGPUPrefersSpreading, "whether virtual GPU prefers spreading across devices")
fs.StringVar(&o.VirtualGPUMemoryWeightEnvName, "virtual-gpu-memory-weight-env-name",
o.VirtualGPUMemoryWeightEnvName, "The environment variable name for Virtual GPU memory weight")
fs.StringVar(&o.VirtualGPUComputeWeightEnvName, "virtual-gpu-compute-weight-env-name",
o.VirtualGPUComputeWeightEnvName, "The environment variable name for Virtual GPU compute weight")
fs.StringVar(&o.VirtualGPUTimesliceEnvName, "virtual-gpu-timeslice-env-name",
o.VirtualGPUTimesliceEnvName, "The environment variable name for Virtual GPU timeslice")
fs.StringVar(&o.VirtualGPUComputePolicyEnvName, "virtual-gpu-compute-policy-env-name",
o.VirtualGPUComputePolicyEnvName, "The environment variable name for Virtual GPU compute policy")
fs.StringVar(&o.GPUSelectionResultAnnotationKey, "gpu-selection-result-annotation-key",
o.GPUSelectionResultAnnotationKey, "The annotation key for GPU selection result")
fs.IntVar(&o.VirtualGPUTimesliceEnvValue, "virtual-gpu-timeslice-env-value",
o.VirtualGPUTimesliceEnvValue, "The environment variable value for Virtual GPU timeslice")
fs.IntVar(&o.VirtualGPUComputePolicyEnvValue, "virtual-gpu-compute-policy-env-value",
o.VirtualGPUComputePolicyEnvValue, "The environment variable value for Virtual GPU compute policy")
o.GPUStrategyOptions.AddFlags(fss)
}

Expand All @@ -74,12 +107,25 @@ func (o *GPUOptions) ApplyTo(conf *qrmconfig.GPUQRMPluginConfig) error {
return err
}
conf.GPUMemoryAllocatablePerGPU = gpuMemory
milliGPU, err := resource.ParseQuantity(o.MilliGPUAllocatablePerGPU)
if err != nil {
return err
}
conf.MilliGPUAllocatablePerGPU = milliGPU
conf.SkipGPUStateCorruption = o.SkipGPUStateCorruption
conf.RDMADeviceNames = o.RDMADeviceNames
conf.RequiredDeviceAffinity = o.RequiredDeviceAffinity
conf.EnableKubeletCheckpointFallback = o.EnableKubeletCheckpointFallback
conf.VirtualGPUPrefersSpreading = o.VirtualGPUPrefersSpreading
conf.VirtualGPUMemoryWeightEnvName = o.VirtualGPUMemoryWeightEnvName
conf.VirtualGPUComputeWeightEnvName = o.VirtualGPUComputeWeightEnvName
conf.VirtualGPUTimesliceEnvName = o.VirtualGPUTimesliceEnvName
conf.VirtualGPUComputePolicyEnvName = o.VirtualGPUComputePolicyEnvName
conf.VirtualGPUTimesliceEnvValue = o.VirtualGPUTimesliceEnvValue
conf.VirtualGPUComputePolicyEnvValue = o.VirtualGPUComputePolicyEnvValue
conf.GPUSelectionResultAnnotationKey = o.GPUSelectionResultAnnotationKey
if err := o.GPUStrategyOptions.ApplyTo(conf.GPUStrategyConfig); err != nil {
return err
}
conf.RequiredDeviceAffinity = o.RequiredDeviceAffinity
conf.EnableKubeletCheckpointFallback = o.EnableKubeletCheckpointFallback
return nil
}
5 changes: 5 additions & 0 deletions cmd/katalyst-agent/app/options/qrm/memory_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ type MemoryOptions struct {
EnableNonBindingShareCoresMemoryResourceCheck bool
EnableNUMAAllocationReactor bool
NUMABindResultResourceAllocationAnnotationKey string
ExtraMemoryResources []string

SockMemOptions
LogCacheOptions
Expand Down Expand Up @@ -157,6 +158,7 @@ func NewMemoryOptions() *MemoryOptions {
EnabledQoS: []string{apiconsts.PodAnnotationQoSLevelSharedCores},
MonGroupEnabledClosIDs: []string{},
},
ExtraMemoryResources: []string{},
}
}

Expand Down Expand Up @@ -235,6 +237,8 @@ func (o *MemoryOptions) AddFlags(fss *cliflag.NamedFlagSets) {
o.MonGroupEnabledClosIDs, "enabled-closid mon-groups")
fs.Float64Var(&o.MonGroupMaxCountRatio, "resctrl-mon-groups-max-count-ratio",
o.MonGroupMaxCountRatio, "ratio of mon_groups max count")
fs.StringSliceVar(&o.ExtraMemoryResources, "extra-memory-resources", o.ExtraMemoryResources,
"extra memory resources such as hugepages-*")
}

func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error {
Expand Down Expand Up @@ -273,6 +277,7 @@ func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error {
conf.EnabledQoS = o.EnabledQoS
conf.MonGroupEnabledClosIDs = o.MonGroupEnabledClosIDs
conf.MonGroupMaxCountRatio = o.MonGroupMaxCountRatio
conf.ExtraMemoryResources = o.ExtraMemoryResources

for _, reservation := range o.ReservedNumaMemory {
conf.ReservedNumaMemory[reservation.NumaNode] = reservation.Limits
Expand Down
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ require (
github.com/google/uuid v1.3.0
github.com/h2non/gock v1.2.0
github.com/klauspost/cpuid/v2 v2.2.6
github.com/kubewharf/katalyst-api v0.5.11-0.20260423040236-f1a2330d266e
github.com/kubewharf/katalyst-api v0.5.11-0.20260427134811-f4e50b5266f4
github.com/moby/sys/mountinfo v0.6.2
github.com/montanaflynn/stats v0.7.1
github.com/opencontainers/runc v1.1.6
Expand Down Expand Up @@ -176,6 +176,7 @@ require (
)

replace (
github.com/kubewharf/katalyst-api => github.com/luomingmeng/katalyst-api v0.0.0-20260428035600-507ef616baa0
k8s.io/api => k8s.io/api v0.24.6
k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6
k8s.io/apimachinery => k8s.io/apimachinery v0.24.6
Expand All @@ -197,7 +198,7 @@ replace (
k8s.io/kube-proxy => k8s.io/kube-proxy v0.24.6
k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.24.6
k8s.io/kubectl => k8s.io/kubectl v0.24.6
k8s.io/kubelet => github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2
k8s.io/kubelet => github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.3
k8s.io/kubernetes => k8s.io/kubernetes v1.24.6
k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.24.6
k8s.io/metrics => k8s.io/metrics v0.24.6
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -574,10 +574,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kubewharf/katalyst-api v0.5.11-0.20260423040236-f1a2330d266e h1:/0LP1rzsXEI09g3eqUZCJcyv/XGU8MofdFyi7NBtZA0=
github.com/kubewharf/katalyst-api v0.5.11-0.20260423040236-f1a2330d266e/go.mod h1:BZMVGVl3EP0eCn5xsDgV41/gjYkoh43abIYxrB10e3k=
github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2 h1:2KLMzgntDypiFJRX4fSQJCD+a6zIgHuhcAzd/7nAGmU=
github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.3 h1:oFwpVVeDESqgzkse3iSODzHRKX495VvUgslu2hhepCc=
github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.3/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8=
github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/libopenstorage/openstorage v1.0.0/go.mod h1:Sp1sIObHjat1BeXhfMqLZ14wnOzEhNx2YQedreMcUyc=
Expand All @@ -587,6 +585,8 @@ github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0U
github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc=
github.com/logrusorgru/aurora v0.0.0-20181002194514-a7b3b318ed4e/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4=
github.com/lpabon/godbc v0.1.1/go.mod h1:Jo9QV0cf3U6jZABgiJ2skINAXb9j8m51r07g4KI92ZA=
github.com/luomingmeng/katalyst-api v0.0.0-20260428035600-507ef616baa0 h1:1xk6xC1H9gzGehH8WBeT/9K2PDOtL/PsViOo8pdZHlQ=
github.com/luomingmeng/katalyst-api v0.0.0-20260428035600-507ef616baa0/go.mod h1:BZMVGVl3EP0eCn5xsDgV41/gjYkoh43abIYxrB10e3k=
github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
Expand Down
22 changes: 19 additions & 3 deletions pkg/agent/qrm-plugins/commonstate/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ func (am *AllocationMeta) GetSpecifiedNUMABindingNUMAID() (int, error) {
return GetSpecifiedNUMABindingNUMAID(am.Annotations)
}

// SetSpecifiedNUMABindingNUMAID set the numa id for AllocationInfo
func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaID uint64) {
// SetSpecifiedNUMABindingNUMAID set the numa ids for AllocationInfo
func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaIDs []uint64) {
if am == nil {
return
}
Expand All @@ -155,7 +155,12 @@ func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaID uint64) {
am.Annotations = make(map[string]string)
}

am.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint] = machine.NewCPUSet(int(numaID)).String()
intIDs := make([]int, len(numaIDs))
for i, id := range numaIDs {
intIDs[i] = int(id)
}

am.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint] = machine.NewCPUSet(intIDs...).String()
}

// GetSpecifiedNUMABindingPoolName get numa_binding pool name
Expand Down Expand Up @@ -316,3 +321,14 @@ func (am *AllocationMeta) CheckDedicatedPool() bool {
}
return am.OwnerPoolName == PoolNameDedicated
}

// CheckDistributeEvenlyAcrossNuma returns true if the AllocationInfo is for pod with distribute evenly across numa
// annotation enabled.
func (am *AllocationMeta) CheckDistributeEvenlyAcrossNuma() bool {
if am == nil {
return false
}

return am.Annotations[consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma] ==
consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable
}
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ func (p *DynamicPolicy) reclaimedCoresAllocationHandler(ctx context.Context,
// set reclaimed numa_binding NUMA ID to allocationInfo
if req.Hint != nil && len(req.Hint.Nodes) == 1 && (reclaimActualBindingNUMAs.Contains(int(req.Hint.Nodes[0])) ||
!nonReclaimActualBindingNUMAs.Equals(machine.NewCPUSet(int(req.Hint.Nodes[0])))) {
allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0])
allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes)
}
}

Expand Down Expand Up @@ -470,7 +470,7 @@ func (p *DynamicPolicy) dedicatedCoresWithNUMABindingAllocationHandler(ctx conte
return nil, fmt.Errorf("numa binding without numa exclusive allocation result numa node size is %d, "+
"not equal to 1", len(req.Hint.Nodes))
}
allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0])
allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes)
}

// update pod entries directly.
Expand Down Expand Up @@ -741,7 +741,7 @@ func (p *DynamicPolicy) allocateSharedNumaBindingCPUs(req *pluginapi.ResourceReq
InitTimestamp: time.Now().Format(util.QRMTimeFormat),
RequestQuantity: reqFloat64,
}
allocationInfo.SetSpecifiedNUMABindingNUMAID(hint.Nodes[0])
allocationInfo.SetSpecifiedNUMABindingNUMAID(hint.Nodes)

if util.PodInplaceUpdateResizing(req) {
originAllocationInfo := p.state.GetAllocationInfo(allocationInfo.PodUid, allocationInfo.ContainerName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ func TestAllocateSharedNumaBindingCPUs(t *testing.T) {
0: machine.NewCPUSet(0, 1),
},
}
originAllocationInfo.SetSpecifiedNUMABindingNUMAID(0)
originAllocationInfo.SetSpecifiedNUMABindingNUMAID([]uint64{0})

policy.state.SetAllocationInfo(podUID, containerName, originAllocationInfo, false)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@ func (p *DynamicPolicy) dedicatedCoresWithNUMABindingHintHandler(_ context.Conte
(*commonstate.AllocationMeta).CheckDedicatedNUMABindingNUMAExclusive))

var extraErr error
hints, extraErr = util.GetHintsFromExtraStateFile(req.PodName, string(v1.ResourceCPU), p.extraStateFileAbsPath, availableNUMAs)
hints, extraErr = util.GetHintsFromExtraStateFile(req.PodName, p.extraStateFileAbsPath, availableNUMAs, []v1.ResourceName{
v1.ResourceCPU,
})
if extraErr != nil {
general.Infof("pod: %s/%s, container: %s GetHintsFromExtraStateFile failed with error: %v",
req.PodNamespace, req.PodName, req.ContainerName, extraErr)
Expand Down Expand Up @@ -303,10 +305,10 @@ func (p *DynamicPolicy) calculateHints(
maskCount := mask.Count()
if maskCount < minNUMAsCountNeeded {
return
} else if numaBinding && !numaExclusive && numaNumber <= 1 && maskCount > 1 {
} else if numaBinding && !numaExclusive && maskCount > 1 && numaNumber <= 1 {
// because it's hard to control memory allocation accurately,
// we only support numa_binding but not exclusive container with request smaller than 1 NUMA
// pods with distribute evenly across numa annotation can occupy more than 1 NUMA
// pods with numa number more than 1 can occupy more than 1 NUMA
return
}

Expand Down Expand Up @@ -371,7 +373,6 @@ func (p *DynamicPolicy) calculateHints(
if numaNumber != 0 {
minAffinitySize = numaNumber
}

// Update hint to be preferred if they have minimum number of NUMA nodes
for _, hint := range availableNumaHints {
if len(hint.Nodes) == minAffinitySize {
Expand Down
Loading
Loading