Skip to content

Commit 20b4bc7

Browse files
authored
Merge pull request #1016 from JulyWindK/dev/kfx-sys-cpu-pressure-eviction
feat(eviction): support numa sys cpu pressure eviction
2 parents ff886d5 + 378627d commit 20b4bc7

File tree

18 files changed

+2288
-57
lines changed

18 files changed

+2288
-57
lines changed

cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/cpu_pressure_eviction.go

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -39,33 +39,35 @@ const (
3939

4040
// CPUPressureEvictionOptions is the options of cpu pressure eviction
4141
type CPUPressureEvictionOptions struct {
42-
EnableLoadEviction bool
43-
LoadUpperBoundRatio float64
44-
LoadLowerBoundRatio float64
45-
LoadThresholdMetPercentage float64
46-
LoadMetricRingSize int
47-
LoadEvictionCoolDownTime time.Duration
48-
EnableSuppressionEviction bool
49-
MaxSuppressionToleranceRate float64
50-
MinSuppressionToleranceDuration time.Duration
51-
GracePeriod int64
52-
NumaCPUPressureEvictionOptions NumaCPUPressureEvictionOptions
42+
EnableLoadEviction bool
43+
LoadUpperBoundRatio float64
44+
LoadLowerBoundRatio float64
45+
LoadThresholdMetPercentage float64
46+
LoadMetricRingSize int
47+
LoadEvictionCoolDownTime time.Duration
48+
EnableSuppressionEviction bool
49+
MaxSuppressionToleranceRate float64
50+
MinSuppressionToleranceDuration time.Duration
51+
GracePeriod int64
52+
NumaCPUPressureEvictionOptions NumaCPUPressureEvictionOptions
53+
NumaSysCPUPressureEvictionOptions NumaSysCPUPressureEvictionOptions
5354
}
5455

5556
// NewCPUPressureEvictionOptions returns a new CPUPressureEvictionOptions
5657
func NewCPUPressureEvictionOptions() *CPUPressureEvictionOptions {
5758
return &CPUPressureEvictionOptions{
58-
EnableLoadEviction: defaultEnableLoadEviction,
59-
LoadUpperBoundRatio: defaultLoadUpperBoundRatio,
60-
LoadLowerBoundRatio: defaultLoadLowerBoundRatio,
61-
LoadThresholdMetPercentage: defaultLoadThresholdMetPercentage,
62-
LoadMetricRingSize: defaultLoadMetricSize,
63-
LoadEvictionCoolDownTime: defaultLoadEvictionCoolDownTime,
64-
EnableSuppressionEviction: defaultEnableSuppressionEviction,
65-
MaxSuppressionToleranceRate: defaultMaxSuppressionToleranceRate,
66-
MinSuppressionToleranceDuration: defaultMinSuppressionToleranceDuration,
67-
GracePeriod: defaultGracePeriod,
68-
NumaCPUPressureEvictionOptions: NewNumaCPUPressureEvictionOptions(),
59+
EnableLoadEviction: defaultEnableLoadEviction,
60+
LoadUpperBoundRatio: defaultLoadUpperBoundRatio,
61+
LoadLowerBoundRatio: defaultLoadLowerBoundRatio,
62+
LoadThresholdMetPercentage: defaultLoadThresholdMetPercentage,
63+
LoadMetricRingSize: defaultLoadMetricSize,
64+
LoadEvictionCoolDownTime: defaultLoadEvictionCoolDownTime,
65+
EnableSuppressionEviction: defaultEnableSuppressionEviction,
66+
MaxSuppressionToleranceRate: defaultMaxSuppressionToleranceRate,
67+
MinSuppressionToleranceDuration: defaultMinSuppressionToleranceDuration,
68+
GracePeriod: defaultGracePeriod,
69+
NumaCPUPressureEvictionOptions: NewNumaCPUPressureEvictionOptions(),
70+
NumaSysCPUPressureEvictionOptions: NewNumaSysCPUPressureEvictionOptions(),
6971
}
7072
}
7173

@@ -99,6 +101,7 @@ func (o *CPUPressureEvictionOptions) AddFlags(fss *cliflag.NamedFlagSets) {
99101
"the ratio between the times metric value over the bound value and the metric ring size is greater than this percentage "+
100102
", the eviction or node taint will be triggered")
101103
o.NumaCPUPressureEvictionOptions.AddFlags(fss)
104+
o.NumaSysCPUPressureEvictionOptions.AddFlags(fss)
102105
}
103106

104107
func (o *CPUPressureEvictionOptions) ApplyTo(c *eviction.CPUPressureEvictionConfiguration) error {
@@ -116,6 +119,8 @@ func (o *CPUPressureEvictionOptions) ApplyTo(c *eviction.CPUPressureEvictionConf
116119
if err := o.NumaCPUPressureEvictionOptions.ApplyTo(&c.NumaCPUPressureEvictionConfiguration); err != nil {
117120
return err
118121
}
119-
122+
if err := o.NumaSysCPUPressureEvictionOptions.ApplyTo(&c.NumaSysCPUPressureEvictionConfiguration); err != nil {
123+
return err
124+
}
120125
return nil
121126
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
Copyright 2022 The Katalyst Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package eviction
18+
19+
import (
20+
cliflag "k8s.io/component-base/cli/flag"
21+
22+
"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/eviction"
23+
)
24+
25+
// NumaSysCPUPressureEvictionOptions holds the command-line options for
// NUMA system CPU pressure eviction. Every field is bound to a
// "numa-sys-cpu-pressure-eviction-*" flag by AddFlags and copied field by
// field into the eviction configuration by ApplyTo.
type NumaSysCPUPressureEvictionOptions struct {
	// EnableEviction enables NUMA system CPU pressure eviction.
	EnableEviction bool
	// MetricRingSize is the size of the metric ring for NUMA system CPU pressure.
	MetricRingSize int
	// GracePeriod is the grace period, in seconds, before evicting pods.
	GracePeriod int64
	// SyncPeriod is the sync period, in seconds, of the eviction.
	SyncPeriod int64

	// ThresholdMetPercentage is described by its flag as the percentage of
	// NUMA nodes whose system CPU pressure meets the threshold to trigger
	// eviction.
	// NOTE(review): the similarly named load-eviction flag describes a ratio of
	// over-bound samples to the metric ring size; confirm which meaning applies.
	ThresholdMetPercentage float64
	// NumaCPUUsageSoftThreshold is the soft threshold of the NUMA node CPU
	// usage ratio, as described by its flag.
	NumaCPUUsageSoftThreshold float64
	// NumaCPUUsageHardThreshold is the hard threshold of the NUMA node CPU
	// usage ratio, as described by its flag.
	NumaCPUUsageHardThreshold float64
	// NUMASysOverTotalUsageSoftThreshold is the soft threshold of the ratio of
	// NUMA system CPU usage over total CPU usage.
	NUMASysOverTotalUsageSoftThreshold float64
	// NUMASysOverTotalUsageHardThreshold is the hard threshold of the ratio of
	// NUMA system CPU usage over total CPU usage.
	NUMASysOverTotalUsageHardThreshold float64
	// NUMASysOverTotalUsageEvictionThreshold is the eviction threshold of the
	// ratio of NUMA system CPU usage over total CPU usage.
	// NOTE(review): how this differs from the soft/hard thresholds is not
	// visible here; confirm against the eviction strategy.
	NUMASysOverTotalUsageEvictionThreshold float64
}
38+
39+
func NewNumaSysCPUPressureEvictionOptions() NumaSysCPUPressureEvictionOptions {
40+
return NumaSysCPUPressureEvictionOptions{
41+
EnableEviction: false,
42+
MetricRingSize: 4,
43+
GracePeriod: 60,
44+
SyncPeriod: 10,
45+
46+
ThresholdMetPercentage: 0.7,
47+
NumaCPUUsageSoftThreshold: 0.4,
48+
NumaCPUUsageHardThreshold: 0.5,
49+
NUMASysOverTotalUsageSoftThreshold: 0.4,
50+
NUMASysOverTotalUsageHardThreshold: 0.5,
51+
NUMASysOverTotalUsageEvictionThreshold: 0.3,
52+
}
53+
}
54+
55+
func (o *NumaSysCPUPressureEvictionOptions) AddFlags(fss *cliflag.NamedFlagSets) {
56+
fs := fss.FlagSet("numa-sys-cpu-pressure-eviction")
57+
58+
fs.BoolVar(&o.EnableEviction, "numa-sys-cpu-pressure-eviction-enable", o.EnableEviction,
59+
"Enable numa system cpu pressure eviction")
60+
fs.IntVar(&o.MetricRingSize, "numa-sys-cpu-pressure-eviction-metric-ring-size", o.MetricRingSize,
61+
"The size of the metric ring for NUMA system CPU pressure")
62+
fs.Int64Var(&o.GracePeriod, "numa-sys-cpu-pressure-eviction-grace-period", o.GracePeriod,
63+
"The grace period (in seconds) before evicting pods due to NUMA system CPU pressure")
64+
fs.Int64Var(&o.SyncPeriod, "numa-sys-cpu-pressure-eviction-sync-period", o.SyncPeriod,
65+
"The sync period (in seconds) for NUMA system CPU pressure eviction")
66+
67+
fs.Float64Var(&o.ThresholdMetPercentage, "numa-sys-cpu-pressure-eviction-threshold-met-percentage", o.ThresholdMetPercentage,
68+
"The percentage of NUMA nodes whose system CPU pressure meets the threshold to trigger eviction")
69+
fs.Float64Var(&o.NumaCPUUsageSoftThreshold, "numa-sys-cpu-pressure-eviction-numa-cpu-usage-soft-threshold", o.NumaCPUUsageSoftThreshold,
70+
"The soft threshold of NUMA node system CPU usage ratio")
71+
fs.Float64Var(&o.NumaCPUUsageHardThreshold, "numa-sys-cpu-pressure-eviction-numa-cpu-usage-hard-threshold", o.NumaCPUUsageHardThreshold,
72+
"The hard threshold of NUMA node system CPU usage ratio")
73+
fs.Float64Var(&o.NUMASysOverTotalUsageSoftThreshold, "numa-sys-cpu-pressure-eviction-numa-sys-over-total-usage-soft-threshold", o.NUMASysOverTotalUsageSoftThreshold,
74+
"The soft threshold of NUMA node system CPU pressure over total system CPU usage ratio")
75+
fs.Float64Var(&o.NUMASysOverTotalUsageHardThreshold, "numa-sys-cpu-pressure-eviction-numa-sys-over-total-usage-hard-threshold", o.NUMASysOverTotalUsageHardThreshold,
76+
"The hard threshold of NUMA node system CPU pressure over total system CPU usage ratio")
77+
fs.Float64Var(&o.NUMASysOverTotalUsageEvictionThreshold, "numa-sys-cpu-pressure-eviction-numa-sys-over-total-usage-eviction-threshold", o.NUMASysOverTotalUsageEvictionThreshold,
78+
"The eviction threshold of NUMA node system CPU pressure over total system CPU usage ratio")
79+
}
80+
81+
func (o *NumaSysCPUPressureEvictionOptions) ApplyTo(c *eviction.NumaSysCPUPressureEvictionConfiguration) error {
82+
c.EnableEviction = o.EnableEviction
83+
c.MetricRingSize = o.MetricRingSize
84+
c.GracePeriod = o.GracePeriod
85+
c.SyncPeriod = o.SyncPeriod
86+
87+
c.ThresholdMetPercentage = o.ThresholdMetPercentage
88+
c.NumaCPUUsageSoftThreshold = o.NumaCPUUsageSoftThreshold
89+
c.NumaCPUUsageHardThreshold = o.NumaCPUUsageHardThreshold
90+
c.NUMASysOverTotalUsageSoftThreshold = o.NUMASysOverTotalUsageSoftThreshold
91+
c.NUMASysOverTotalUsageHardThreshold = o.NUMASysOverTotalUsageHardThreshold
92+
c.NUMASysOverTotalUsageEvictionThreshold = o.NUMASysOverTotalUsageEvictionThreshold
93+
94+
return nil
95+
}

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ require (
1919
github.com/google/uuid v1.3.0
2020
github.com/h2non/gock v1.2.0
2121
github.com/klauspost/cpuid/v2 v2.2.6
22-
github.com/kubewharf/katalyst-api v0.5.8-0.20251208194138-ba8b11c02f5b
22+
github.com/kubewharf/katalyst-api v0.5.8-0.20251209195727-bde2ce2b9b13
2323
github.com/moby/sys/mountinfo v0.6.2
2424
github.com/montanaflynn/stats v0.7.1
2525
github.com/opencontainers/runc v1.1.6

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -576,8 +576,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
576576
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
577577
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
578578
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
579-
github.com/kubewharf/katalyst-api v0.5.8-0.20251208194138-ba8b11c02f5b h1:8mB1GVGkhS0Fs60BCSyh/ytmHt5eD6LrFoQaC4O9iuM=
580-
github.com/kubewharf/katalyst-api v0.5.8-0.20251208194138-ba8b11c02f5b/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
579+
github.com/kubewharf/katalyst-api v0.5.8-0.20251209195727-bde2ce2b9b13 h1:/R3HuWFoJkynuCnYF7wZ9p9KkzUgYgtDXZlxCeggMgM=
580+
github.com/kubewharf/katalyst-api v0.5.8-0.20251209195727-bde2ce2b9b13/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
581581
github.com/kubewharf/kubelet v1.24.6-kubewharf.9 h1:jOTYZt7h/J7I8xQMKMUcJjKf5UFBv37jHWvNp5VRFGc=
582582
github.com/kubewharf/kubelet v1.24.6-kubewharf.9/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
583583
github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8=

pkg/agent/evictionmanager/manager.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,6 @@ func (m *EvictionManger) collectEvictionResult(ctx context.Context, pods []*v1.P
453453
}
454454
}
455455
}
456-
457456
resp, err := m.endpoints[pluginName].GetTopEvictionPods(context.Background(), &pluginapi.GetTopEvictionPodsRequest{
458457
ActivePods: activePods,
459458
TopN: 1,

pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/cpu_eviciton.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ func init() {
3636
RegisterCPUEvictionInitializer(strategy.EvictionNameLoad, strategy.NewCPUPressureLoadEviction)
3737
RegisterCPUEvictionInitializer(strategy.EvictionNameSuppression, strategy.NewCPUPressureSuppressionEviction)
3838
RegisterCPUEvictionInitializer(strategy.EvictionNameNumaCpuPressure, strategy.NewCPUPressureUsageEviction)
39+
RegisterCPUEvictionInitializer(strategy.EvictionNameNumaSysCpuPressure, strategy.NewSysCPUPressureUsageEviction)
3940
}
4041

4142
var cpuEvictionInitializers sync.Map

pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/rules/types.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,17 @@ type NumaOverStat struct {
5050
Gap float64
5151
}
5252

53+
// NumaSysOverStat records, for one NUMA node, averaged CPU usage figures and
// flags telling whether those figures are over their soft/hard thresholds.
// NOTE(review): the code that fills this struct is not visible here; the
// per-field descriptions below are inferred from the field names — confirm.
type NumaSysOverStat struct {
	// NumaID identifies the NUMA node the stat belongs to.
	NumaID int
	// NumaCPUUsageAvg is presumably the average CPU usage of the NUMA node.
	NumaCPUUsageAvg float64
	// NumaSysCPUUsageAvg is presumably the average system CPU usage of the NUMA node.
	NumaSysCPUUsageAvg float64

	// IsNumaCPUUsageSoftOver / IsNumaCPUUsageHardOver presumably report whether
	// the NUMA CPU usage exceeds its soft / hard threshold.
	IsNumaCPUUsageSoftOver bool
	IsNumaCPUUsageHardOver bool
	// IsNumaSysCPUUsageSoftOver / IsNumaSysCPUUsageHardOver presumably report
	// whether the NUMA system CPU usage exceeds its soft / hard threshold.
	IsNumaSysCPUUsageSoftOver bool
	IsNumaSysCPUUsageHardOver bool
}
}
63+
5364
type EvictOptions struct {
5465
NumaPressureConfig *NumaPressureConfig
5566
State State

0 commit comments

Comments
 (0)