Skip to content

Commit b1af056

Browse files
authored
Merge pull request #472 from WangZzzhe/dev/overcommit-bindcpu
feat(overcommit): add realtime overcommit advisor plugin
2 parents 404d1b2 + 88bd3ae commit b1af056

File tree

42 files changed

+4242
-81
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+4242
-81
lines changed

cmd/base/context_fake.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ import (
2121
"reflect"
2222
"strconv"
2323

24+
nodev1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1"
25+
2426
"github.com/pkg/errors"
2527
appsv1 "k8s.io/api/apps/v1"
2628
v1 "k8s.io/api/core/v1"
@@ -241,6 +243,7 @@ func GenerateFakeGenericContext(objects ...[]runtime.Object) (*GenericContext, e
241243
utilruntime.Must(v1alpha1.AddToScheme(scheme))
242244
utilruntime.Must(overcommitapis.AddToScheme(scheme))
243245
utilruntime.Must(apiregistration.AddToScheme(scheme))
246+
utilruntime.Must(nodev1alpha1.AddToScheme(scheme))
244247

245248
fakeMetaClient := metaFake.NewSimpleMetadataClient(scheme, nilObjectFilter(metaObjects)...)
246249
fakeInternalClient := externalfake.NewSimpleClientset(nilObjectFilter(internalObjects)...)
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
Copyright 2022 The Katalyst Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package overcommit
18+
19+
import (
20+
"k8s.io/apimachinery/pkg/util/errors"
21+
cliflag "k8s.io/component-base/cli/flag"
22+
23+
"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options/sysadvisor/overcommit/realtime"
24+
"github.com/kubewharf/katalyst-core/pkg/config/agent/sysadvisor/overcommit"
25+
)
26+
27+
// OvercommitAwarePluginOptions groups the command-line options of the
// overcommit-aware plugin. It currently consists solely of the embedded
// realtime overcommit options.
type OvercommitAwarePluginOptions struct {
28+
// RealtimeOvercommitOptions holds the options of the realtime overcommit advisor.
*realtime.RealtimeOvercommitOptions
29+
}
30+
31+
// NewOvercommitAwarePluginOptions creates a new Options with a default config.
32+
func NewOvercommitAwarePluginOptions() *OvercommitAwarePluginOptions {
33+
return &OvercommitAwarePluginOptions{
34+
RealtimeOvercommitOptions: realtime.NewRealtimeOvercommitOptions(),
35+
}
36+
}
37+
38+
func (o *OvercommitAwarePluginOptions) AddFlags(fss *cliflag.NamedFlagSets) {
39+
fs := fss.FlagSet("overcommit_aware_plugin")
40+
41+
o.RealtimeOvercommitOptions.AddFlags(fs)
42+
}
43+
44+
func (o *OvercommitAwarePluginOptions) ApplyTo(c *overcommit.OvercommitAwarePluginConfiguration) error {
45+
var errList []error
46+
47+
errList = append(errList, o.RealtimeOvercommitOptions.ApplyTo(c.RealtimeOvercommitConfiguration))
48+
49+
return errors.NewAggregate(errList)
50+
}
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
Copyright 2022 The Katalyst Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package realtime
18+
19+
import (
20+
"time"
21+
22+
"github.com/spf13/pflag"
23+
24+
"github.com/kubewharf/katalyst-core/pkg/config/agent/sysadvisor/overcommit/realtime"
25+
)
26+
27+
// RealtimeOvercommitOptions holds the command-line options of the realtime
// overcommit advisor. Defaults are set by NewRealtimeOvercommitOptions and the
// values are copied into the configuration by ApplyTo.
type RealtimeOvercommitOptions struct {
28+
// SyncPeriod is the period at which the advisor calculates the node
// resource overcommit ratio.
SyncPeriod time.Duration
29+
// SyncPodTimeout is the timeout for the advisor to list pods.
SyncPodTimeout time.Duration
30+
31+
// TargetCPULoad is the target node load used to calculate the node CPU
// overcommit ratio.
TargetCPULoad float64
32+
// TargetMemoryLoad is the target node load used to calculate the node
// memory overcommit ratio.
TargetMemoryLoad float64
33+
// EstimatedPodCPULoad is the estimated pod load used to calculate the node
// CPU overcommit ratio.
EstimatedPodCPULoad float64
34+
// EstimatedPodMemoryLoad is the estimated pod load used to calculate the
// node memory overcommit ratio.
EstimatedPodMemoryLoad float64
35+
36+
// CPUMetricsToGather lists the metrics used to calculate the node CPU
// overcommitment ratio.
CPUMetricsToGather []string
37+
// MemoryMetricsToGather lists the metrics used to calculate the node
// memory overcommitment ratio.
MemoryMetricsToGather []string
38+
}
39+
40+
func NewRealtimeOvercommitOptions() *RealtimeOvercommitOptions {
41+
return &RealtimeOvercommitOptions{
42+
SyncPeriod: 10 * time.Second,
43+
SyncPodTimeout: 2 * time.Second,
44+
TargetCPULoad: 0.6,
45+
TargetMemoryLoad: 0.8,
46+
EstimatedPodCPULoad: 0.4,
47+
EstimatedPodMemoryLoad: 0.8,
48+
49+
CPUMetricsToGather: []string{},
50+
MemoryMetricsToGather: []string{},
51+
}
52+
}
53+
54+
func (r *RealtimeOvercommitOptions) AddFlags(fs *pflag.FlagSet) {
55+
fs.DurationVar(&r.SyncPeriod, "realtime-overcommit-sync-period", r.SyncPeriod,
56+
"period for realtime overcommit advisor to calculate node resource overcommit ratio")
57+
fs.DurationVar(&r.SyncPodTimeout, "realtime-overcommit-sync-pod-timeout", r.SyncPodTimeout,
58+
"timeout for realtime overcommit advisor to list pod")
59+
fs.Float64Var(&r.TargetCPULoad, "realtime-overcommit-CPU-targetload", r.TargetCPULoad,
60+
"target node load for realtime overcommit advisor to calculate node CPU overcommit ratio")
61+
fs.Float64Var(&r.TargetMemoryLoad, "realtime-overcommit-mem-targetload", r.TargetMemoryLoad,
62+
"target node load for realtime overcommit advisor to calculate node memory overcommit ratio")
63+
fs.Float64Var(&r.EstimatedPodCPULoad, "realtime-overcommit-estimated-cpuload", r.EstimatedPodCPULoad,
64+
"estimated pod load for realtime overcommit advisor to calculate node CPU overcommit ratio")
65+
fs.Float64Var(&r.EstimatedPodMemoryLoad, "realtime-overcommit-estimated-memload", r.EstimatedPodMemoryLoad,
66+
"estimated pod load for realtime overcommit advisor to calculate node memory overcommit ratio")
67+
fs.StringSliceVar(&r.CPUMetricsToGather, "CPU-metrics-to-gather", r.CPUMetricsToGather,
68+
"metrics list used to calculate node cpu overcommitment ratio")
69+
fs.StringSliceVar(&r.MemoryMetricsToGather, "memory-metrics-to-gather", r.MemoryMetricsToGather,
70+
"metrics list used to calculate node memory overcommitment ratio")
71+
}
72+
73+
func (r *RealtimeOvercommitOptions) ApplyTo(o *realtime.RealtimeOvercommitConfiguration) error {
74+
o.SyncPeriod = r.SyncPeriod
75+
o.SyncPodTimeout = r.SyncPodTimeout
76+
77+
if r.TargetCPULoad > 0.0 && r.TargetMemoryLoad < 1.0 {
78+
o.TargetCPULoad = r.TargetCPULoad
79+
}
80+
if r.TargetMemoryLoad > 0.0 && r.TargetMemoryLoad < 1.0 {
81+
o.TargetMemoryLoad = r.TargetMemoryLoad
82+
}
83+
if r.EstimatedPodCPULoad > 0.0 && r.EstimatedPodCPULoad < 1.0 {
84+
o.EstimatedPodCPULoad = r.EstimatedPodCPULoad
85+
}
86+
if r.EstimatedPodMemoryLoad > 0.0 && r.EstimatedPodMemoryLoad < 1.0 {
87+
o.EstimatedPodMemoryLoad = r.EstimatedPodMemoryLoad
88+
}
89+
90+
o.CPUMetricsToGather = r.CPUMetricsToGather
91+
o.MemoryMetricsToGather = r.MemoryMetricsToGather
92+
93+
return nil
94+
}

cmd/katalyst-agent/app/options/sysadvisor/sysadvisor_base.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options/sysadvisor/inference"
2626
"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options/sysadvisor/metacache"
2727
metricemitter "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options/sysadvisor/metric-emitter"
28+
"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options/sysadvisor/overcommit"
2829
"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options/sysadvisor/qosaware"
2930
"github.com/kubewharf/katalyst-core/pkg/config/agent/sysadvisor"
3031
)
@@ -75,15 +76,17 @@ type SysAdvisorPluginsOptions struct {
7576
*metacache.MetaCachePluginOptions
7677
*metricemitter.MetricEmitterPluginOptions
7778
*inference.InferencePluginOptions
79+
*overcommit.OvercommitAwarePluginOptions
7880
}
7981

8082
// NewSysAdvisorPluginsOptions creates a new Options with a default config.
8183
func NewSysAdvisorPluginsOptions() *SysAdvisorPluginsOptions {
8284
return &SysAdvisorPluginsOptions{
83-
QoSAwarePluginOptions: qosaware.NewQoSAwarePluginOptions(),
84-
MetaCachePluginOptions: metacache.NewMetaCachePluginOptions(),
85-
MetricEmitterPluginOptions: metricemitter.NewMetricEmitterPluginOptions(),
86-
InferencePluginOptions: inference.NewInferencePluginOptions(),
85+
QoSAwarePluginOptions: qosaware.NewQoSAwarePluginOptions(),
86+
MetaCachePluginOptions: metacache.NewMetaCachePluginOptions(),
87+
MetricEmitterPluginOptions: metricemitter.NewMetricEmitterPluginOptions(),
88+
InferencePluginOptions: inference.NewInferencePluginOptions(),
89+
OvercommitAwarePluginOptions: overcommit.NewOvercommitAwarePluginOptions(),
8790
}
8891
}
8992

@@ -93,6 +96,7 @@ func (o *SysAdvisorPluginsOptions) AddFlags(fss *cliflag.NamedFlagSets) {
9396
o.MetaCachePluginOptions.AddFlags(fss)
9497
o.MetricEmitterPluginOptions.AddFlags(fss)
9598
o.InferencePluginOptions.AddFlags(fss)
99+
o.OvercommitAwarePluginOptions.AddFlags(fss)
96100
}
97101

98102
// ApplyTo fills up config with options
@@ -102,6 +106,7 @@ func (o *SysAdvisorPluginsOptions) ApplyTo(c *sysadvisor.SysAdvisorPluginsConfig
102106
errList = append(errList, o.MetaCachePluginOptions.ApplyTo(c.MetaCachePluginConfiguration))
103107
errList = append(errList, o.MetricEmitterPluginOptions.ApplyTo(c.MetricEmitterPluginConfiguration))
104108
errList = append(errList, o.InferencePluginOptions.ApplyTo(c.InferencePluginConfiguration))
109+
errList = append(errList, o.OvercommitAwarePluginOptions.ApplyTo(c.OvercommitAwarePluginConfiguration))
105110
return errors.NewAggregate(errList)
106111
}
107112

cmd/katalyst-scheduler/app/server.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -357,8 +357,9 @@ func Setup(ctx context.Context, opts *options.Options, outOfTreeRegistryOptions
357357
return nil, nil, err
358358
}
359359

360-
eventhandlers.AddCNREventHandler(cc.InformerFactory, cc.InternalInformerFactory)
361-
eventhandlers.AddPodEventHandler(cc.InformerFactory, cc.InternalInformerFactory)
360+
for _, handlerFunc := range eventhandlers.ListEventHandlerFunc() {
361+
handlerFunc(cc.InformerFactory, cc.InternalInformerFactory)
362+
}
362363

363364
return &cc, sched, nil
364365
}

cmd/katalyst-scheduler/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"k8s.io/component-base/logs"
2424

2525
"github.com/kubewharf/katalyst-core/cmd/katalyst-scheduler/app"
26+
"github.com/kubewharf/katalyst-core/pkg/scheduler/plugins/nodeovercommitment"
2627
"github.com/kubewharf/katalyst-core/pkg/scheduler/plugins/noderesourcetopology"
2728
"github.com/kubewharf/katalyst-core/pkg/scheduler/plugins/qosawarenoderesources"
2829

@@ -38,6 +39,7 @@ func main() {
3839
app.WithPlugin(qosawarenoderesources.FitName, qosawarenoderesources.NewFit),
3940
app.WithPlugin(qosawarenoderesources.BalancedAllocationName, qosawarenoderesources.NewBalancedAllocation),
4041
app.WithPlugin(noderesourcetopology.TopologyMatchName, noderesourcetopology.New),
42+
app.WithPlugin(nodeovercommitment.Name, nodeovercommitment.New),
4143
)
4244

4345
if err := runCommand(command); err != nil {

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ require (
1616
github.com/google/cadvisor v0.44.2
1717
github.com/google/uuid v1.3.0
1818
github.com/klauspost/cpuid/v2 v2.2.6
19-
github.com/kubewharf/katalyst-api v0.4.1-0.20240416065828-9edab1e2f1f1
19+
github.com/kubewharf/katalyst-api v0.4.1-0.20240423064035-1a0977f4e08c
2020
github.com/montanaflynn/stats v0.7.1
2121
github.com/opencontainers/runc v1.1.6
2222
github.com/opencontainers/selinux v1.10.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,8 +554,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
554554
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
555555
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
556556
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
557-
github.com/kubewharf/katalyst-api v0.4.1-0.20240416065828-9edab1e2f1f1 h1:wRSFa6v3ONl2D8ZsEyIj3O/I2euSqbwXgWsPdy7w6oY=
558-
github.com/kubewharf/katalyst-api v0.4.1-0.20240416065828-9edab1e2f1f1/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
557+
github.com/kubewharf/katalyst-api v0.4.1-0.20240423064035-1a0977f4e08c h1:8H3twj9fHNtR06Tn5quV7oJuDsuP2R8wFeNlQvJTHB8=
558+
github.com/kubewharf/katalyst-api v0.4.1-0.20240423064035-1a0977f4e08c/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
559559
github.com/kubewharf/kubelet v1.24.6-kubewharf.8 h1:2e89T/nZTgzaVhyRsZuwEdRk8V8kJXs4PRkgfeG4Ai4=
560560
github.com/kubewharf/kubelet v1.24.6-kubewharf.8/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
561561
github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8=
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
Copyright 2022 The Katalyst Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package overcommitmentaware
18+
19+
import (
20+
"context"
21+
22+
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/metacache"
23+
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin"
24+
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/overcommitmentaware/realtime"
25+
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/overcommitmentaware/reporter"
26+
"github.com/kubewharf/katalyst-core/pkg/config"
27+
"github.com/kubewharf/katalyst-core/pkg/metaserver"
28+
"github.com/kubewharf/katalyst-core/pkg/metrics"
29+
metricspool "github.com/kubewharf/katalyst-core/pkg/metrics/metrics-pool"
30+
)
31+
32+
const (
33+
// PluginName is the identifier string of the overcommitment-aware plugin.
// NOTE(review): where this name is registered or consumed is not visible in this file.
PluginName = "overcommitment-aware-plugin"
34+
)
35+
36+
// OvercommitmentAwarePlugin calculates node overcommitment ratio,
37+
// values will be reported to node KCNR annotations by the reporter.
38+
type OvercommitmentAwarePlugin struct {
39+
// name is the plugin name returned by Name.
name string
40+
41+
// realtimeAdvisor is started in its own goroutine by Run.
realtimeAdvisor *realtime.RealtimeOvercommitmentAdvisor
42+
// reporter is started in its own goroutine by Run.
reporter reporter.OvercommitRatioReporter
43+
44+
// emitter is a metrics emitter handle.
// NOTE(review): no method in this file reads it — confirm whether it is still needed.
emitter metrics.MetricEmitter
45+
}
46+
47+
func NewOvercommitmentAwarePlugin(
48+
pluginName string, conf *config.Configuration,
49+
_ interface{},
50+
emitterPool metricspool.MetricsEmitterPool,
51+
metaServer *metaserver.MetaServer,
52+
_ metacache.MetaCache,
53+
) (plugin.SysAdvisorPlugin, error) {
54+
emitter := emitterPool.GetDefaultMetricsEmitter()
55+
56+
realtimeOvercommitmentAdvisor := realtime.NewRealtimeOvercommitmentAdvisor(conf, metaServer, emitter)
57+
58+
overcommitRatioReporter, err := reporter.NewOvercommitRatioReporter(emitter, conf, realtimeOvercommitmentAdvisor, metaServer)
59+
if err != nil {
60+
return nil, err
61+
}
62+
63+
op := &OvercommitmentAwarePlugin{
64+
name: pluginName,
65+
66+
realtimeAdvisor: realtimeOvercommitmentAdvisor,
67+
reporter: overcommitRatioReporter,
68+
}
69+
70+
return op, nil
71+
}
72+
73+
// Run starts the realtime advisor and the reporter, each in its own goroutine
// and each receiving ctx, then returns without waiting for either of them.
func (op *OvercommitmentAwarePlugin) Run(ctx context.Context) {
74+
go op.realtimeAdvisor.Run(ctx)
75+
76+
go op.reporter.Run(ctx)
77+
}
78+
79+
// Name returns the plugin name that was passed to NewOvercommitmentAwarePlugin.
func (op *OvercommitmentAwarePlugin) Name() string {
80+
return op.name
81+
}
82+
83+
// Init performs no work and always returns nil.
func (op *OvercommitmentAwarePlugin) Init() error {
84+
return nil
85+
}

0 commit comments

Comments
 (0)