Skip to content

Commit e166103

Browse files
committed
feat(*): support pod annotation verification
1 parent 8c1681c commit e166103

File tree

10 files changed

+263
-1
lines changed

10 files changed

+263
-1
lines changed

cmd/katalyst-agent/app/options/qrm/memory_plugin.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ type MemoryOptions struct {
4242
EnableNonBindingShareCoresMemoryResourceCheck bool
4343
EnableNUMAAllocationReactor bool
4444
NUMABindResultResourceAllocationAnnotationKey string
45+
EnableMemoryAnnotationValidator bool
46+
MemoryAnnotationValidatorDryRun bool
4547

4648
SockMemOptions
4749
LogCacheOptions
@@ -126,6 +128,8 @@ func NewMemoryOptions() *MemoryOptions {
126128
EnableNonBindingShareCoresMemoryResourceCheck: true,
127129
EnableNUMAAllocationReactor: false,
128130
NUMABindResultResourceAllocationAnnotationKey: consts.QRMResourceAnnotationKeyNUMABindResult,
131+
EnableMemoryAnnotationValidator: true,
132+
MemoryAnnotationValidatorDryRun: true,
129133
SockMemOptions: SockMemOptions{
130134
EnableSettingSockMem: false,
131135
SetGlobalTCPMemRatio: 20, // default: 20% * {host total memory}
@@ -187,6 +191,10 @@ func (o *MemoryOptions) AddFlags(fss *cliflag.NamedFlagSets) {
187191
o.EnableNUMAAllocationReactor, "enable numa allocation reactor for numa binding pods to patch pod numa binding result annotation")
188192
fs.StringVar(&o.NUMABindResultResourceAllocationAnnotationKey, "numa-bind-result-resource-allocation-annotation-key",
189193
o.NUMABindResultResourceAllocationAnnotationKey, "the key of numa bind result resource allocation annotation")
194+
fs.BoolVar(&o.EnableMemoryAnnotationValidator, "enable-memory-annotation-validator",
195+
o.EnableMemoryAnnotationValidator, "enable the memory annotation validator for pods already have memory allocated by runtime")
196+
fs.BoolVar(&o.MemoryAnnotationValidatorDryRun, "memory-annotation-validator-dry-run",
197+
o.MemoryAnnotationValidatorDryRun, "enable the dry run mode for memory annotation validator")
190198
fs.StringVar(&o.OOMPriorityPinnedMapAbsPath, "oom-priority-pinned-bpf-map-path",
191199
o.OOMPriorityPinnedMapAbsPath, "the absolute path of oom priority pinned bpf map")
192200
fs.BoolVar(&o.EnableSettingSockMem, "enable-setting-sockmem",
@@ -248,6 +256,8 @@ func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error {
248256
conf.EnableOOMPriority = o.EnableOOMPriority
249257
conf.EnableNonBindingShareCoresMemoryResourceCheck = o.EnableNonBindingShareCoresMemoryResourceCheck
250258
conf.EnableNUMAAllocationReactor = o.EnableNUMAAllocationReactor
259+
conf.EnableMemoryAnnotationValidator = o.EnableMemoryAnnotationValidator
260+
conf.MemoryAnnotationValidatorDryRun = o.MemoryAnnotationValidatorDryRun
251261
conf.NUMABindResultResourceAllocationAnnotationKey = o.NUMABindResultResourceAllocationAnnotationKey
252262
conf.OOMPriorityPinnedMapAbsPath = o.OOMPriorityPinnedMapAbsPath
253263
conf.EnableSettingSockMem = o.EnableSettingSockMem

cmd/katalyst-agent/app/options/qrm/network_plugin.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ type NetworkOptions struct {
4040
NetBandwidthResourceAllocationAnnotationKey string
4141
NICHealthCheckers []string
4242
EnableNICAllocationReactor bool
43+
EnableNICAnnotationValidator bool
44+
AnnotationValidatorDryRun bool
4345
}
4446

4547
type NetClassOptions struct {
@@ -69,6 +71,8 @@ func NewNetworkOptions() *NetworkOptions {
6971
NetClassIDResourceAllocationAnnotationKey: "qrm.katalyst.kubewharf.io/netcls_id",
7072
NetBandwidthResourceAllocationAnnotationKey: "qrm.katalyst.kubewharf.io/net_bandwidth",
7173
EnableNICAllocationReactor: true,
74+
EnableNICAnnotationValidator: true,
75+
AnnotationValidatorDryRun: true,
7276
NICHealthCheckers: []string{"*"},
7377
}
7478
}
@@ -112,6 +116,10 @@ func (o *NetworkOptions) AddFlags(fss *cliflag.NamedFlagSets) {
112116
o.NetBandwidthResourceAllocationAnnotationKey, "The annotation key of allocated bandwidth for the container, which is ready by runtime")
113117
fs.BoolVar(&o.EnableNICAllocationReactor, "enable-network-resource-plugin-nic-allocation-reactor",
114118
o.EnableNICAllocationReactor, "enable network allocation reactor, default is true")
119+
fs.BoolVar(&o.EnableNICAnnotationValidator, "enable-network-resource-plugin-nic-annotation-validator",
120+
o.EnableNICAnnotationValidator, "enable network annotation validator, default is true")
121+
fs.BoolVar(&o.AnnotationValidatorDryRun, "network-resource-plugin-annotation-validator-dry-run",
122+
o.AnnotationValidatorDryRun, "enable the dry run mode for network annotation validator, default is true")
115123
fs.StringSliceVar(&o.NICHealthCheckers, "network-resource-plugin-nic-health-checkers",
116124
o.NICHealthCheckers, "list of nic health checkers, '*' run all on-by-default checkers,"+
117125
"'ip' run checker 'ip', '-ip' not run checker 'ip'")
@@ -136,7 +144,9 @@ func (o *NetworkOptions) ApplyTo(conf *qrmconfig.NetworkQRMPluginConfig) error {
136144
conf.NetClassIDResourceAllocationAnnotationKey = o.NetClassIDResourceAllocationAnnotationKey
137145
conf.NetBandwidthResourceAllocationAnnotationKey = o.NetBandwidthResourceAllocationAnnotationKey
138146
conf.EnableNICAllocationReactor = o.EnableNICAllocationReactor
147+
conf.EnableNICAnnotationValidator = o.EnableNICAnnotationValidator
139148
conf.NICHealthCheckers = o.NICHealthCheckers
149+
conf.NICAnnotationValidatorDryRun = o.AnnotationValidatorDryRun
140150

141151
return nil
142152
}

pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,14 @@ import (
4343
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/oom"
4444
memoryreactor "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/reactor"
4545
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/state"
46+
memoryvalidator "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/validator"
4647
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/handlers/fragmem"
4748
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/handlers/hostwatermark"
4849
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/handlers/logcache"
4950
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/handlers/sockmem"
5051
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
5152
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util/reactor"
53+
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util/validator"
5254
"github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/featuregatenegotiation"
5355
"github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/periodicalhandler"
5456
"github.com/kubewharf/katalyst-core/pkg/config"
@@ -165,6 +167,9 @@ type DynamicPolicy struct {
165167

166168
numaAllocationReactor reactor.AllocationReactor
167169
numaBindResultResourceAllocationAnnotationKey string
170+
171+
memoryAnnotationValidator validator.AnnotationValidator
172+
annotationValidatorDryRun bool
168173
}
169174

170175
func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration,
@@ -237,6 +242,7 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration
237242
resctrlHinter: newResctrlHinter(&conf.ResctrlConfig, wrappedEmitter),
238243
enableNonBindingShareCoresMemoryResourceCheck: conf.EnableNonBindingShareCoresMemoryResourceCheck,
239244
numaBindResultResourceAllocationAnnotationKey: conf.NUMABindResultResourceAllocationAnnotationKey,
245+
annotationValidatorDryRun: conf.NICAnnotationValidatorDryRun,
240246
}
241247

242248
policyImplement.allocationHandlers = map[string]util.AllocationHandler{
@@ -298,6 +304,13 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration
298304
))
299305
}
300306

307+
policyImplement.memoryAnnotationValidator = validator.DummyAnnotationValidator{}
308+
if conf.EnableMemoryAnnotationValidator {
309+
policyImplement.memoryAnnotationValidator = memoryvalidator.NewMemoryAnnotationValidator(conf,
310+
agentCtx.Client.KubeClient,
311+
agentCtx.MetaServer.PodFetcher)
312+
}
313+
301314
return true, &agent.PluginWrapper{GenericPlugin: pluginWrapper}, nil
302315
}
303316

@@ -920,6 +933,23 @@ func (p *DynamicPolicy) Allocate(ctx context.Context,
920933
// we should do it before GetKatalystQoSLevelFromResourceReq.
921934
isDebugPod := util.IsDebugPod(req.Annotations, p.podDebugAnnoKeys)
922935

936+
valid, err := p.memoryAnnotationValidator.ValidatePodAnnotation(ctx, req.PodUid, req.PodNamespace, req.PodName)
937+
if !isDebugPod || !valid || err != nil {
938+
general.Warningf("pod annotations verification failed: %v", err)
939+
940+
metricTags := []metrics.MetricTag{
941+
{Key: "pod_uid", Val: req.PodUid},
942+
{Key: "pod_namespace", Val: req.PodNamespace},
943+
{Key: "pod_name", Val: req.PodName},
944+
{Key: "error_message", Val: metric.MetricTagValueFormat(err)},
945+
}
946+
p.emitter.StoreInt64(util.MetricNamePodAnnotationVerificationFailed, 1, metrics.MetricTypeNameRaw, metricTags...)
947+
948+
if !p.annotationValidatorDryRun {
949+
return nil, fmt.Errorf("pod annotations verification failed: %v", err)
950+
}
951+
}
952+
923953
existReallocAnno, isReallocation := util.IsReallocation(req.Annotations)
924954

925955
qosLevel, err := util.GetKatalystQoSLevelFromResourceReq(p.qosConfig, req, p.podAnnotationKeptKeys, p.podLabelKeptKeys)
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
Copyright 2022 The Katalyst Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package validator
18+
19+
import (
20+
"context"
21+
22+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
23+
"k8s.io/client-go/kubernetes"
24+
25+
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
26+
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util/validator"
27+
"github.com/kubewharf/katalyst-core/pkg/config"
28+
"github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod"
29+
)
30+
31+
type memoryAnnotationValidator struct {
32+
conf *config.Configuration
33+
client kubernetes.Interface
34+
podFetcher pod.PodFetcher
35+
}
36+
37+
func NewMemoryAnnotationValidator(conf *config.Configuration, client kubernetes.Interface, podFetcher pod.PodFetcher) validator.AnnotationValidator {
38+
return &memoryAnnotationValidator{
39+
conf: conf,
40+
client: client,
41+
podFetcher: podFetcher,
42+
}
43+
}
44+
45+
func (n *memoryAnnotationValidator) ValidatePodAnnotation(ctx context.Context, podUID, podNameSpace, podName string) (bool, error) {
46+
annoList := map[string]string{
47+
util.AnnotationRdtClosID: "",
48+
util.AnnotationRdtNeedPodMonGroups: "",
49+
}
50+
51+
getPod, err := n.podFetcher.GetPod(context.WithValue(ctx, pod.BypassCacheKey, pod.BypassCacheTrue), podUID)
52+
if err != nil {
53+
getPod, err = n.client.CoreV1().Pods(podNameSpace).Get(ctx, podName, metav1.GetOptions{ResourceVersion: "0"})
54+
if err != nil {
55+
return false, err
56+
}
57+
}
58+
59+
return validator.ValidatePodAnnotations(getPod, annoList)
60+
}

pkg/agent/qrm-plugins/network/staticpolicy/policy.go

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,10 @@ import (
4040
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/network/state"
4141
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/network/staticpolicy/nic"
4242
networkreactor "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/network/staticpolicy/reactor"
43+
networkvalidator "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/network/staticpolicy/validator"
4344
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
4445
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util/reactor"
46+
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util/validator"
4547
"github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/periodicalhandler"
4648
"github.com/kubewharf/katalyst-core/pkg/config"
4749
agentconfig "github.com/kubewharf/katalyst-core/pkg/config/agent"
@@ -109,7 +111,8 @@ type StaticPolicy struct {
109111

110112
lowPriorityGroups map[string]*qrmgeneral.NetworkGroup
111113

112-
nicAllocationReactor reactor.AllocationReactor
114+
nicAllocationReactor reactor.AllocationReactor
115+
nicAnnotationValidator validator.AnnotationValidator
113116

114117
nicManager nic.NICManager
115118

@@ -188,6 +191,13 @@ func NewStaticPolicy(agentCtx *agent.GenericContext, conf *config.Configuration,
188191
))
189192
}
190193

194+
policyImplement.nicAnnotationValidator = validator.DummyAnnotationValidator{}
195+
if conf.EnableNICAnnotationValidator {
196+
policyImplement.nicAnnotationValidator = networkvalidator.NewNICAnnotationValidator(conf,
197+
agentCtx.Client.KubeClient,
198+
agentCtx.MetaServer.PodFetcher)
199+
}
200+
191201
pluginWrapper, err := skeleton.NewRegistrationPluginWrapper(policyImplement, conf.QRMPluginSocketDirs,
192202
func(key string, value int64) {
193203
_ = wrappedEmitter.StoreInt64(key, value, metrics.MetricTypeNameRaw)
@@ -668,6 +678,23 @@ func (p *StaticPolicy) Allocate(ctx context.Context,
668678
return nil, fmt.Errorf("Allocate got nil req")
669679
}
670680

681+
valid, err := p.nicAnnotationValidator.ValidatePodAnnotation(ctx, req.PodUid, req.PodNamespace, req.PodName)
682+
if !valid || err != nil {
683+
general.Warningf("pod annotations verification failed: %v", err)
684+
685+
metricTags := []metrics.MetricTag{
686+
{Key: "pod_uid", Val: req.PodUid},
687+
{Key: "pod_namespace", Val: req.PodNamespace},
688+
{Key: "pod_name", Val: req.PodName},
689+
{Key: "error_message", Val: metric.MetricTagValueFormat(err)},
690+
}
691+
p.emitter.StoreInt64(util.MetricNamePodAnnotationVerificationFailed, 1, metrics.MetricTypeNameRaw, metricTags...)
692+
693+
if !p.qrmConfig.NICAnnotationValidatorDryRun {
694+
return nil, fmt.Errorf("pod annotations verification failed: %v", err)
695+
}
696+
}
697+
671698
existReallocAnno, isReallocation := util.IsReallocation(req.Annotations)
672699

673700
// since qos config util will filter out annotation keys not related to katalyst QoS,
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
Copyright 2022 The Katalyst Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package validator
18+
19+
import (
20+
"context"
21+
22+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
23+
"k8s.io/client-go/kubernetes"
24+
25+
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util/validator"
26+
"github.com/kubewharf/katalyst-core/pkg/config"
27+
"github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod"
28+
)
29+
30+
type nicAnnotationValidator struct {
31+
conf *config.Configuration
32+
client kubernetes.Interface
33+
podFetcher pod.PodFetcher
34+
}
35+
36+
func NewNICAnnotationValidator(conf *config.Configuration, client kubernetes.Interface, podFetcher pod.PodFetcher) validator.AnnotationValidator {
37+
return &nicAnnotationValidator{
38+
conf: conf,
39+
client: client,
40+
podFetcher: podFetcher,
41+
}
42+
}
43+
44+
func (n *nicAnnotationValidator) ValidatePodAnnotation(ctx context.Context, podUID, podNameSpace, podName string) (bool, error) {
45+
annoList := map[string]string{
46+
n.conf.NetworkQRMPluginConfig.IPv4ResourceAllocationAnnotationKey: "",
47+
n.conf.NetworkQRMPluginConfig.IPv6ResourceAllocationAnnotationKey: "",
48+
n.conf.NetworkQRMPluginConfig.NetNSPathResourceAllocationAnnotationKey: "",
49+
n.conf.NetworkQRMPluginConfig.NetInterfaceNameResourceAllocationAnnotationKey: "",
50+
n.conf.NetworkQRMPluginConfig.NetClassIDResourceAllocationAnnotationKey: "",
51+
n.conf.NetworkQRMPluginConfig.NetBandwidthResourceAllocationAnnotationKey: "",
52+
}
53+
54+
getPod, err := n.podFetcher.GetPod(context.WithValue(ctx, pod.BypassCacheKey, pod.BypassCacheTrue), podUID)
55+
if err != nil {
56+
getPod, err = n.client.CoreV1().Pods(podNameSpace).Get(ctx, podName, metav1.GetOptions{ResourceVersion: "0"})
57+
if err != nil {
58+
return false, err
59+
}
60+
}
61+
62+
return validator.ValidatePodAnnotations(getPod, annoList)
63+
}

pkg/agent/qrm-plugins/util/consts.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ const (
9595
MetricNameIrqTuningNicExclusiveIrqCoresCpuUtilMin = "irq_tuning_nic_exclusive_irq_cores_cpu_util_Min"
9696
MetricNameIrqTuningNicExclusiveIrqCoresCpuUsage = "irq_tuning_nic_exclusive_irq_cores_cpu_usage"
9797
MetricNameIrqTuningErr = "irq_tuning_err"
98+
99+
// metrics for pod annotation validator
100+
MetricNamePodAnnotationVerificationFailed = "pod_annotation_verification_failed"
98101
)
99102

100103
const (
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
Copyright 2022 The Katalyst Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package validator
18+
19+
import (
20+
"context"
21+
"fmt"
22+
23+
v1 "k8s.io/api/core/v1"
24+
)
25+
26+
type AnnotationValidator interface {
27+
ValidatePodAnnotation(ctx context.Context, podUID, podNameSpace, podName string) (bool, error)
28+
}
29+
30+
type DummyAnnotationValidator struct{}
31+
32+
func (d DummyAnnotationValidator) ValidatePodAnnotation(_ context.Context, _, _, _ string) (bool, error) {
33+
return true, nil
34+
}
35+
36+
// ValidatePodAnnotations validates the pod annotations
37+
// if any annotation is forbidden, it returns true and the annotation name
38+
// otherwise, it returns false and empty string
39+
func ValidatePodAnnotations(pod *v1.Pod, forbiddenAnnos map[string]string) (bool, error) {
40+
if pod == nil || pod.Annotations == nil {
41+
return true, nil
42+
}
43+
44+
for _, anno := range pod.Annotations {
45+
if _, ok := forbiddenAnnos[anno]; ok {
46+
return false, fmt.Errorf("the pod contains an invalid annotation: %s", anno)
47+
}
48+
}
49+
50+
return true, nil
51+
}

0 commit comments

Comments
 (0)