katalyst-api/pkg/consts/qos.go at 6fcacd6c02d1360dcf9e7efd2dbf8060888da47b · kubewharf/katalyst-api · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/*
Copyright 2022 The Katalyst Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package consts

import kubeschedulerconfig "k8s.io/kubernetes/pkg/scheduler/apis/config"

// QoSLevel is the string type used to name a katalyst qos level.
type QoSLevel string

// const variables for the qos levels; each value is the literal string
// that identifies the level.
const (
	QoSLevelReclaimedCores QoSLevel = "reclaimed_cores"
	QoSLevelSharedCores    QoSLevel = "shared_cores"
	QoSLevelDedicatedCores QoSLevel = "dedicated_cores"
	QoSLevelSystemCores    QoSLevel = "system_cores"
)

// const variables for pod annotations about qos level
const (
	// PodAnnotationQoSLevelKey is the pod annotation key for the qos level.
	PodAnnotationQoSLevelKey = "katalyst.kubewharf.io/qos_level"

	// The constants below are the plain-string forms of the QoSLevel constants,
	// obtained by converting each of them to string.
	PodAnnotationQoSLevelReclaimedCores = string(QoSLevelReclaimedCores)
	PodAnnotationQoSLevelSharedCores    = string(QoSLevelSharedCores)
	PodAnnotationQoSLevelDedicatedCores = string(QoSLevelDedicatedCores)
	PodAnnotationQoSLevelSystemCores    = string(QoSLevelSystemCores)
)

// const variables for pod annotations about qos level enhancement in memory
const (
	// PodAnnotationMemoryEnhancementKey is the pod annotation key for memory enhancement.
	PodAnnotationMemoryEnhancementKey = "katalyst.kubewharf.io/memory_enhancement"

	// PodAnnotationMemoryEnhancementRssOverUseThreshold provides a mechanism to enable
	// the ability of overcommit for memory, and we will rely on this enhancement to ensure
	// memory protection if rss usage exceeds requests (based on this given ratio)
	PodAnnotationMemoryEnhancementRssOverUseThreshold = "rss_overuse_threshold"

	// PodAnnotationMemoryEnhancementNumaBinding provides a mechanism to enable numa-binding
	// for workload to provide more ultimate running performances.
	//
	// With PodAnnotationMemoryEnhancementNumaBinding but without PodAnnotationMemoryEnhancementNumaExclusive,
	// we have several constraints below:
	// 1. different workloads may still share the same numa
	//   - these workloads may still have contentions on memory bandwidth
	// 2. the request for pod can be settled in a single numa node
	//   - this is to avoid complicated cross numa memory capacity/bandwidth control
	//
	// todo: this enhancement is only supported for dedicated-cores now,
	//  the community is to support shared-cores in the short future.
	PodAnnotationMemoryEnhancementNumaBinding       = "numa_binding"
	PodAnnotationMemoryEnhancementNumaBindingEnable = "true"

	// PodAnnotationMemoryEnhancementNumaExclusive provides a mechanism to enable numa-exclusive
	// for A SINGLE Pod to avoid contention on memory bandwidth and so on.
	//
	// - this enhancement is only supported for dedicated-cores, for now and foreseeable future
	PodAnnotationMemoryEnhancementNumaExclusive       = "numa_exclusive"
	PodAnnotationMemoryEnhancementNumaExclusiveEnable = "true"

	// PodAnnotationMemoryEnhancementOOMPriority provides a mechanism to specify
	// the OOM priority for pods. Higher priority values indicate a higher likelihood
	// of surviving OOM events.
	//
	// For different QoS levels, the acceptable value ranges are as follows:
	// - reclaimed_cores: [-100, 0)
	// - shared_cores: [0, 100)
	// - dedicated_cores: [100, 200)
	// - system_cores: [200, 300)
	// Additionally, there are two predefined values for any pod:
	// - -300: Indicates that the OOM priority is ignored, and the pod does not
	//   participate in priority comparison.
	// - 300: Indicates that the OOM priority is set to the highest level, the pod
	//   will never be terminated due to OOM events from the perspective of OOM enhancement
	PodAnnotationMemoryEnhancementOOMPriority = "oom_priority"
)

// const variables for pod annotations about qos level enhancement in cpu
const (
	// PodAnnotationCPUEnhancementKey is the pod annotation key for cpu enhancement.
	PodAnnotationCPUEnhancementKey = "katalyst.kubewharf.io/cpu_enhancement"

	// PodAnnotationCPUEnhancementCPUSet provides a mechanism to separate cpuset into
	// several orthogonal pools to avoid cpu contentions for different types of workloads,
	// i.e. spark batch, flink streaming, web service may fall into three pools.
	// and, each individual pod should be put into only one pool.
	//
	// - this enhancement is only supported for shared-cores, for now and foreseeable future
	// - all pods will be settled in `default` pool if not specified
	PodAnnotationCPUEnhancementCPUSet = "cpuset_pool"

	// PodAnnotationCPUEnhancementSuppressionToleranceRate provides a mechanism to ensure
	// the quality for reclaimed resources. since reclaimed resources will always change
	// dynamically according to running states of non-reclaimed services, it may reach
	// a point that the resource contention is still tolerable for non-reclaimed services,
	// but the reclaimed services run too slow and would rather be killed and rescheduled.
	// in this case, the workload can use this enhancement to trigger eviction.
	//
	// - this enhancement is only supported for shared-cores, for now and foreseeable future
	//
	// NOTE(review): the description above is about reclaimed services, while the
	// bullet says shared-cores — confirm which qos level this actually applies to.
	PodAnnotationCPUEnhancementSuppressionToleranceRate = "suppression_tolerance_rate"

	// PodAnnotationCPUEnhancementCPUBurstPolicy indicates which policy to enable the cpu burst feature.
	// Optional values: none/static/dynamic
	// Default value: none
	PodAnnotationCPUEnhancementCPUBurstPolicy = "cpu_burst_policy"

	// PodAnnotationCPUEnhancementCPUBurstPolicyNone is the value of cpu burst policy none
	PodAnnotationCPUEnhancementCPUBurstPolicyNone = "none"

	// PodAnnotationCPUEnhancementCPUBurstPolicyStatic is the value of cpu burst policy static
	PodAnnotationCPUEnhancementCPUBurstPolicyStatic = "static"

	// PodAnnotationCPUEnhancementCPUBurstPolicyDynamic is the value of cpu burst policy dynamic
	PodAnnotationCPUEnhancementCPUBurstPolicyDynamic = "dynamic"

	// PodAnnotationCPUEnhancementCPUBurstPercent identifies the upper limit of the allowed burst percent
	PodAnnotationCPUEnhancementCPUBurstPercent = "cpu_burst_percent"
)

// const variables for pod annotations about qos level enhancement in network
const (
	// PodAnnotationNetworkEnhancementKey is the pod annotation key for network enhancement.
	PodAnnotationNetworkEnhancementKey = "katalyst.kubewharf.io/network_enhancement"

	// PodAnnotationNetworkEnhancementNamespaceType provides a mechanism to select nic in different namespaces
	// - PodAnnotationNetworkEnhancementNamespaceTypeHost
	//   - only select nic device in host namespace
	//   - admit failed if not possible
	// - PodAnnotationNetworkEnhancementNamespaceTypeHostPrefer
	//   - prefer to select nic device in host namespace
	//   - also accept nic device in non-host namespace if not possible
	// - PodAnnotationNetworkEnhancementNamespaceTypeNotHost
	//   - only select nic device in non-host namespace
	//   - admit failed if not possible
	// - PodAnnotationNetworkEnhancementNamespaceTypeNotHostPrefer
	//   - prefer to select nic device in non-host namespace
	//   - also accept nic device in host namespace if not possible
	PodAnnotationNetworkEnhancementNamespaceType              = "namespace_type"
	PodAnnotationNetworkEnhancementNamespaceTypeHost          = "host_ns"
	PodAnnotationNetworkEnhancementNamespaceTypeHostPrefer    = "host_ns_preferred"
	PodAnnotationNetworkEnhancementNamespaceTypeNotHost       = "anti_host_ns"
	PodAnnotationNetworkEnhancementNamespaceTypeNotHostPrefer = "anti_host_ns_preferred"

	// PodAnnotationNetworkEnhancementAffinityRestricted sets as true to indicate
	// we must ensure the numa affinity for nic devices, and we should admit failed if not possible
	PodAnnotationNetworkEnhancementAffinityRestricted     = "topology_affinity_restricted"
	PodAnnotationNetworkEnhancementAffinityRestrictedTrue = "true"
)

// ResourcePluginPolicyName is the string type for the name of a
// QosResourceManager (QRM) plugin policy.
type ResourcePluginPolicyName string

// const variables for QRM plugin policy names
const (
	// ResourcePluginPolicyNameDynamic is the name of the dynamic policy.
	ResourcePluginPolicyNameDynamic ResourcePluginPolicyName = "dynamic"
	// ResourcePluginPolicyNameNative is the name of the native policy.
	ResourcePluginPolicyNameNative ResourcePluginPolicyName = "native"
	// ResourcePluginPolicyNameStatic is the name of the static policy.
	ResourcePluginPolicyNameStatic ResourcePluginPolicyName = "static"
)

// const variables for node resource topology scoring strategies; both are
// declared with the kube-scheduler ScoringStrategyType type.
const (
	// BalancedAllocation strategy favors nodes with a balanced resource usage rate.
	BalancedAllocation kubeschedulerconfig.ScoringStrategyType = "BalancedAllocation"
	// LeastNUMANodes strategy favors nodes which require the least number of NUMA nodes
	// to satisfy the resource requests of the given pod.
	LeastNUMANodes kubeschedulerconfig.ScoringStrategyType = "LeastNUMANodes"
)