Skip to content

Commit ecb151d

Browse files
committed
Add CRD for SelfNodeRemediationConfig
1 parent 97698cb commit ecb151d

File tree

2 files changed

+187
-0
lines changed

2 files changed

+187
-0
lines changed
2.08 KB
Binary file not shown.
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
apiVersion: apiextensions.k8s.io/v1
2+
kind: CustomResourceDefinition
3+
metadata:
4+
annotations:
5+
controller-gen.kubebuilder.io/version: v0.14.0
6+
operatorframework.io/installed-alongside-9666ece3a3e945cb: operators/self-node-remediation.v0.9.0
7+
generation: 1
8+
labels:
9+
olm.managed: "true"
10+
operators.coreos.com/self-node-remediation.operators: ""
11+
self-node-remediation-operator: ""
12+
name: selfnoderemediationconfigs.self-node-remediation.medik8s.io
13+
spec:
14+
conversion:
15+
strategy: None
16+
group: self-node-remediation.medik8s.io
17+
names:
18+
kind: SelfNodeRemediationConfig
19+
listKind: SelfNodeRemediationConfigList
20+
plural: selfnoderemediationconfigs
21+
shortNames:
22+
- snrc
23+
- snrconfig
24+
singular: selfnoderemediationconfig
25+
scope: Namespaced
26+
versions:
27+
- name: v1alpha1
28+
schema:
29+
openAPIV3Schema:
30+
description: SelfNodeRemediationConfig is the Schema for the selfnoderemediationconfigs
31+
API in which a user can configure the self node remediation agents
32+
properties:
33+
apiVersion:
34+
description: |-
35+
APIVersion defines the versioned schema of this representation of an object.
36+
Servers should convert recognized schemas to the latest internal value, and
37+
may reject unrecognized values.
38+
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
39+
type: string
40+
kind:
41+
description: |-
42+
Kind is a string value representing the REST resource this object represents.
43+
Servers may infer this from the endpoint the client submits requests to.
44+
Cannot be updated.
45+
In CamelCase.
46+
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
47+
type: string
48+
metadata:
49+
type: object
50+
spec:
51+
description: SelfNodeRemediationConfigSpec defines the desired state of
52+
SelfNodeRemediationConfig
53+
properties:
54+
apiCheckInterval:
55+
default: 15s
56+
description: |-
57+
The frequency for api-server connectivity check.
58+
Valid time units are "ms", "s", "m", "h".
59+
the frequency for api-server connectivity check
60+
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
61+
type: string
62+
apiServerTimeout:
63+
default: 5s
64+
description: |-
65+
Timeout for each api-connectivity check.
66+
Valid time units are "ms", "s", "m", "h".
67+
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
68+
type: string
69+
customDsTolerations:
70+
description: CustomDsTolerations allows adding custom tolerations to the
71+
snr agents that are running on the ds in order to support remediation
72+
for different types of nodes.
73+
items:
74+
description: |-
75+
The pod this Toleration is attached to tolerates any taint that matches
76+
the triple <key,value,effect> using the matching operator <operator>.
77+
properties:
78+
effect:
79+
description: |-
80+
Effect indicates the taint effect to match. Empty means match all taint effects.
81+
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
82+
type: string
83+
key:
84+
description: |-
85+
Key is the taint key that the toleration applies to. Empty means match all taint keys.
86+
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
87+
type: string
88+
operator:
89+
description: |-
90+
Operator represents a key's relationship to the value.
91+
Valid operators are Exists and Equal. Defaults to Equal.
92+
Exists is equivalent to wildcard for value, so that a pod can
93+
tolerate all taints of a particular category.
94+
type: string
95+
tolerationSeconds:
96+
description: |-
97+
TolerationSeconds represents the period of time the toleration (which must be
98+
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
99+
it is not set, which means tolerate the taint forever (do not evict). Zero and
100+
negative values will be treated as 0 (evict immediately) by the system.
101+
format: int64
102+
type: integer
103+
value:
104+
description: |-
105+
Value is the taint value the toleration matches to.
106+
If the operator is Exists, the value should be empty, otherwise just a regular string.
107+
type: string
108+
type: object
109+
type: array
110+
endpointHealthCheckUrl:
111+
description: |-
112+
EndpointHealthCheckUrl is a URL that self node remediation agents running on control-plane nodes will try to access when they can't contact their peers.
113+
This is a part of self diagnostics which will decide whether the node should be remediated or not.
114+
It will be ignored when empty (which is the default).
115+
type: string
116+
hostPort:
117+
default: 30001
118+
description: HostPort is used for internal communication between SNR
119+
agents.
120+
minimum: 1
121+
type: integer
122+
isSoftwareRebootEnabled:
123+
default: true
124+
description: |-
125+
IsSoftwareRebootEnabled indicates whether the self node remediation agent will do a software reboot
126+
if the watchdog device cannot be used, or will use the watchdog only,
127+
without a fallback to software reboot.
128+
type: boolean
129+
maxApiErrorThreshold:
130+
default: 3
131+
description: After this threshold, the node will start contacting
132+
its peers.
133+
minimum: 1
134+
type: integer
135+
peerApiServerTimeout:
136+
default: 5s
137+
description: |-
138+
The timeout for api-server connectivity check.
139+
Valid time units are "ms", "s", "m", "h".
140+
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
141+
type: string
142+
peerDialTimeout:
143+
default: 5s
144+
description: |-
145+
Timeout for establishing connection to peer.
146+
Valid time units are "ms", "s", "m", "h".
147+
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
148+
type: string
149+
peerRequestTimeout:
150+
default: 5s
151+
description: |-
152+
Timeout for each peer request.
153+
Valid time units are "ms", "s", "m", "h".
154+
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
155+
type: string
156+
peerUpdateInterval:
157+
default: 15m
158+
description: |-
159+
The frequency for updating peers.
160+
Valid time units are "ms", "s", "m", "h".
161+
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
162+
type: string
163+
safeTimeToAssumeNodeRebootedSeconds:
164+
description: |-
165+
SafeTimeToAssumeNodeRebootedSeconds is the time after which the healthy self node remediation
166+
agents will assume the unhealthy node has been rebooted, and it is safe to recover affected workloads.
167+
This is extremely important as starting replacement Pods while they are still running on the failed
168+
node will likely lead to data corruption and violation of run-once semantics.
169+
In an effort to prevent this, the operator ignores values lower than a minimum calculated from the
170+
ApiCheckInterval, ApiServerTimeout, MaxApiErrorThreshold, PeerDialTimeout, and PeerRequestTimeout fields,
171+
and the unhealthy node's individual watchdog timeout.
172+
type: integer
173+
watchdogFilePath:
174+
default: /dev/watchdog
175+
description: WatchdogFilePath is the watchdog file path that should
176+
be available on each node, e.g. /dev/watchdog.
177+
type: string
178+
type: object
179+
status:
180+
description: SelfNodeRemediationConfigStatus defines the observed state
181+
of SelfNodeRemediationConfig
182+
type: object
183+
type: object
184+
served: true
185+
storage: true
186+
subresources:
187+
status: {}

0 commit comments

Comments
 (0)