Skip to content

Commit 95890f0

Browse files
cheina97adamjensenbot
authored andcommitted
feat: fabric and gateway health probe
1 parent 44f3886 commit 95890f0

File tree

11 files changed

+139
-8
lines changed

11 files changed

+139
-8
lines changed

cmd/fabric/main.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"sigs.k8s.io/controller-runtime/pkg/cache"
3232
"sigs.k8s.io/controller-runtime/pkg/client"
3333
"sigs.k8s.io/controller-runtime/pkg/client/config"
34+
"sigs.k8s.io/controller-runtime/pkg/healthz"
3435
"sigs.k8s.io/controller-runtime/pkg/log"
3536
"sigs.k8s.io/controller-runtime/pkg/metrics/server"
3637

@@ -142,6 +143,14 @@ func run(cmd *cobra.Command, _ []string) error {
142143
return fmt.Errorf("unable to create manager: %w", err)
143144
}
144145

146+
// Register the health (healthz) and readiness (readyz) probes.
147+
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
148+
return fmt.Errorf("unable to set up healthz probe: %w", err)
149+
}
150+
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
151+
return fmt.Errorf("unable to set up readyz probe: %w", err)
152+
}
153+
145154
gwr, err := sourcedetector.NewGatewayReconciler(
146155
mgr.GetClient(),
147156
mgr.GetScheme(),

cmd/gateway/geneve/main.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"k8s.io/klog/v2"
2626
ctrl "sigs.k8s.io/controller-runtime"
2727
"sigs.k8s.io/controller-runtime/pkg/client/config"
28+
"sigs.k8s.io/controller-runtime/pkg/healthz"
2829
"sigs.k8s.io/controller-runtime/pkg/log"
2930
"sigs.k8s.io/controller-runtime/pkg/metrics/server"
3031

@@ -95,6 +96,14 @@ func run(cmd *cobra.Command, _ []string) error {
9596
return fmt.Errorf("unable to create manager: %w", err)
9697
}
9798

99+
// Register the health (healthz) and readiness (readyz) probes.
100+
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
101+
return fmt.Errorf("unable to set up healthz probe: %w", err)
102+
}
103+
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
104+
return fmt.Errorf("unable to set up readyz probe: %w", err)
105+
}
106+
98107
inr, err := geneve.NewInternalNodeReconciler(
99108
mgr.GetClient(),
100109
mgr.GetScheme(),

cmd/gateway/main.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
ctrl "sigs.k8s.io/controller-runtime"
3030
"sigs.k8s.io/controller-runtime/pkg/client"
3131
"sigs.k8s.io/controller-runtime/pkg/client/config"
32+
"sigs.k8s.io/controller-runtime/pkg/healthz"
3233
"sigs.k8s.io/controller-runtime/pkg/log"
3334
"sigs.k8s.io/controller-runtime/pkg/metrics/server"
3435

@@ -160,6 +161,14 @@ func run(cmd *cobra.Command, _ []string) error {
160161
return fmt.Errorf("unable to create manager: %w", err)
161162
}
162163

164+
// Register the health (healthz) and readiness (readyz) probes.
165+
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
166+
return fmt.Errorf("unable to set up healthz probe: %w", err)
167+
}
168+
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
169+
return fmt.Errorf("unable to set up readyz probe: %w", err)
170+
}
171+
163172
if connoptions.EnableConnectionController {
164173
// Setup the connection controller.
165174
connr, err := connection.NewConnectionsReconciler(

cmd/gateway/wireguard/main.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"sigs.k8s.io/controller-runtime/pkg/cache"
3030
"sigs.k8s.io/controller-runtime/pkg/client/config"
3131
"sigs.k8s.io/controller-runtime/pkg/event"
32+
"sigs.k8s.io/controller-runtime/pkg/healthz"
3233
"sigs.k8s.io/controller-runtime/pkg/log"
3334
"sigs.k8s.io/controller-runtime/pkg/metrics"
3435
"sigs.k8s.io/controller-runtime/pkg/metrics/server"
@@ -107,6 +108,14 @@ func run(cmd *cobra.Command, _ []string) error {
107108
return fmt.Errorf("unable to create manager: %w", err)
108109
}
109110

111+
// Register the health (healthz) and readiness (readyz) probes.
112+
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
113+
return fmt.Errorf("unable to set up healthz probe: %w", err)
114+
}
115+
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
116+
return fmt.Errorf("unable to set up readyz probe: %w", err)
117+
}
118+
110119
// Setup the controller.
111120
pkr, err := wireguard.NewPublicKeysReconciler(
112121
mgr.GetClient(),

deployments/liqo/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@
8686
| networking.enabled | bool | `true` | Use the default Liqo networking module. |
8787
| networking.fabric.config.fullMasquerade | bool | `false` | Enable/Disable the full masquerade mode for the fabric pod. It means that all traffic will be masqueraded using the first external CIDR IP, instead of using the pod IP. Full masquerade is useful when the cluster nodeports use a PodCIDR IP to masquerade the incoming traffic. IMPORTANT: Please consider that enabling this feature will masquerade the source IP of traffic towards a remote cluster, making it impossible for a pod that receives the traffic to know the original source IP. |
8888
| networking.fabric.config.gatewayMasqueradeBypass | bool | `false` | Enable/Disable the masquerade bypass for the gateway pods. It means that the packets from gateway pods will not be masqueraded from the host where the pod is scheduled. This is useful in scenarios where CNIs masquerade the traffic from pod to nodes. For example this is required when using the Azure CNI or Kindnet. |
89+
| networking.fabric.config.healthProbeBindAddressPort | string | `"8081"` | Set the port where the fabric pod will expose the health probe. To disable the health probe, set the port to 0. |
90+
| networking.fabric.config.metricsAddressPort | string | `"8082"` | Set the port where the fabric pod will expose the metrics. To disable the metrics, set the port to 0. |
8991
| networking.fabric.config.nftablesMonitor | bool | `true` | Enable/Disable the nftables monitor for the fabric pod. It means that the fabric pod will monitor the nftables rules and will restore them in case of changes. In some cases (like K3S), this monitor can cause a huge amount of CPU usage. If you are experiencing high CPU usage, you can disable this feature. |
9092
| networking.fabric.image.name | string | `"ghcr.io/liqotech/fabric"` | Image repository for the fabric pod. |
9193
| networking.fabric.image.version | string | `""` | Custom version for the fabric image. If not specified, the global tag is used. |

deployments/liqo/templates/liqo-fabric-daemonset.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ spec:
4545
- --podname=$(POD_NAME)
4646
- --nodename=$(NODE_NAME)
4747
- --geneve-port={{ .Values.networking.genevePort }}
48+
- --health-probe-bind-address=:{{ .Values.networking.fabric.config.healthProbeBindAddressPort}}
49+
- --metrics-address=:{{ .Values.networking.fabric.config.metricsAddressPort}}
4850
{{- if not .Values.requirements.kernel.enabled }}
4951
- --disable-kernel-version-check
5052
{{- end }}
@@ -79,6 +81,16 @@ spec:
7981
valueFrom:
8082
fieldRef:
8183
fieldPath: metadata.name
84+
{{- if and .Values.networking.fabric.config.healthProbeBindAddressPort (ne .Values.networking.fabric.config.healthProbeBindAddressPort "0") }}
85+
ports:
86+
- name: healthz
87+
containerPort: {{ .Values.networking.fabric.config.healthProbeBindAddressPort }}
88+
protocol: TCP
89+
readinessProbe:
90+
httpGet:
91+
path: /readyz
92+
port: healthz
93+
{{- end }}
8294
hostNetwork: true
8395
{{- if .Values.networking.fabric.pod.priorityClassName }}
8496
priorityClassName: {{ .Values.networking.fabric.pod.priorityClassName }}

deployments/liqo/templates/liqo-wireguard-gateway-client-template.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,14 @@ spec:
7979
- containerPort: 8082
8080
name: gw-metrics
8181
{{- end }}
82+
- containerPort: 8083
83+
name: healthz
84+
# ATTENTION: uncomment the readinessProbe section if you are aware of the consequences.
85+
# If you have more replicas of the same gateway, the passive ones will not reach the ready state.
86+
#readinessProbe:
87+
# httpGet:
88+
# path: /readyz
89+
# port: healthz
8290
env:
8391
- name: NODE_NAME
8492
valueFrom:
@@ -117,6 +125,14 @@ spec:
117125
- containerPort: 8084
118126
name: wg-metrics
119127
{{- end }}
128+
- containerPort: 8085
129+
name: healthz
130+
# ATTENTION: uncomment the readinessProbe section if you are aware of the consequences.
131+
# If you have more replicas of the same gateway, the passive ones will not reach the ready state.
132+
#readinessProbe:
133+
# httpGet:
134+
# path: /readyz
135+
# port: healthz
120136
securityContext:
121137
capabilities:
122138
add:
@@ -155,6 +171,14 @@ spec:
155171
- containerPort: 8086
156172
name: gv-metrics
157173
{{- end }}
174+
- containerPort: 8087
175+
name: healthz
176+
# ATTENTION: uncomment the readinessProbe section if you are aware of the consequences.
177+
# If you have more replicas of the same gateway, the passive ones will not reach the ready state.
178+
#readinessProbe:
179+
# httpGet:
180+
# path: /readyz
181+
# port: healthz
158182
env:
159183
- name: NODE_NAME
160184
valueFrom:

deployments/liqo/templates/liqo-wireguard-gateway-server-template-eks.yaml

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,15 @@ spec:
106106
- containerPort: 8082
107107
name: gw-metrics
108108
{{- end }}
109+
ports:
110+
- containerPort: 8083
111+
name: healthz
112+
# ATTENTION: uncomment the readinessProbe section if you are aware of the consequences.
113+
# If you have more replicas of the same gateway, the passive ones will not reach the ready state.
114+
#readinessProbe:
115+
# httpGet:
116+
# path: /readyz
117+
# port: healthz
109118
env:
110119
- name: NODE_NAME
111120
valueFrom:
@@ -143,6 +152,15 @@ spec:
143152
- containerPort: 8084
144153
name: wg-metrics
145154
{{- end }}
155+
ports:
156+
- containerPort: 8085
157+
name: healthz
158+
# ATTENTION: uncomment the readinessProbe section if you are aware of the consequences.
159+
# If you have more replicas of the same gateway, the passive ones will not reach the ready state.
160+
#readinessProbe:
161+
# httpGet:
162+
# path: /readyz
163+
# port: healthz
146164
securityContext:
147165
capabilities:
148166
add:
@@ -172,7 +190,7 @@ spec:
172190
{{- if .Values.metrics.enabled }}
173191
- --metrics-address=:8086
174192
{{- end }}
175-
- --health-probe-bind-address=:8086
193+
- --health-probe-bind-address=:8087
176194
volumeMounts:
177195
- name: ipc
178196
mountPath: /ipc
@@ -181,6 +199,15 @@ spec:
181199
- containerPort: 8086
182200
name: gv-metrics
183201
{{- end }}
202+
ports:
203+
- containerPort: 8087
204+
name: healthz
205+
# ATTENTION: uncomment the readinessProbe section if you are aware of the consequences.
206+
# If you have more replicas of the same gateway, the passive ones will not reach the ready state.
207+
#readinessProbe:
208+
# httpGet:
209+
# path: /readyz
210+
# port: healthz
184211
env:
185212
- name: NODE_NAME
186213
valueFrom:

deployments/liqo/templates/liqo-wireguard-gateway-server-template.yaml

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,9 @@ spec:
8080
{{- include "liqo.concatenateMap" $d | nindent 16 }}
8181
{{- end }}
8282
{{- if .Values.metrics.enabled }}
83-
- --metrics-address=:8084
83+
- --metrics-address=:8082
8484
{{- end }}
85-
- --health-probe-bind-address=:8085
85+
- --health-probe-bind-address=:8083
8686
- --ping-enabled=true
8787
- --ping-loss-threshold={{ .Values.networking.gatewayTemplates.ping.lossThreshold }}
8888
- --ping-interval={{ .Values.networking.gatewayTemplates.ping.interval }}
@@ -96,9 +96,17 @@ spec:
9696
mountPath: /ipc
9797
{{- if .Values.metrics.enabled }}
9898
ports:
99-
- containerPort: 8084
99+
- containerPort: 8082
100100
name: gw-metrics
101101
{{- end }}
102+
- containerPort: 8083
103+
name: healthz
104+
# ATTENTION: uncomment the readinessProbe section if you are aware of the consequences.
105+
# If you have more replicas of the same gateway, the passive ones will not reach the ready state.
106+
#readinessProbe:
107+
# httpGet:
108+
# path: /readyz
109+
# port: healthz
102110
env:
103111
- name: NODE_NAME
104112
valueFrom:
@@ -127,15 +135,23 @@ spec:
127135
- --mtu={{"{{ .Spec.MTU }}"}}
128136
- --listen-port={{"{{ .Spec.Endpoint.Port }}"}}
129137
{{- if .Values.metrics.enabled }}
130-
- --metrics-address=:8082
138+
- --metrics-address=:8084
131139
{{- end }}
132-
- --health-probe-bind-address=:8083
140+
- --health-probe-bind-address=:8085
133141
- --implementation={{ .Values.networking.gatewayTemplates.wireguard.implementation }}
134142
{{- if .Values.metrics.enabled }}
135143
ports:
136-
- containerPort: 8082
144+
- containerPort: 8084
137145
name: wg-metrics
138146
{{- end }}
147+
- containerPort: 8085
148+
name: healthz
149+
# ATTENTION: uncomment the readinessProbe section if you are aware of the consequences.
150+
# If you have more replicas of the same gateway, the passive ones will not reach the ready state.
151+
#readinessProbe:
152+
# httpGet:
153+
# path: /readyz
154+
# port: healthz
139155
securityContext:
140156
capabilities:
141157
add:
@@ -174,6 +190,14 @@ spec:
174190
- containerPort: 8086
175191
name: gv-metrics
176192
{{- end }}
193+
- containerPort: 8087
194+
name: healthz
195+
# ATTENTION: uncomment the readinessProbe section if you are aware of the consequences.
196+
# If you have more replicas of the same gateway, the passive ones will not reach the ready state.
197+
#readinessProbe:
198+
# httpGet:
199+
# path: /readyz
200+
# port: healthz
177201
env:
178202
- name: NODE_NAME
179203
valueFrom:

deployments/liqo/values.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,12 @@ networking:
124124
# In some cases (like K3S), this monitor can cause a huge amount of CPU usage.
125125
# If you are experiencing high CPU usage, you can disable this feature.
126126
nftablesMonitor: true
127+
# -- Set the port where the fabric pod will expose the health probe.
128+
# To disable the health probe, set the port to 0.
129+
healthProbeBindAddressPort: "8081"
130+
# -- Set the port where the fabric pod will expose the metrics.
131+
# To disable the metrics, set the port to 0.
132+
metricsAddressPort: "8082"
127133

128134
authentication:
129135
# -- Enable/Disable the authentication module.

0 commit comments

Comments
 (0)