Skip to content

Commit a4e8f59

Browse files
naiming-zededa and eriknordmark
authored and committed
add controller-driven kube-vip load balancer for K3S_BASE clusters
This PR implements controller-driven Kubernetes LoadBalancer services for CLUSTER_TYPE_K3S_BASE in eve-k. - pkg/pillar/types/clustertypes.go: Add LBInterfaceConfig (interface + CIDR string) and LBInterfaces []LBInterfaceConfig to both EdgeNodeClusterConfig and EdgeNodeClusterStatus. - pkg/pillar/cmd/zedagent/parseconfig.go: Parse LoadBalancerService from the controller proto and populate EdgeNodeClusterConfig.LBInterfaces (K3S_BASE only; first interface/CIDR entry applied). - pkg/pillar/cmd/zedkube/clusterstatus.go: Relay LBInterfaces from EdgeNodeClusterConfig into EdgeNodeClusterStatus on the bootstrap node only; non-bootstrap nodes publish an empty list so they do not trigger kube-vip setup. - pkg/pillar/dpcmanager/dns.go: Filter kube-vip VIPs out of DeviceNetworkStatus.AddrInfoList using the LBInterfaces CIDR range, preventing VIPs from being used as source addresses for controller-bound traffic. - pkg/kube/cluster-init.sh: Add check_kubevip_lb loop that reads EdgeNodeClusterStatus JSON each iteration and calls kubevip-apply.sh or kubevip-delete.sh when the LB config changes. Persists last-applied state to avoid redundant re-applies across restarts. - pkg/kube/kubevip-apply.sh / kubevip-delete.sh: Scripts to install/remove the kube-vip DaemonSet and kube-vip-cloud-provider Deployment, configuring the IP pool via a kubevip ConfigMap. - pkg/kube/kubevip-ds.yaml: kube-vip DaemonSet manifest (ARP mode, control-plane nodes). - pkg/kube/config.yaml: Disable k3s built-in ServiceLB (servicelb) and Traefik for K3S_BASE — kube-vip replaces ServiceLB; users bring their own ingress. - pkg/pillar/docs/zedkube.md: Document the feature with an overview diagram, data-flow, EVE-API proto, and DeviceNetworkStatus filtering notes. Signed-off-by: naiming-zededa <naiming@zededa.com>
1 parent 26330c5 commit a4e8f59

File tree

15 files changed

+505
-80
lines changed

15 files changed

+505
-80
lines changed

pkg/kube/cluster-init.sh

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -847,6 +847,51 @@ monitor_cluster_config_change() {
847847
done
848848
}
849849

850+
# Persistent state file for kube-vip: stores "<iface> <cidr>" of the last
# successfully applied config so that container/device restarts do not
# trigger a redundant re-apply (which causes "configmaps kubevip already
# exists" errors from kube-vip-cloud-provider).
KUBEVIP_STATE_FILE="/var/lib/kubevip-applied"

# check_kubevip_lb is called once per main loop iteration. It reads the
# EdgeNodeClusterStatus published by zedkube and applies or removes the kube-vip
# load balancer configuration when LBInterfaces[0] changes. On the bootstrap
# node this field is set by zedkube; on other nodes it is empty so no action
# is taken. prev_lb_state is seeded from KUBEVIP_STATE_FILE on first call so
# that restarts skip re-applying an already-configured kube-vip.
prev_lb_state=""
check_kubevip_lb() {
    # Seed in-memory state from the persistent file on first call.
    if [ -z "$prev_lb_state" ] && [ -f "$KUBEVIP_STATE_FILE" ]; then
        prev_lb_state=$(cat "$KUBEVIP_STATE_FILE")
    fi
    local lb_iface="" lb_cidr="" lb="" enc_data=""
    if [ -f "$enc_status_file" ]; then
        enc_data=$(cat "$enc_status_file")
        # A partially-written or malformed status file must not abort the
        # main loop: suppress jq errors and fall back to "no LB config";
        # the next iteration will pick up the completed file.
        lb_iface=$(echo "$enc_data" | jq -r '.LBInterfaces[0].Interface // ""' 2>/dev/null) || lb_iface=""
        lb_cidr=$(echo "$enc_data" | jq -r '.LBInterfaces[0].IPPrefix // ""' 2>/dev/null) || lb_cidr=""
    fi
    lb=$(printf '%s %s' "$lb_iface" "$lb_cidr" | xargs) # trim whitespace
    if [ "$lb" = "$prev_lb_state" ]; then
        return
    fi
    if [ -n "$lb_iface" ] && [ -n "$lb_cidr" ]; then
        logmsg "check_kubevip_lb: applying kube-vip with iface=$lb_iface cidr=$lb_cidr"
        if /usr/bin/kubevip-apply.sh "$lb_iface" "$lb_cidr"; then
            prev_lb_state="$lb"
            echo "$prev_lb_state" > "$KUBEVIP_STATE_FILE"
        else
            # Keep prev_lb_state unchanged so the apply is retried on the
            # next loop iteration, and make the failure visible in the log.
            logmsg "check_kubevip_lb: kubevip-apply.sh failed for iface=$lb_iface cidr=$lb_cidr, will retry"
        fi
    elif [ -n "$prev_lb_state" ]; then
        logmsg "check_kubevip_lb: removing kube-vip"
        if /usr/bin/kubevip-delete.sh; then
            prev_lb_state="$lb"
            rm -f "$KUBEVIP_STATE_FILE"
        else
            logmsg "check_kubevip_lb: kubevip-delete.sh failed, will retry"
        fi
    else
        # No LB config and nothing previously applied: remember the (empty)
        # state so the comparison above short-circuits next time.
        prev_lb_state="$lb"
    fi
}
894+
850895
# started when we detect registration addition
851896
# start cleaning up some components
852897
# these are cluster-wide operations, only one nodes initiates it
@@ -928,7 +973,7 @@ EOF
928973
elif [ $cluster_type -eq $CLUSTER_TYPE_REPLICATED_STORAGE ]; then
929974
cp "${KUBE_MANIFESTS_SRC_DIR}/${K3S_CONFIG_FILE_DISABLE_LOCAL_PATH}" "${K3S_CONFIG_DIR}/${K3S_CONFIG_FILE_DISABLE_LOCAL_PATH}"
930975
elif [ $cluster_type -eq $CLUSTER_TYPE_K3S_BASE ]; then
931-
rm "${K3S_CONFIG_DIR}/${K3S_CONFIG_FILE_DISABLE_LOCAL_PATH}"
976+
rm -f "${K3S_CONFIG_DIR}/${K3S_CONFIG_FILE_DISABLE_LOCAL_PATH}"
932977
else
933978
logmsg "possible unhandled cluster type $cluster_type in (provision_cluster_config_file)"
934979
fi
@@ -1408,6 +1453,7 @@ fi
14081453
check_log_file_size "containerd-user.log"
14091454
check_kubeconfig_yaml_files
14101455
check_and_remove_excessive_k3s_logs
1456+
check_kubevip_lb
14111457
wait_for_item "wait"
14121458
sleep 15
14131459
done

pkg/kube/config.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ etcd-expose-metrics: true
1212
container-runtime-endpoint: "/run/containerd-user/containerd.sock"
1313
disable-network-policy: true
1414
disable-cloud-controller: true
15+
disable:
16+
- servicelb
17+
- traefik
1518
kubelet-arg:
1619
- "node-status-update-frequency=2s"
1720
- "cpu-manager-policy=static"

pkg/kube/kubevip-apply.sh

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
# kubevip-apply.sh
77
# This script creates a Kube-VIP ConfigMap with the specified interface and CIDR range
88
# and applies the necessary Kubernetes resources.
9-
# This script is for testing only, not for production use.
109

1110
# Function to display usage information
1211
show_usage() {
@@ -15,7 +14,6 @@ show_usage() {
1514
echo ""
1615
echo "This script creates a Kube-VIP ConfigMap with the specified interface and CIDR range"
1716
echo "and applies the necessary Kubernetes resources."
18-
echo "This script is for testing only, not for production use."
1917
}
2018

2119
# Check if we have two arguments
@@ -53,10 +51,8 @@ metadata:
5351
namespace: kube-system
5452
data:
5553
# Global settings for all LoadBalancer services
56-
cidr-default: "${CIDR}" # Default CIDR for LoadBalancer services
57-
cidr-global: "${CIDR}" # Default CIDR for LoadBalancer services
58-
interface-default: "${INTERFACE}" # All global LoadBalancer IPs will be advertised on ${INTERFACE}
59-
interface-global: "${INTERFACE}" # All global LoadBalancer IPs will be advertised on ${INTERFACE}
54+
cidr-global: "${CIDR}"
55+
interface-global: "${INTERFACE}"
6056
EOF
6157

6258
echo "Created Kube-VIP ConfigMap with interface ${INTERFACE} and CIDR ${CIDR}"

pkg/kube/kubevip-delete.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
#
66
# kubevip-delete.sh
77
# This script deletes the Kube-VIP ConfigMap and associated resources.
8-
# This script is for testing only, not for production use.
98

109
if kubectl delete -f /etc/kubevip-ds.yaml && \
1110
kubectl delete -f /etc/kubevip-cm.yaml && \

pkg/kube/kubevip-ds.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ kind: DaemonSet
88
metadata:
99
labels:
1010
app.kubernetes.io/name: kube-vip-ds
11-
app.kubernetes.io/version: v0.8.9
11+
app.kubernetes.io/version: v1.1.0
1212
name: kube-vip-ds
1313
namespace: kube-system
1414
spec:
@@ -19,7 +19,7 @@ spec:
1919
metadata:
2020
labels:
2121
app.kubernetes.io/name: kube-vip-ds
22-
app.kubernetes.io/version: v0.8.9
22+
app.kubernetes.io/version: v1.1.0
2323
spec:
2424
affinity:
2525
nodeAffinity:
@@ -69,7 +69,7 @@ spec:
6969
value: "2"
7070
- name: prometheus_server
7171
value: 0.0.0.0:2112
72-
image: ghcr.io/kube-vip/kube-vip:v0.8.9
72+
image: ghcr.io/kube-vip/kube-vip:v1.1.0
7373
imagePullPolicy: IfNotPresent
7474
name: kube-vip
7575
resources: {}

pkg/kube/kubevirt-utils.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ Kubevirt_install() {
2424
# Uses a targeted merge patch on developerConfiguration.featureGates only, so that
2525
# permittedHostDevices (PCIe/GPU/USB passthrough) is never disturbed.
2626
Kubevirt_migrate_feature_gates() {
27+
if [ -f /var/lib/base-k3s-mode ]; then
28+
return 0
29+
fi
2730
current_gates=$(kubectl get kubevirt kubevirt -n kubevirt \
2831
-o jsonpath='{.spec.configuration.developerConfiguration.featureGates[*]}' 2>/dev/null)
2932
if echo "$current_gates" | tr ' ' '\n' | grep -qx "CPUManager"; then

pkg/pillar/cmd/zedagent/parseconfig.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3513,6 +3513,25 @@ func parseEdgeNodeClusterConfig(getconfigCtx *getconfigContext,
35133513
enClusterConfig.TieBreakerNodeID = types.UUIDandVersion{UUID: tieBreakerNodeID}
35143514
}
35153515

3516+
// Parse LoadBalancerService — only supported for ClusterTypeK3sBase clusters.
3517+
// Each proto interface contributes one LBInterfaceConfig entry using its first CIDR.
3518+
lbSvc := zcfgCluster.GetLoadBalancerService()
3519+
if lbSvc != nil && enClusterConfig.ClusterType == types.ClusterTypeK3sBase {
3520+
for _, iface := range lbSvc.GetInterfaces() {
3521+
ifName := iface.GetInterfaceName()
3522+
cidrs := iface.GetAddressCidrs()
3523+
if ifName == "" || len(cidrs) == 0 {
3524+
continue
3525+
}
3526+
if _, _, lbErr := net.ParseCIDR(cidrs[0]); lbErr != nil {
3527+
log.Errorf("parseEdgeNodeClusterConfig: invalid LB CIDR %s: %v", cidrs[0], lbErr)
3528+
continue
3529+
}
3530+
enClusterConfig.LBInterfaces = append(enClusterConfig.LBInterfaces,
3531+
types.LBInterfaceConfig{Interface: ifName, IPPrefix: cidrs[0]})
3532+
}
3533+
}
3534+
35163535
log.Functionf("parseEdgeNodeClusterConfig: ENCluster API, Config %+v, %v", zcfgCluster, enClusterConfig)
35173536
ctx.pubEdgeNodeClusterConfig.Publish("global", enClusterConfig)
35183537
}

pkg/pillar/cmd/zedkube/clusterstatus.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,12 @@ func (z *zedkube) publishKubeConfigStatus() {
156156
log.Errorf("publishKubeConfigStatus: cluster token is not from configitme or encrypted")
157157
}
158158

159+
// Only the bootstrap node manages kube-vip load balancing; other nodes leave
160+
// LBInterfaces empty so cluster-init.sh does not apply kubevip.
161+
if z.clusterConfig.BootstrapNode {
162+
status.LBInterfaces = z.clusterConfig.LBInterfaces
163+
}
164+
159165
// publish the cluster status for the kube container
160166
log.Functionf("publishKubeConfigStatus: publishing")
161167
z.pubEdgeNodeClusterStatus.Publish("global", status)

pkg/pillar/cmd/zedkube/kubeservice.go

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,14 @@ func (z *zedkube) collectKubeSvcs() {
5858
// Continue anyway, we might have some services
5959
}
6060

61+
// Collect kube-vip load balancer pool status if kubevip is deployed
62+
lbPoolStatus := collectLBPoolStatus(clientset, serviceInfoList)
63+
6164
// Create new KubeUserServices struct with collected data
6265
newKubeUserServices := types.KubeUserServices{
63-
UserService: serviceInfoList,
64-
UserIngress: ingressInfoList,
66+
UserService: serviceInfoList,
67+
UserIngress: ingressInfoList,
68+
LBPoolStatus: lbPoolStatus,
6569
}
6670

6771
// Get previous published data to compare
@@ -323,6 +327,38 @@ func (z *zedkube) GetAllKubeIngresses(clientset *kubernetes.Clientset, serviceIn
323327
return ingressInfoList, nil
324328
}
325329

330+
// collectLBPoolStatus reads the kubevip ConfigMap from kube-system to get the configured
331+
// load balancer pool, and gathers IPs currently allocated to LoadBalancer-type services.
332+
// Returns nil if the kubevip ConfigMap does not exist (kubevip not yet deployed).
333+
func collectLBPoolStatus(clientset *kubernetes.Clientset, services []types.KubeServiceInfo) *types.KubeLBPoolStatus {
334+
cm, err := clientset.CoreV1().ConfigMaps("kube-system").Get(
335+
context.Background(), "kubevip", metav1.GetOptions{})
336+
if err != nil {
337+
// kubevip ConfigMap not present — not deployed yet
338+
return nil
339+
}
340+
341+
iface := cm.Data["interface-global"]
342+
cidr := cm.Data["cidr-global"]
343+
if iface == "" || cidr == "" {
344+
return nil
345+
}
346+
347+
var allocatedIPs []string
348+
for _, svc := range services {
349+
if svc.Type == corev1.ServiceTypeLoadBalancer && svc.LoadBalancerIP != "" {
350+
allocatedIPs = append(allocatedIPs, svc.LoadBalancerIP)
351+
}
352+
}
353+
sort.Strings(allocatedIPs)
354+
355+
return &types.KubeLBPoolStatus{
356+
Interface: iface,
357+
IPPrefix: cidr,
358+
AllocatedIPs: allocatedIPs,
359+
}
360+
}
361+
326362
// isDeviceInterfaceIP checks if the given IP is assigned to any of the device's network interfaces
327363
func (z *zedkube) isDeviceInterfaceIP(ipStr string) bool {
328364
ip := net.ParseIP(ipStr)

0 commit comments

Comments
 (0)