Skip to content

Commit 7630ae8

Browse files
oilbeaterclaude
authored andcommitted
fix: BGP speaker reads IPs from pod annotations instead of pod status (kubeovn#6344)
BGP Speaker's syncSubnetRoutes() previously read pod IPs from pod.Status.PodIPs, which is managed by kubelet and only contains primary CNI IPs. This caused incorrect behavior when Kube-OVN runs as a secondary CNI (non-primary mode) or when attachment networks are configured — the speaker would either advertise the wrong IPs or miss attachment network IPs entirely. Replace pod.Status.PodIPs with pod annotation-based IP discovery ({provider}.kubernetes.io/ip_address), which is the unified and reliable source for all Kube-OVN managed IPs across both primary and attachment networks. Signed-off-by: Mengxin Liu <liumengxinfly@gmail.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> (cherry picked from commit f3dca4b)
1 parent f6d2ea3 commit 7630ae8

File tree

2 files changed

+294
-34
lines changed

2 files changed

+294
-34
lines changed

pkg/speaker/subnet.go

Lines changed: 54 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@
22
package speaker
33

44
import (
5+
"fmt"
56
"net/netip"
67
"strings"
78

9+
corev1 "k8s.io/api/core/v1"
810
"k8s.io/apimachinery/pkg/labels"
911
"k8s.io/klog/v2"
1012
"k8s.io/utils/set"
1113

14+
kubeovnv1 "github.com/kubeovn/kube-ovn/pkg/apis/kubeovn/v1"
1215
"github.com/kubeovn/kube-ovn/pkg/util"
1316
)
1417

@@ -53,12 +56,14 @@ func (c *Controller) syncSubnetRoutes() {
5356
}
5457
}
5558

56-
localSubnets := make(map[string]string, 2)
59+
subnetByName := make(map[string]*kubeovnv1.Subnet, len(subnets))
5760
for _, subnet := range subnets {
5861
if !subnet.Status.IsReady() || len(subnet.Annotations) == 0 {
5962
continue
6063
}
6164

65+
subnetByName[subnet.Name] = subnet
66+
6267
policy := subnet.Annotations[util.BgpAnnotation]
6368
switch policy {
6469
case "":
@@ -79,53 +84,68 @@ func (c *Controller) syncSubnetRoutes() {
7984
bgpExpected[afi].Insert(prefix.String())
8085
}
8186
}
82-
case announcePolicyLocal:
83-
localSubnets[subnet.Name] = subnet.Spec.CIDRBlock
8487
default:
85-
klog.Warningf("invalid subnet annotation %s=%s", util.BgpAnnotation, policy)
88+
if policy != announcePolicyLocal {
89+
klog.Warningf("invalid subnet annotation %s=%s", util.BgpAnnotation, policy)
90+
}
8691
}
8792
}
8893

94+
collectPodExpectedPrefixes(pods, subnetByName, c.config.NodeName, bgpExpected)
95+
96+
if err := c.reconcileRoutes(bgpExpected); err != nil {
97+
klog.Errorf("failed to reconcile routes: %s", err.Error())
98+
}
99+
}
100+
101+
// collectPodExpectedPrefixes iterates over pods and collects IPs that should be announced via BGP.
102+
// It reads IPs from pod annotations ({provider}.kubernetes.io/ip_address) instead of pod.Status.PodIPs,
103+
// so that attachment network IPs and non-primary CNI IPs are correctly announced.
104+
func collectPodExpectedPrefixes(pods []*corev1.Pod, subnetByName map[string]*kubeovnv1.Subnet, nodeName string, bgpExpected prefixMap) {
105+
ipAddrSuffix := fmt.Sprintf(util.IPAddressAnnotationTemplate, "")
89106
for _, pod := range pods {
90-
if pod.Status.PodIP == "" || len(pod.Annotations) == 0 || !isPodAlive(pod) {
107+
if len(pod.Annotations) == 0 || !isPodAlive(pod) {
91108
continue
92109
}
93110

94-
ips := make(map[string]string, 2)
95-
if policy := pod.Annotations[util.BgpAnnotation]; policy != "" {
111+
podBgpPolicy := pod.Annotations[util.BgpAnnotation]
112+
113+
for key, ipStr := range pod.Annotations {
114+
if ipStr == "" || !strings.HasSuffix(key, ipAddrSuffix) {
115+
continue
116+
}
117+
provider := strings.TrimSuffix(key, ipAddrSuffix)
118+
if provider == "" {
119+
continue
120+
}
121+
122+
lsKey := fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, provider)
123+
subnetName := pod.Annotations[lsKey]
124+
if subnetName == "" {
125+
continue
126+
}
127+
subnet := subnetByName[subnetName]
128+
if subnet == nil {
129+
continue
130+
}
131+
132+
policy := podBgpPolicy
133+
if policy == "" {
134+
policy = subnet.Annotations[util.BgpAnnotation]
135+
}
136+
96137
switch policy {
97-
case "true":
98-
fallthrough
99-
case announcePolicyCluster:
100-
for _, podIP := range pod.Status.PodIPs {
101-
ips[util.CheckProtocol(podIP.IP)] = podIP.IP
138+
case "true", announcePolicyCluster:
139+
for ip := range strings.SplitSeq(ipStr, ",") {
140+
addExpectedPrefix(strings.TrimSpace(ip), bgpExpected)
102141
}
103142
case announcePolicyLocal:
104-
if pod.Spec.NodeName == c.config.NodeName {
105-
for _, podIP := range pod.Status.PodIPs {
106-
ips[util.CheckProtocol(podIP.IP)] = podIP.IP
107-
}
108-
}
109-
default:
110-
klog.Warningf("invalid pod annotation %s=%s", util.BgpAnnotation, policy)
111-
}
112-
} else if pod.Spec.NodeName == c.config.NodeName {
113-
cidrBlock := localSubnets[pod.Annotations[util.LogicalSwitchAnnotation]]
114-
if cidrBlock != "" {
115-
for _, podIP := range pod.Status.PodIPs {
116-
if util.CIDRContainIP(cidrBlock, podIP.IP) {
117-
ips[util.CheckProtocol(podIP.IP)] = podIP.IP
143+
if pod.Spec.NodeName == nodeName {
144+
for ip := range strings.SplitSeq(ipStr, ",") {
145+
addExpectedPrefix(strings.TrimSpace(ip), bgpExpected)
118146
}
119147
}
120148
}
121149
}
122-
123-
for _, ip := range ips {
124-
addExpectedPrefix(ip, bgpExpected)
125-
}
126-
}
127-
128-
if err := c.reconcileRoutes(bgpExpected); err != nil {
129-
klog.Errorf("failed to reconcile routes: %s", err.Error())
130150
}
131151
}

pkg/speaker/subnet_test.go

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
package speaker
2+
3+
import (
4+
"fmt"
5+
"testing"
6+
7+
"github.com/osrg/gobgp/v4/api"
8+
"github.com/stretchr/testify/require"
9+
corev1 "k8s.io/api/core/v1"
10+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11+
12+
kubeovnv1 "github.com/kubeovn/kube-ovn/pkg/apis/kubeovn/v1"
13+
"github.com/kubeovn/kube-ovn/pkg/util"
14+
)
15+
16+
func TestCollectPodExpectedPrefixes(t *testing.T) {
17+
const (
18+
localNode = "node1"
19+
remoteNode = "node2"
20+
)
21+
22+
newSubnet := func(name, cidr, bgpPolicy string) *kubeovnv1.Subnet {
23+
s := &kubeovnv1.Subnet{
24+
ObjectMeta: metav1.ObjectMeta{Name: name},
25+
Spec: kubeovnv1.SubnetSpec{CIDRBlock: cidr},
26+
}
27+
s.Status.SetCondition(kubeovnv1.ConditionType(kubeovnv1.Ready), "Init", "")
28+
if bgpPolicy != "" {
29+
s.Annotations = map[string]string{util.BgpAnnotation: bgpPolicy}
30+
} else {
31+
s.Annotations = map[string]string{}
32+
}
33+
return s
34+
}
35+
36+
newPod := func(name, nodeName string, annotations map[string]string) *corev1.Pod {
37+
return &corev1.Pod{
38+
ObjectMeta: metav1.ObjectMeta{
39+
Name: name,
40+
Annotations: annotations,
41+
},
42+
Spec: corev1.PodSpec{
43+
NodeName: nodeName,
44+
},
45+
Status: corev1.PodStatus{
46+
Phase: corev1.PodRunning,
47+
},
48+
}
49+
}
50+
51+
tests := []struct {
52+
name string
53+
subnets []*kubeovnv1.Subnet
54+
pods []*corev1.Pod
55+
expectedV4 []string
56+
expectedV6 []string
57+
}{
58+
{
59+
name: "primary network with pod bgp=true announces IP",
60+
subnets: []*kubeovnv1.Subnet{
61+
newSubnet("ovn-default", "10.16.0.0/16", ""),
62+
},
63+
pods: []*corev1.Pod{
64+
newPod("pod1", remoteNode, map[string]string{
65+
util.BgpAnnotation: "true",
66+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "ovn"): "10.16.0.5",
67+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "ovn"): "ovn-default",
68+
}),
69+
},
70+
expectedV4: []string{"10.16.0.5/32"},
71+
},
72+
{
73+
name: "attachment network with subnet bgp=cluster announces IP",
74+
subnets: []*kubeovnv1.Subnet{
75+
newSubnet("attach-subnet", "192.168.1.0/24", "cluster"),
76+
},
77+
pods: []*corev1.Pod{
78+
newPod("pod1", remoteNode, map[string]string{
79+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "net1.default.ovn"): "192.168.1.10",
80+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "net1.default.ovn"): "attach-subnet",
81+
}),
82+
},
83+
expectedV4: []string{"192.168.1.10/32"},
84+
},
85+
{
86+
name: "attachment network with subnet bgp=local on local node announces IP",
87+
subnets: []*kubeovnv1.Subnet{
88+
newSubnet("attach-subnet", "192.168.1.0/24", "local"),
89+
},
90+
pods: []*corev1.Pod{
91+
newPod("pod1", localNode, map[string]string{
92+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "net1.default.ovn"): "192.168.1.10",
93+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "net1.default.ovn"): "attach-subnet",
94+
}),
95+
},
96+
expectedV4: []string{"192.168.1.10/32"},
97+
},
98+
{
99+
name: "attachment network with subnet bgp=local on remote node does not announce",
100+
subnets: []*kubeovnv1.Subnet{
101+
newSubnet("attach-subnet", "192.168.1.0/24", "local"),
102+
},
103+
pods: []*corev1.Pod{
104+
newPod("pod1", remoteNode, map[string]string{
105+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "net1.default.ovn"): "192.168.1.10",
106+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "net1.default.ovn"): "attach-subnet",
107+
}),
108+
},
109+
},
110+
{
111+
name: "no bgp annotation on pod or subnet does not announce",
112+
subnets: []*kubeovnv1.Subnet{
113+
newSubnet("ovn-default", "10.16.0.0/16", ""),
114+
},
115+
pods: []*corev1.Pod{
116+
newPod("pod1", localNode, map[string]string{
117+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "ovn"): "10.16.0.5",
118+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "ovn"): "ovn-default",
119+
}),
120+
},
121+
},
122+
{
123+
name: "non-primary CNI mode: only attachment annotations, no primary",
124+
subnets: []*kubeovnv1.Subnet{
125+
newSubnet("attach-subnet", "192.168.1.0/24", "cluster"),
126+
},
127+
pods: []*corev1.Pod{
128+
newPod("pod1", localNode, map[string]string{
129+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "net1.ns.ovn"): "192.168.1.20",
130+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "net1.ns.ovn"): "attach-subnet",
131+
}),
132+
},
133+
expectedV4: []string{"192.168.1.20/32"},
134+
},
135+
{
136+
name: "pod bgp annotation overrides subnet annotation",
137+
subnets: []*kubeovnv1.Subnet{
138+
newSubnet("ovn-default", "10.16.0.0/16", "cluster"),
139+
},
140+
pods: []*corev1.Pod{
141+
newPod("pod1", remoteNode, map[string]string{
142+
util.BgpAnnotation: "local",
143+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "ovn"): "10.16.0.5",
144+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "ovn"): "ovn-default",
145+
}),
146+
},
147+
// Pod says "local" but pod is on remote node, so nothing announced
148+
},
149+
{
150+
name: "dual-stack pod with bgp=cluster",
151+
subnets: []*kubeovnv1.Subnet{
152+
newSubnet("ovn-default", "10.16.0.0/16,fd00::/112", "cluster"),
153+
},
154+
pods: []*corev1.Pod{
155+
newPod("pod1", localNode, map[string]string{
156+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "ovn"): "10.16.0.5,fd00::5",
157+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "ovn"): "ovn-default",
158+
}),
159+
},
160+
expectedV4: []string{"10.16.0.5/32"},
161+
expectedV6: []string{"fd00::5/128"},
162+
},
163+
{
164+
name: "multiple networks on same pod",
165+
subnets: []*kubeovnv1.Subnet{
166+
newSubnet("ovn-default", "10.16.0.0/16", "cluster"),
167+
newSubnet("attach-subnet", "192.168.1.0/24", "local"),
168+
},
169+
pods: []*corev1.Pod{
170+
newPod("pod1", localNode, map[string]string{
171+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "ovn"): "10.16.0.5",
172+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "ovn"): "ovn-default",
173+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "net1.default.ovn"): "192.168.1.10",
174+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "net1.default.ovn"): "attach-subnet",
175+
}),
176+
},
177+
expectedV4: []string{"10.16.0.5/32", "192.168.1.10/32"},
178+
},
179+
{
180+
name: "dead pod is skipped",
181+
subnets: []*kubeovnv1.Subnet{
182+
newSubnet("ovn-default", "10.16.0.0/16", "cluster"),
183+
},
184+
pods: []*corev1.Pod{
185+
{
186+
ObjectMeta: metav1.ObjectMeta{
187+
Name: "dead-pod",
188+
Annotations: map[string]string{
189+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "ovn"): "10.16.0.5",
190+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "ovn"): "ovn-default",
191+
},
192+
},
193+
Spec: corev1.PodSpec{
194+
NodeName: localNode,
195+
RestartPolicy: corev1.RestartPolicyNever,
196+
},
197+
Status: corev1.PodStatus{
198+
Phase: corev1.PodFailed,
199+
},
200+
},
201+
},
202+
},
203+
{
204+
name: "pod with no annotations is skipped",
205+
subnets: []*kubeovnv1.Subnet{
206+
newSubnet("ovn-default", "10.16.0.0/16", "cluster"),
207+
},
208+
pods: []*corev1.Pod{
209+
newPod("pod1", localNode, nil),
210+
},
211+
},
212+
}
213+
214+
for _, tt := range tests {
215+
t.Run(tt.name, func(t *testing.T) {
216+
subnetByName := make(map[string]*kubeovnv1.Subnet, len(tt.subnets))
217+
for _, s := range tt.subnets {
218+
subnetByName[s.Name] = s
219+
}
220+
221+
bgpExpected := make(prefixMap)
222+
collectPodExpectedPrefixes(tt.pods, subnetByName, localNode, bgpExpected)
223+
224+
var gotV4, gotV6 []string
225+
for afi, prefixes := range bgpExpected {
226+
for p := range prefixes {
227+
switch afi {
228+
case api.Family_AFI_IP:
229+
gotV4 = append(gotV4, p)
230+
case api.Family_AFI_IP6:
231+
gotV6 = append(gotV6, p)
232+
}
233+
}
234+
}
235+
236+
require.ElementsMatch(t, tt.expectedV4, gotV4, "IPv4 prefixes mismatch")
237+
require.ElementsMatch(t, tt.expectedV6, gotV6, "IPv6 prefixes mismatch")
238+
})
239+
}
240+
}

0 commit comments

Comments
 (0)