Skip to content

Commit c333b1c

Browse files
oilbeaterclaude
andcommitted
fix: BGP speaker reads IPs from pod annotations instead of pod status
BGP Speaker's syncSubnetRoutes() previously read pod IPs from pod.Status.PodIPs, which is managed by kubelet and only contains primary CNI IPs. This caused incorrect behavior when Kube-OVN runs as a secondary CNI (non-primary mode) or when attachment networks are configured — the speaker would either advertise the wrong IPs or miss attachment network IPs entirely. Replace pod.Status.PodIPs with pod annotation-based IP discovery ({provider}.kubernetes.io/ip_address), which is the unified and reliable source for all Kube-OVN managed IPs across both primary and attachment networks. Signed-off-by: Mengxin Liu <liumengxinfly@gmail.com> Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> Signed-off-by: Mengxin Liu <liumengxinfly@gmail.com>
1 parent e6a657c commit c333b1c

File tree

2 files changed

+294
-34
lines changed

2 files changed

+294
-34
lines changed

pkg/speaker/subnet.go

Lines changed: 54 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@
22
package speaker
33

44
import (
5+
"fmt"
56
"net/netip"
67
"strings"
78

9+
corev1 "k8s.io/api/core/v1"
810
"k8s.io/apimachinery/pkg/labels"
911
"k8s.io/klog/v2"
1012
"k8s.io/utils/set"
1113

14+
kubeovnv1 "github.com/kubeovn/kube-ovn/pkg/apis/kubeovn/v1"
1215
"github.com/kubeovn/kube-ovn/pkg/util"
1316
)
1417

@@ -53,12 +56,14 @@ func (c *Controller) syncSubnetRoutes() {
5356
}
5457
}
5558

56-
localSubnets := make(map[string]string, 2)
59+
subnetByName := make(map[string]*kubeovnv1.Subnet, len(subnets))
5760
for _, subnet := range subnets {
5861
if !subnet.Status.IsReady() || len(subnet.Annotations) == 0 {
5962
continue
6063
}
6164

65+
subnetByName[subnet.Name] = subnet
66+
6267
policy := subnet.Annotations[util.BgpAnnotation]
6368
switch policy {
6469
case "":
@@ -79,53 +84,68 @@ func (c *Controller) syncSubnetRoutes() {
7984
bgpExpected[afi].Insert(prefix.String())
8085
}
8186
}
82-
case announcePolicyLocal:
83-
localSubnets[subnet.Name] = subnet.Spec.CIDRBlock
8487
default:
85-
klog.Warningf("invalid subnet annotation %s=%s", util.BgpAnnotation, policy)
88+
if policy != announcePolicyLocal {
89+
klog.Warningf("invalid subnet annotation %s=%s", util.BgpAnnotation, policy)
90+
}
8691
}
8792
}
8893

94+
collectPodExpectedPrefixes(pods, subnetByName, c.config.NodeName, bgpExpected)
95+
96+
if err := c.reconcileRoutes(bgpExpected); err != nil {
97+
klog.Errorf("failed to reconcile routes: %s", err.Error())
98+
}
99+
}
100+
101+
// collectPodExpectedPrefixes iterates over pods and collects IPs that should be announced via BGP.
102+
// It reads IPs from pod annotations ({provider}.kubernetes.io/ip_address) instead of pod.Status.PodIPs,
103+
// so that attachment network IPs and non-primary CNI IPs are correctly announced.
104+
func collectPodExpectedPrefixes(pods []*corev1.Pod, subnetByName map[string]*kubeovnv1.Subnet, nodeName string, bgpExpected prefixMap) {
105+
ipAddrSuffix := fmt.Sprintf(util.IPAddressAnnotationTemplate, "")
89106
for _, pod := range pods {
90-
if pod.Status.PodIP == "" || len(pod.Annotations) == 0 || !isPodAlive(pod) {
107+
if len(pod.Annotations) == 0 || !isPodAlive(pod) {
91108
continue
92109
}
93110

94-
ips := make(map[string]string, 2)
95-
if policy := pod.Annotations[util.BgpAnnotation]; policy != "" {
111+
podBgpPolicy := pod.Annotations[util.BgpAnnotation]
112+
113+
for key, ipStr := range pod.Annotations {
114+
if ipStr == "" || !strings.HasSuffix(key, ipAddrSuffix) {
115+
continue
116+
}
117+
provider := strings.TrimSuffix(key, ipAddrSuffix)
118+
if provider == "" {
119+
continue
120+
}
121+
122+
lsKey := fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, provider)
123+
subnetName := pod.Annotations[lsKey]
124+
if subnetName == "" {
125+
continue
126+
}
127+
subnet := subnetByName[subnetName]
128+
if subnet == nil {
129+
continue
130+
}
131+
132+
policy := podBgpPolicy
133+
if policy == "" {
134+
policy = subnet.Annotations[util.BgpAnnotation]
135+
}
136+
96137
switch policy {
97-
case "true":
98-
fallthrough
99-
case announcePolicyCluster:
100-
for _, podIP := range pod.Status.PodIPs {
101-
ips[util.CheckProtocol(podIP.IP)] = podIP.IP
138+
case "true", announcePolicyCluster:
139+
for ip := range strings.SplitSeq(ipStr, ",") {
140+
addExpectedPrefix(strings.TrimSpace(ip), bgpExpected)
102141
}
103142
case announcePolicyLocal:
104-
if pod.Spec.NodeName == c.config.NodeName {
105-
for _, podIP := range pod.Status.PodIPs {
106-
ips[util.CheckProtocol(podIP.IP)] = podIP.IP
107-
}
108-
}
109-
default:
110-
klog.Warningf("invalid pod annotation %s=%s", util.BgpAnnotation, policy)
111-
}
112-
} else if pod.Spec.NodeName == c.config.NodeName {
113-
cidrBlock := localSubnets[pod.Annotations[util.LogicalSwitchAnnotation]]
114-
if cidrBlock != "" {
115-
for _, podIP := range pod.Status.PodIPs {
116-
if util.CIDRContainIP(cidrBlock, podIP.IP) {
117-
ips[util.CheckProtocol(podIP.IP)] = podIP.IP
143+
if pod.Spec.NodeName == nodeName {
144+
for ip := range strings.SplitSeq(ipStr, ",") {
145+
addExpectedPrefix(strings.TrimSpace(ip), bgpExpected)
118146
}
119147
}
120148
}
121149
}
122-
123-
for _, ip := range ips {
124-
addExpectedPrefix(ip, bgpExpected)
125-
}
126-
}
127-
128-
if err := c.reconcileRoutes(bgpExpected); err != nil {
129-
klog.Errorf("failed to reconcile routes: %s", err.Error())
130150
}
131151
}

pkg/speaker/subnet_test.go

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
package speaker
2+
3+
import (
4+
"fmt"
5+
"testing"
6+
7+
"github.com/osrg/gobgp/v4/api"
8+
"github.com/stretchr/testify/require"
9+
corev1 "k8s.io/api/core/v1"
10+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11+
12+
kubeovnv1 "github.com/kubeovn/kube-ovn/pkg/apis/kubeovn/v1"
13+
"github.com/kubeovn/kube-ovn/pkg/util"
14+
)
15+
16+
func TestCollectPodExpectedPrefixes(t *testing.T) {
17+
const (
18+
localNode = "node1"
19+
remoteNode = "node2"
20+
)
21+
22+
newSubnet := func(name, cidr, bgpPolicy string) *kubeovnv1.Subnet {
23+
s := &kubeovnv1.Subnet{
24+
ObjectMeta: metav1.ObjectMeta{Name: name},
25+
Spec: kubeovnv1.SubnetSpec{CIDRBlock: cidr},
26+
}
27+
s.Status.SetCondition(kubeovnv1.ConditionType(kubeovnv1.Ready), "Init", "")
28+
if bgpPolicy != "" {
29+
s.Annotations = map[string]string{util.BgpAnnotation: bgpPolicy}
30+
} else {
31+
s.Annotations = map[string]string{}
32+
}
33+
return s
34+
}
35+
36+
newPod := func(name, nodeName string, annotations map[string]string) *corev1.Pod {
37+
return &corev1.Pod{
38+
ObjectMeta: metav1.ObjectMeta{
39+
Name: name,
40+
Annotations: annotations,
41+
},
42+
Spec: corev1.PodSpec{
43+
NodeName: nodeName,
44+
},
45+
Status: corev1.PodStatus{
46+
Phase: corev1.PodRunning,
47+
},
48+
}
49+
}
50+
51+
tests := []struct {
52+
name string
53+
subnets []*kubeovnv1.Subnet
54+
pods []*corev1.Pod
55+
expectedV4 []string
56+
expectedV6 []string
57+
}{
58+
{
59+
name: "primary network with pod bgp=true announces IP",
60+
subnets: []*kubeovnv1.Subnet{
61+
newSubnet("ovn-default", "10.16.0.0/16", ""),
62+
},
63+
pods: []*corev1.Pod{
64+
newPod("pod1", remoteNode, map[string]string{
65+
util.BgpAnnotation: "true",
66+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "ovn"): "10.16.0.5",
67+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "ovn"): "ovn-default",
68+
}),
69+
},
70+
expectedV4: []string{"10.16.0.5/32"},
71+
},
72+
{
73+
name: "attachment network with subnet bgp=cluster announces IP",
74+
subnets: []*kubeovnv1.Subnet{
75+
newSubnet("attach-subnet", "192.168.1.0/24", "cluster"),
76+
},
77+
pods: []*corev1.Pod{
78+
newPod("pod1", remoteNode, map[string]string{
79+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "net1.default.ovn"): "192.168.1.10",
80+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "net1.default.ovn"): "attach-subnet",
81+
}),
82+
},
83+
expectedV4: []string{"192.168.1.10/32"},
84+
},
85+
{
86+
name: "attachment network with subnet bgp=local on local node announces IP",
87+
subnets: []*kubeovnv1.Subnet{
88+
newSubnet("attach-subnet", "192.168.1.0/24", "local"),
89+
},
90+
pods: []*corev1.Pod{
91+
newPod("pod1", localNode, map[string]string{
92+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "net1.default.ovn"): "192.168.1.10",
93+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "net1.default.ovn"): "attach-subnet",
94+
}),
95+
},
96+
expectedV4: []string{"192.168.1.10/32"},
97+
},
98+
{
99+
name: "attachment network with subnet bgp=local on remote node does not announce",
100+
subnets: []*kubeovnv1.Subnet{
101+
newSubnet("attach-subnet", "192.168.1.0/24", "local"),
102+
},
103+
pods: []*corev1.Pod{
104+
newPod("pod1", remoteNode, map[string]string{
105+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "net1.default.ovn"): "192.168.1.10",
106+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "net1.default.ovn"): "attach-subnet",
107+
}),
108+
},
109+
},
110+
{
111+
name: "no bgp annotation on pod or subnet does not announce",
112+
subnets: []*kubeovnv1.Subnet{
113+
newSubnet("ovn-default", "10.16.0.0/16", ""),
114+
},
115+
pods: []*corev1.Pod{
116+
newPod("pod1", localNode, map[string]string{
117+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "ovn"): "10.16.0.5",
118+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "ovn"): "ovn-default",
119+
}),
120+
},
121+
},
122+
{
123+
name: "non-primary CNI mode: only attachment annotations, no primary",
124+
subnets: []*kubeovnv1.Subnet{
125+
newSubnet("attach-subnet", "192.168.1.0/24", "cluster"),
126+
},
127+
pods: []*corev1.Pod{
128+
newPod("pod1", localNode, map[string]string{
129+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "net1.ns.ovn"): "192.168.1.20",
130+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "net1.ns.ovn"): "attach-subnet",
131+
}),
132+
},
133+
expectedV4: []string{"192.168.1.20/32"},
134+
},
135+
{
136+
name: "pod bgp annotation overrides subnet annotation",
137+
subnets: []*kubeovnv1.Subnet{
138+
newSubnet("ovn-default", "10.16.0.0/16", "cluster"),
139+
},
140+
pods: []*corev1.Pod{
141+
newPod("pod1", remoteNode, map[string]string{
142+
util.BgpAnnotation: "local",
143+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "ovn"): "10.16.0.5",
144+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "ovn"): "ovn-default",
145+
}),
146+
},
147+
// Pod says "local" but pod is on remote node, so nothing announced
148+
},
149+
{
150+
name: "dual-stack pod with bgp=cluster",
151+
subnets: []*kubeovnv1.Subnet{
152+
newSubnet("ovn-default", "10.16.0.0/16,fd00::/112", "cluster"),
153+
},
154+
pods: []*corev1.Pod{
155+
newPod("pod1", localNode, map[string]string{
156+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "ovn"): "10.16.0.5,fd00::5",
157+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "ovn"): "ovn-default",
158+
}),
159+
},
160+
expectedV4: []string{"10.16.0.5/32"},
161+
expectedV6: []string{"fd00::5/128"},
162+
},
163+
{
164+
name: "multiple networks on same pod",
165+
subnets: []*kubeovnv1.Subnet{
166+
newSubnet("ovn-default", "10.16.0.0/16", "cluster"),
167+
newSubnet("attach-subnet", "192.168.1.0/24", "local"),
168+
},
169+
pods: []*corev1.Pod{
170+
newPod("pod1", localNode, map[string]string{
171+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "ovn"): "10.16.0.5",
172+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "ovn"): "ovn-default",
173+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "net1.default.ovn"): "192.168.1.10",
174+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "net1.default.ovn"): "attach-subnet",
175+
}),
176+
},
177+
expectedV4: []string{"10.16.0.5/32", "192.168.1.10/32"},
178+
},
179+
{
180+
name: "dead pod is skipped",
181+
subnets: []*kubeovnv1.Subnet{
182+
newSubnet("ovn-default", "10.16.0.0/16", "cluster"),
183+
},
184+
pods: []*corev1.Pod{
185+
{
186+
ObjectMeta: metav1.ObjectMeta{
187+
Name: "dead-pod",
188+
Annotations: map[string]string{
189+
fmt.Sprintf(util.IPAddressAnnotationTemplate, "ovn"): "10.16.0.5",
190+
fmt.Sprintf(util.LogicalSwitchAnnotationTemplate, "ovn"): "ovn-default",
191+
},
192+
},
193+
Spec: corev1.PodSpec{
194+
NodeName: localNode,
195+
RestartPolicy: corev1.RestartPolicyNever,
196+
},
197+
Status: corev1.PodStatus{
198+
Phase: corev1.PodFailed,
199+
},
200+
},
201+
},
202+
},
203+
{
204+
name: "pod with no annotations is skipped",
205+
subnets: []*kubeovnv1.Subnet{
206+
newSubnet("ovn-default", "10.16.0.0/16", "cluster"),
207+
},
208+
pods: []*corev1.Pod{
209+
newPod("pod1", localNode, nil),
210+
},
211+
},
212+
}
213+
214+
for _, tt := range tests {
215+
t.Run(tt.name, func(t *testing.T) {
216+
subnetByName := make(map[string]*kubeovnv1.Subnet, len(tt.subnets))
217+
for _, s := range tt.subnets {
218+
subnetByName[s.Name] = s
219+
}
220+
221+
bgpExpected := make(prefixMap)
222+
collectPodExpectedPrefixes(tt.pods, subnetByName, localNode, bgpExpected)
223+
224+
var gotV4, gotV6 []string
225+
for afi, prefixes := range bgpExpected {
226+
for p := range prefixes {
227+
switch afi {
228+
case api.Family_AFI_IP:
229+
gotV4 = append(gotV4, p)
230+
case api.Family_AFI_IP6:
231+
gotV6 = append(gotV6, p)
232+
}
233+
}
234+
}
235+
236+
require.ElementsMatch(t, tt.expectedV4, gotV4, "IPv4 prefixes mismatch")
237+
require.ElementsMatch(t, tt.expectedV6, gotV6, "IPv6 prefixes mismatch")
238+
})
239+
}
240+
}

0 commit comments

Comments
 (0)