Skip to content

Commit fe35e45

Browse files
committed
revert commit 70df1aba7dd36c38905dc643ca709d8e85e541a7
Signed-off-by: clyi <clyi@alauda.io>
1 parent 3a0e1ec commit fe35e45

File tree

2 files changed

+331
-1
lines changed

2 files changed

+331
-1
lines changed

dist/images/Dockerfile.base

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ ADD patches/e4e6ea9c5f4ba080b719924e470daa8094ff38a7.patch $SRC_DIR
6363
ADD patches/e7d3ba53cdcbc524bb29c54ddb07b83cc4258ed7.patch $SRC_DIR
6464
ADD patches/9286e1fd578fdb8f565a0f4aa9066b538295e1ac.patch $SRC_DIR
6565
ADD patches/737d9f932edada5a91f315b5f382daada8dee952.patch $SRC_DIR
66+
ADD patches/revert_70df1aba7dd36c38905dc643ca709d8e85e541a7.patch $SRC_DIR
6667

6768
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
6869
--mount=type=cache,target=/var/lib/apt,sharing=locked \
@@ -125,7 +126,9 @@ RUN cd /usr/src/ && git clone -b branch-25.03 --depth=1 https://github.com/ovn-o
125126
# northd: skip arp/nd request for lrp addresses from localnet ports
126127
git apply $SRC_DIR/477695a010affe56efdd66b60510fa612f8704c1.patch && \
127128
# add skip conntrack ipcidrs support
128-
git apply $SRC_DIR/737d9f932edada5a91f315b5f382daada8dee952.patch
129+
git apply $SRC_DIR/737d9f932edada5a91f315b5f382daada8dee952.patch && \
130+
# revert lport_pb_is_chassis_resident changes to fix BFD flap issues
131+
git apply $SRC_DIR/revert_70df1aba7dd36c38905dc643ca709d8e85e541a7.patch
129132

130133
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
131134
--mount=type=cache,target=/var/lib/apt,sharing=locked \
Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
commit 70df1aba7dd36c38905dc643ca709d8e85e541a7
2+
Author: Ales Musil <amusil@redhat.com>
3+
Date: Wed Jun 18 07:33:10 2025 +0200
4+
5+
controller: Modify the behavior of lport_pb_is_chassis_resident.
6+
7+
The function behavior would be unexpected in cases when there is
8+
BFD flap between two gateway chassis. This would result in two
9+
chassis racing to claim the same port binding. This leads to
10+
multiple problems like unsent gARPs. The gARP problem on its
11+
own was solved [0], but unfortunately reintroduced [1] in
12+
slightly different form.
13+
14+
The conclusion from the investigation is that the function in
15+
the current form is easy to misuse. To prevent that use
16+
only SB state to determine which chassis has actually claimed
17+
the port binding. This is the desired behavior in cases where
18+
this function was used previously.
19+
20+
The code that determines which chassis should claim port binding
21+
based on BFD status remains unchanged, thus the behavior during
22+
failover should be the same with minimizing the impact for BFD
23+
flapping cases.
24+
25+
[0] 289ec19b01ad ("pinctrl: Fix missing garp.")
26+
[1] 05527bd6ccdb ("controller: Extract garp_rarp to engine node.").
27+
28+
Acked-by: Dumitru Ceara <dceara@redhat.com>
29+
Signed-off-by: Ales Musil <amusil@redhat.com>
30+
(cherry picked from commit aa088003406662340df41aaac6e68e5d96272fa6)
31+
32+
diff --git b/controller/lport.c a/controller/lport.c
33+
index 077e66acf..92de375b5 100644
34+
--- b/controller/lport.c
35+
+++ a/controller/lport.c
36+
@@ -65,32 +65,35 @@ lport_lookup_by_key(struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
37+
38+
bool
39+
lport_pb_is_chassis_resident(const struct sbrec_chassis *chassis,
40+
+ const struct sset *active_tunnels,
41+
const struct sbrec_port_binding *pb)
42+
{
43+
if (!pb || !pb->chassis) {
44+
return false;
45+
}
46+
-
47+
- /* Note: we rely on SB to provide information about who owns the port
48+
- * binding. In particular, for chassisredirect ports, this avoids issues
49+
- * when the underlying BFD state changes are only detected by some of the
50+
- * chassis in the associated HA_Chassis_Group. */
51+
- return pb->chassis == chassis;
52+
+ if (strcmp(pb->type, "chassisredirect")) {
53+
+ return pb->chassis == chassis;
54+
+ } else {
55+
+ return ha_chassis_group_is_active(pb->ha_chassis_group,
56+
+ active_tunnels, chassis);
57+
+ }
58+
}
59+
60+
bool
61+
lport_is_chassis_resident(struct ovsdb_idl_index *sbrec_port_binding_by_name,
62+
const struct sbrec_chassis *chassis,
63+
+ const struct sset *active_tunnels,
64+
const char *port_name)
65+
{
66+
const struct sbrec_port_binding *pb
67+
= lport_lookup_by_name(sbrec_port_binding_by_name, port_name);
68+
- return lport_pb_is_chassis_resident(chassis, pb);
69+
+ return lport_pb_is_chassis_resident(chassis, active_tunnels, pb);
70+
}
71+
72+
bool
73+
lport_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
74+
const struct sbrec_chassis *chassis,
75+
+ const struct sset *active_tunnels,
76+
const char *port_name)
77+
{
78+
const struct sbrec_port_binding *pb = lport_lookup_by_name(
79+
@@ -100,14 +103,14 @@ lport_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
80+
return false;
81+
}
82+
83+
- if (lport_pb_is_chassis_resident(chassis, pb)) {
84+
+ if (lport_pb_is_chassis_resident(chassis, active_tunnels, pb)) {
85+
return true;
86+
}
87+
88+
const struct sbrec_port_binding *cr_pb =
89+
lport_get_cr_port(sbrec_port_binding_by_name, pb, NULL);
90+
91+
- return lport_pb_is_chassis_resident(chassis, cr_pb);
92+
+ return lport_pb_is_chassis_resident(chassis, active_tunnels, cr_pb);
93+
}
94+
95+
const struct sbrec_port_binding *
96+
diff --git b/controller/lport.h a/controller/lport.h
97+
index 6d48301d2..8b1809a27 100644
98+
--- b/controller/lport.h
99+
+++ a/controller/lport.h
100+
@@ -62,12 +62,15 @@ const struct sbrec_multicast_group *mcgroup_lookup_by_dp_name(
101+
bool
102+
lport_is_chassis_resident(struct ovsdb_idl_index *sbrec_port_binding_by_name,
103+
const struct sbrec_chassis *chassis,
104+
+ const struct sset *active_tunnels,
105+
const char *port_name);
106+
bool lport_pb_is_chassis_resident(const struct sbrec_chassis *chassis,
107+
+ const struct sset *active_tunnels,
108+
const struct sbrec_port_binding *pb);
109+
110+
bool lport_is_local(struct ovsdb_idl_index *sbrec_port_binding_by_name,
111+
const struct sbrec_chassis *chassis,
112+
+ const struct sset *active_tunnels,
113+
const char *port_name);
114+
const struct sbrec_port_binding *lport_get_peer(
115+
const struct sbrec_port_binding *,
116+
diff --git b/controller/ovn-controller.c a/controller/ovn-controller.c
117+
index 8d02f4c28..4be51102b 100644
118+
--- b/controller/ovn-controller.c
119+
+++ a/controller/ovn-controller.c
120+
@@ -5027,6 +5027,7 @@ en_route_run(struct engine_node *node, void *data)
121+
.sbrec_port_binding_by_name = sbrec_port_binding_by_name,
122+
.chassis = chassis,
123+
.dynamic_routing_port_mapping = dynamic_routing_port_mapping,
124+
+ .active_tunnels = &rt_data->active_tunnels,
125+
.local_datapaths = &rt_data->local_datapaths,
126+
.local_lports = &rt_data->local_lports,
127+
.local_bindings = &rt_data->lbinding_data.bindings,
128+
@@ -5129,6 +5130,7 @@ route_runtime_data_handler(struct engine_node *node, void *data)
129+
struct tracked_lport *lport = shash_node->data;
130+
131+
if (route_exchange_find_port(sbrec_port_binding_by_name, chassis,
132+
+ &rt_data->active_tunnels,
133+
lport->pb, NULL)) {
134+
/* XXX: Until we get I-P support for route exchange we need to
135+
* request recompute. */
136+
@@ -5186,6 +5188,8 @@ route_sb_port_binding_data_handler(struct engine_node *node, void *data)
137+
engine_ovsdb_node_get_index(
138+
engine_get_input("SB_port_binding", node),
139+
"name");
140+
+ struct ed_type_runtime_data *rt_data =
141+
+ engine_get_input_data("runtime_data", node);
142+
143+
144+
/* There are the following cases where we need to handle updates to the
145+
@@ -5209,7 +5213,8 @@ route_sb_port_binding_data_handler(struct engine_node *node, void *data)
146+
}
147+
148+
if (route_exchange_find_port(sbrec_port_binding_by_name, chassis,
149+
- sbrec_pb, NULL)) {
150+
+ &rt_data->active_tunnels, sbrec_pb,
151+
+ NULL)) {
152+
/* XXX: Until we get I-P support for route exchange we need to
153+
* request recompute. */
154+
return false;
155+
@@ -6442,6 +6447,7 @@ main(int argc, char *argv[])
156+
ovnsb_idl_loop.idl),
157+
br_int, chassis,
158+
&runtime_data->local_datapaths,
159+
+ &runtime_data->active_tunnels,
160+
&runtime_data->local_active_ports_ipv6_pd,
161+
&runtime_data->local_active_ports_ras,
162+
ovsrec_open_vswitch_table_get(
163+
diff --git b/controller/physical.c a/controller/physical.c
164+
index d068d65c1..d0d9d9006 100644
165+
--- b/controller/physical.c
166+
+++ a/controller/physical.c
167+
@@ -871,7 +871,8 @@ put_replace_router_port_mac_flows(const struct physical_ctx *ctx,
168+
struct ofpact_mac *replace_mac;
169+
char *cr_peer_name = xasprintf("cr-%s", rport_binding->logical_port);
170+
if (lport_is_chassis_resident(ctx->sbrec_port_binding_by_name,
171+
- ctx->chassis, cr_peer_name)) {
172+
+ ctx->chassis, ctx->active_tunnels,
173+
+ cr_peer_name)) {
174+
/* If a router port's chassisredirect port is
175+
* resident on this chassis, then we need not do mac replace. */
176+
free(cr_peer_name);
177+
diff --git b/controller/pinctrl.c a/controller/pinctrl.c
178+
index 893a2c440..748d4b442 100644
179+
--- b/controller/pinctrl.c
180+
+++ a/controller/pinctrl.c
181+
@@ -377,7 +377,8 @@ pinctrl_handle_bfd_msg(struct rconn *swconn, const struct flow *ip_flow,
182+
static void bfd_monitor_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
183+
const struct sbrec_bfd_table *bfd_table,
184+
struct ovsdb_idl_index *sbrec_port_binding_by_name,
185+
- const struct sbrec_chassis *chassis)
186+
+ const struct sbrec_chassis *chassis,
187+
+ const struct sset *active_tunnels)
188+
OVS_REQUIRES(pinctrl_mutex);
189+
static void init_fdb_entries(void);
190+
static void destroy_fdb_entries(void);
191+
@@ -1443,6 +1444,7 @@ prepare_ipv6_prefixd(struct ovsdb_idl_txn *ovnsb_idl_txn,
192+
struct ovsdb_idl_index *sbrec_port_binding_by_name,
193+
const struct shash *local_active_ports_ipv6_pd,
194+
const struct sbrec_chassis *chassis,
195+
+ const struct sset *active_tunnels,
196+
const struct hmap *local_datapaths)
197+
OVS_REQUIRES(pinctrl_mutex)
198+
{
199+
@@ -1471,8 +1473,9 @@ prepare_ipv6_prefixd(struct ovsdb_idl_txn *ovnsb_idl_txn,
200+
}
201+
202+
char *redirect_name = xasprintf("cr-%s", pb->logical_port);
203+
- bool resident = lport_is_chassis_resident(sbrec_port_binding_by_name,
204+
- chassis, redirect_name);
205+
+ bool resident = lport_is_chassis_resident(
206+
+ sbrec_port_binding_by_name, chassis, active_tunnels,
207+
+ redirect_name);
208+
free(redirect_name);
209+
if ((strcmp(pb->type, "l3gateway") || pb->chassis != chassis) &&
210+
!resident) {
211+
@@ -4086,6 +4089,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
212+
const struct ovsrec_bridge *br_int,
213+
const struct sbrec_chassis *chassis,
214+
const struct hmap *local_datapaths,
215+
+ const struct sset *active_tunnels,
216+
const struct shash *local_active_ports_ipv6_pd,
217+
const struct shash *local_active_ports_ras,
218+
const struct ovsrec_open_vswitch_table *ovs_table,
219+
@@ -4106,7 +4110,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
220+
prepare_ipv6_ras(local_active_ports_ras, sbrec_port_binding_by_name);
221+
prepare_ipv6_prefixd(ovnsb_idl_txn, sbrec_port_binding_by_name,
222+
local_active_ports_ipv6_pd, chassis,
223+
- local_datapaths);
224+
+ active_tunnels, local_datapaths);
225+
controller_event_run(ovnsb_idl_txn, ce_table, chassis);
226+
ip_mcast_sync(ovnsb_idl_txn, chassis, local_datapaths,
227+
sbrec_datapath_binding_by_key,
228+
@@ -4121,7 +4125,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
229+
sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name,
230+
chassis);
231+
bfd_monitor_run(ovnsb_idl_txn, bfd_table, sbrec_port_binding_by_name,
232+
- chassis);
233+
+ chassis, active_tunnels);
234+
run_put_fdbs(ovnsb_idl_txn, sbrec_port_binding_by_key,
235+
sbrec_datapath_binding_by_key, sbrec_fdb_by_dp_key_mac,
236+
cur_cfg);
237+
@@ -8369,7 +8373,8 @@ static void
238+
bfd_monitor_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
239+
const struct sbrec_bfd_table *bfd_table,
240+
struct ovsdb_idl_index *sbrec_port_binding_by_name,
241+
- const struct sbrec_chassis *chassis)
242+
+ const struct sbrec_chassis *chassis,
243+
+ const struct sset *active_tunnels)
244+
OVS_REQUIRES(pinctrl_mutex)
245+
{
246+
struct bfd_entry *entry;
247+
@@ -8401,8 +8406,9 @@ bfd_monitor_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
248+
}
249+
250+
char *redirect_name = xasprintf("cr-%s", pb->logical_port);
251+
- bool resident = lport_is_chassis_resident(sbrec_port_binding_by_name,
252+
- chassis, redirect_name);
253+
+ bool resident = lport_is_chassis_resident(
254+
+ sbrec_port_binding_by_name, chassis, active_tunnels,
255+
+ redirect_name);
256+
free(redirect_name);
257+
if ((strcmp(pb->type, "l3gateway") || pb->chassis != chassis) &&
258+
!resident) {
259+
diff --git b/controller/pinctrl.h a/controller/pinctrl.h
260+
index 77df0abfb..f2aed9b5e 100644
261+
--- b/controller/pinctrl.h
262+
+++ a/controller/pinctrl.h
263+
@@ -59,6 +59,7 @@ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
264+
const struct sbrec_ecmp_nexthop_table *,
265+
const struct ovsrec_bridge *, const struct sbrec_chassis *,
266+
const struct hmap *local_datapaths,
267+
+ const struct sset *active_tunnels,
268+
const struct shash *local_active_ports_ipv6_pd,
269+
const struct shash *local_active_ports_ras,
270+
const struct ovsrec_open_vswitch_table *ovs_table,
271+
diff --git b/controller/route.c a/controller/route.c
272+
index e6b34855f..9a1858a08 100644
273+
--- b/controller/route.c
274+
+++ a/controller/route.c
275+
@@ -52,6 +52,7 @@ advertise_route_hash(const struct in6_addr *dst, unsigned int plen)
276+
const struct sbrec_port_binding*
277+
route_exchange_find_port(struct ovsdb_idl_index *sbrec_port_binding_by_name,
278+
const struct sbrec_chassis *chassis,
279+
+ const struct sset *active_tunnels,
280+
const struct sbrec_port_binding *pb,
281+
const char **dynamic_routing_port_name)
282+
{
283+
@@ -82,7 +83,7 @@ route_exchange_find_port(struct ovsdb_idl_index *sbrec_port_binding_by_name,
284+
smap_get(&cr_pb->options, "dynamic-routing-port-name");
285+
}
286+
287+
- if (!lport_pb_is_chassis_resident(chassis, cr_pb)) {
288+
+ if (!lport_pb_is_chassis_resident(chassis, active_tunnels, cr_pb)) {
289+
return NULL;
290+
}
291+
292+
@@ -186,6 +187,7 @@ route_run(struct route_ctx_in *r_ctx_in,
293+
const struct sbrec_port_binding *repb =
294+
route_exchange_find_port(r_ctx_in->sbrec_port_binding_by_name,
295+
r_ctx_in->chassis,
296+
+ r_ctx_in->active_tunnels,
297+
local_peer, &port_name);
298+
if (port_name) {
299+
lr_has_port_name_filter = true;
300+
@@ -289,6 +291,7 @@ route_run(struct route_ctx_in *r_ctx_in,
301+
false);
302+
if (lport_is_local(r_ctx_in->sbrec_port_binding_by_name,
303+
r_ctx_in->chassis,
304+
+ r_ctx_in->active_tunnels,
305+
route->tracked_port->logical_port)) {
306+
priority = PRIORITY_LOCAL_BOUND;
307+
sset_add(r_ctx_out->tracked_ports_local,
308+
diff --git b/controller/route.h a/controller/route.h
309+
index e5a96bb31..7d0dd14f1 100644
310+
--- b/controller/route.h
311+
+++ a/controller/route.h
312+
@@ -35,6 +35,7 @@ struct route_ctx_in {
313+
struct ovsdb_idl_index *sbrec_port_binding_by_name;
314+
const struct sbrec_chassis *chassis;
315+
const char *dynamic_routing_port_mapping;
316+
+ const struct sset *active_tunnels;
317+
const struct hmap *local_datapaths;
318+
const struct sset *local_lports;
319+
struct shash *local_bindings;
320+
@@ -81,6 +82,7 @@ struct advertise_route_entry {
321+
const struct sbrec_port_binding *route_exchange_find_port(
322+
struct ovsdb_idl_index *sbrec_port_binding_by_name,
323+
const struct sbrec_chassis *chassis,
324+
+ const struct sset *active_tunnels,
325+
const struct sbrec_port_binding *pb,
326+
const char **dynamic_routing_port_name);
327+
uint32_t advertise_route_hash(const struct in6_addr *dst, unsigned int plen);

0 commit comments

Comments
 (0)