Skip to content

Commit e76ef7f

Browse files
committed
bgpd: Fix wrong pthread event cancelling
0 __pthread_kill_implementation (no_tid=0, signo=6, threadid=130719886083648) at ./nptl/pthread_kill.c:44
1 __pthread_kill_internal (signo=6, threadid=130719886083648) at ./nptl/pthread_kill.c:78
2 __GI___pthread_kill (threadid=130719886083648, signo=signo@entry=6) at ./nptl/pthread_kill.c:89
3 0x000076e399e42476 in __GI_raise (sig=6) at ../sysdeps/posix/raise.c:26
4 0x000076e39a34f950 in core_handler (signo=6, siginfo=0x76e3985fca30, context=0x76e3985fc900) at lib/sigevent.c:258
5 <signal handler called>
6 __pthread_kill_implementation (no_tid=0, signo=6, threadid=130719886083648) at ./nptl/pthread_kill.c:44
7 __pthread_kill_internal (signo=6, threadid=130719886083648) at ./nptl/pthread_kill.c:78
8 __GI___pthread_kill (threadid=130719886083648, signo=signo@entry=6) at ./nptl/pthread_kill.c:89
9 0x000076e399e42476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
10 0x000076e399e287f3 in __GI_abort () at ./stdlib/abort.c:79
11 0x000076e39a39874b in _zlog_assert_failed (xref=0x76e39a46cca0 <_xref.27>, extra=0x0) at lib/zlog.c:789
12 0x000076e39a369dde in cancel_event_helper (m=0x5eda32df5e40, arg=0x5eda33afeed0, flags=1) at lib/event.c:1428
13 0x000076e39a369ef6 in event_cancel_event_ready (m=0x5eda32df5e40, arg=0x5eda33afeed0) at lib/event.c:1470
14 0x00005eda0a94a5b3 in bgp_stop (connection=0x5eda33afeed0) at bgpd/bgp_fsm.c:1355
15 0x00005eda0a94b4ae in bgp_stop_with_notify (connection=0x5eda33afeed0, code=8 '\b', sub_code=0 '\000') at bgpd/bgp_fsm.c:1610
16 0x00005eda0a979498 in bgp_packet_add (connection=0x5eda33afeed0, peer=0x5eda33b11800, s=0x76e3880daf90) at bgpd/bgp_packet.c:152
17 0x00005eda0a97a80f in bgp_keepalive_send (peer=0x5eda33b11800) at bgpd/bgp_packet.c:639
18 0x00005eda0a9511fd in peer_process (hb=0x5eda33c9ab80, arg=0x76e3985ffaf0) at bgpd/bgp_keepalives.c:111
19 0x000076e39a2cd8e6 in hash_iterate (hash=0x76e388000be0, func=0x5eda0a95105e <peer_process>, arg=0x76e3985ffaf0) at lib/hash.c:252
20 0x00005eda0a951679 in bgp_keepalives_start (arg=0x5eda3306af80) at bgpd/bgp_keepalives.c:214
21 0x000076e39a2c9932 in frr_pthread_inner (arg=0x5eda3306af80) at lib/frr_pthread.c:180
22 0x000076e399e94ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
23 0x000076e399f26850 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
(gdb) f 12
12 0x000076e39a369dde in cancel_event_helper (m=0x5eda32df5e40, arg=0x5eda33afeed0, flags=1) at lib/event.c:1428
1428 assert(m->owner == pthread_self());

In this backtrace, the crash is caused by an attempt to cancel the connection's events from the wrong pthread: the call chain starts in the keepalives pthread (bgp_keepalives_start), which is not the owner of the event master, so the assert(m->owner == pthread_self()) in cancel_event_helper fails.

Modify the code to instead create an event on bm->master that cancels the events for the connection.

Signed-off-by: Donald Sharp <sharpd@nvidia.com>
1 parent a5d188a commit e76ef7f

File tree

4 files changed

+14
-1
lines changed

4 files changed

+14
-1
lines changed

bgpd/bgp_fsm.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
182182
EVENT_OFF(going_away->t_delayopen);
183183
EVENT_OFF(going_away->t_connect_check_r);
184184
EVENT_OFF(going_away->t_connect_check_w);
185+
EVENT_OFF(going_away->t_stop_with_notify);
185186
EVENT_OFF(keeper->t_routeadv);
186187
EVENT_OFF(keeper->t_connect);
187188
EVENT_OFF(keeper->t_delayopen);
@@ -2031,6 +2032,8 @@ enum bgp_fsm_state_progress bgp_stop(struct peer_connection *connection)
20312032
EVENT_OFF(connection->t_connect_check_r);
20322033
EVENT_OFF(connection->t_connect_check_w);
20332034

2035+
EVENT_OFF(connection->t_stop_with_notify);
2036+
20342037
/* Stop all timers. */
20352038
EVENT_OFF(connection->t_start);
20362039
EVENT_OFF(connection->t_connect);
@@ -3525,3 +3528,10 @@ void bgp_peer_gr_flags_update(struct peer *peer)
35253528
}
35263529
}
35273530
}
3531+
3532+
/*
 * Event callback that queues a TCP_fatal_error FSM event for a connection.
 *
 * The peer_connection is carried as the event argument (EVENT_ARG) and is
 * handed to BGP_EVENT_ADD together with TCP_fatal_error.
 *
 * In this commit it is scheduled on bm->master by bgp_packet_add() (through
 * connection->t_stop_with_notify) in place of a direct
 * BGP_EVENT_ADD(connection, TCP_fatal_error) call.
 */
void bgp_event_stop_with_notify(struct event *event)
3533+
{
3534+
struct peer_connection *connection = EVENT_ARG(event);
3535+
3536+
BGP_EVENT_ADD(connection, TCP_fatal_error);
3537+
}

bgpd/bgp_fsm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ enum bgp_fsm_state_progress {
109109
extern void bgp_fsm_nht_update(struct peer_connection *connection,
110110
struct peer *peer, bool has_valid_nexthops);
111111
extern void bgp_event(struct event *event);
112+
extern void bgp_event_stop_with_notify(struct event *event);
112113
extern int bgp_event_update(struct peer_connection *connection,
113114
enum bgp_fsm_events event);
114115
extern enum bgp_fsm_state_progress bgp_stop(struct peer_connection *connection);

bgpd/bgp_packet.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,8 @@ static void bgp_packet_add(struct peer_connection *connection,
148148
EC_BGP_SENDQ_STUCK_PROPER,
149149
"%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session",
150150
peer, sendholdtime);
151-
BGP_EVENT_ADD(connection, TCP_fatal_error);
151+
event_add_event(bm->master, bgp_event_stop_with_notify, connection, 0,
152+
&connection->t_stop_with_notify);
152153
} else if (delta > (intmax_t)holdtime &&
153154
monotime(NULL) - peer->last_sendq_warn > 5) {
154155
flog_warn(

bgpd/bgpd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1266,6 +1266,7 @@ struct peer_connection {
12661266

12671267
struct event *t_connect_check_r;
12681268
struct event *t_connect_check_w;
1269+
struct event *t_stop_with_notify;
12691270

12701271
struct event *t_gr_restart;
12711272
struct event *t_gr_stale;

0 commit comments

Comments
 (0)