Skip to content

Commit 5c53ff5

Browse files
Eric DumazetTortel
Eric Dumazet
authored andcommitted
tcp: fix a potential deadlock in tcp_get_info()
Taking socket spinlock in tcp_get_info() can deadlock, as inet_diag_dump_icsk() holds the &hashinfo->ehash_locks[i], while packet processing can use the reverse locking order. We could avoid this locking for TCP_LISTEN states, but lockdep would certainly get confused as all TCP sockets share same lockdep classes. [ 523.722504] ====================================================== [ 523.728706] [ INFO: possible circular locking dependency detected ] [ 523.734990] 4.1.0-dbg-DEV #1676 Not tainted [ 523.739202] ------------------------------------------------------- [ 523.745474] ss/18032 is trying to acquire lock: [ 523.750002] (slock-AF_INET){+.-...}, at: [<ffffffff81669d44>] tcp_get_info+0x2c4/0x360 [ 523.758129] [ 523.758129] but task is already holding lock: [ 523.763968] (&(&hashinfo->ehash_locks[i])->rlock){+.-...}, at: [<ffffffff816bcb75>] inet_diag_dump_icsk+0x1d5/0x6c0 [ 523.774661] [ 523.774661] which lock already depends on the new lock. [ 523.774661] [ 523.782850] [ 523.782850] the existing dependency chain (in reverse order) is: [ 523.790326] -> #1 (&(&hashinfo->ehash_locks[i])->rlock){+.-...}: [ 523.796599] [<ffffffff811126bb>] lock_acquire+0xbb/0x270 [ 523.802565] [<ffffffff816f5868>] _raw_spin_lock+0x38/0x50 [ 523.808628] [<ffffffff81665af8>] __inet_hash_nolisten+0x78/0x110 [ 523.815273] [<ffffffff816819db>] tcp_v4_syn_recv_sock+0x24b/0x350 [ 523.822067] [<ffffffff81684d41>] tcp_check_req+0x3c1/0x500 [ 523.828199] [<ffffffff81682d09>] tcp_v4_do_rcv+0x239/0x3d0 [ 523.834331] [<ffffffff816842fe>] tcp_v4_rcv+0xa8e/0xc10 [ 523.840202] [<ffffffff81658fa3>] ip_local_deliver_finish+0x133/0x3e0 [ 523.847214] [<ffffffff81659a9a>] ip_local_deliver+0xaa/0xc0 [ 523.853440] [<ffffffff816593b8>] ip_rcv_finish+0x168/0x5c0 [ 523.859624] [<ffffffff81659db7>] ip_rcv+0x307/0x420 Lets use u64_sync infrastructure instead. As a bonus, 64bit arches get optimized, as these are nop for them. Fixes: 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info") Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]> Change-Id: I0b62e73f0dd955da9d1bbb977a6de448721a089d
1 parent c371779 commit 5c53ff5

File tree

4 files changed

+17
-4
lines changed

4 files changed

+17
-4
lines changed

include/linux/tcp.h

+2
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ struct tcp_sock {
166166
* sum(delta(snd_una)), or how many bytes
167167
* were acked.
168168
*/
169+
struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */
170+
169171
u32 snd_una; /* First byte we want an ack for */
170172
u32 snd_sml; /* Last byte of the most recently transmitted small packet */
171173
u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */

net/ipv4/tcp.c

+7-4
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,7 @@ void tcp_init_sock(struct sock *sk)
409409
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
410410
tp->snd_cwnd_clamp = ~0;
411411
tp->mss_cache = TCP_MSS_DEFAULT;
412+
u64_stats_init(&tp->syncp);
412413

413414
tp->reordering = sysctl_tcp_reordering;
414415
tcp_enable_early_retrans(tp);
@@ -2694,6 +2695,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
26942695
u32 now = tcp_time_stamp;
26952696
u64 rate64;
26962697
u32 rate;
2698+
unsigned int start;
26972699

26982700
memset(info, 0, sizeof(*info));
26992701

@@ -2762,12 +2764,13 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
27622764
rate64 = rate != ~0U ? rate : ~0ULL;
27632765
put_unaligned(rate64, &info->tcpi_max_pacing_rate);
27642766

2765-
spin_lock_bh(&sk->sk_lock.slock);
2766-
info->tcpi_bytes_acked = tp->bytes_acked;
2767-
info->tcpi_bytes_received = tp->bytes_received;
2767+
do {
2768+
start = u64_stats_fetch_begin_irq(&tp->syncp);
2769+
info->tcpi_bytes_acked = tp->bytes_acked;
2770+
info->tcpi_bytes_received = tp->bytes_received;
2771+
} while (u64_stats_fetch_retry_irq(&tp->syncp, start));
27682772
info->tcpi_segs_out = tp->segs_out;
27692773
info->tcpi_segs_in = tp->segs_in;
2770-
spin_unlock_bh(&sk->sk_lock.slock);
27712774

27722775
/*
27732776
* Expose reference count for socket.

net/ipv4/tcp_fastopen.c

+4
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,10 @@ static bool tcp_fastopen_create_child(struct sock *sk,
208208
skb_set_owner_r(skb2, child);
209209
__skb_queue_tail(&child->sk_receive_queue, skb2);
210210
tp->syn_data_acked = 1;
211+
212+
/* u64_stats_update_begin(&tp->syncp) not needed here,
213+
* as we certainly are not changing upper 32bit value (0)
214+
*/
211215
tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1;
212216
} else {
213217
end_seq = TCP_SKB_CB(skb)->seq + 1;

net/ipv4/tcp_input.c

+4
Original file line numberDiff line numberDiff line change
@@ -3321,7 +3321,9 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
33213321
{
33223322
u32 delta = ack - tp->snd_una;
33233323

3324+
u64_stats_update_begin(&tp->syncp);
33243325
tp->bytes_acked += delta;
3326+
u64_stats_update_end(&tp->syncp);
33253327
tp->snd_una = ack;
33263328
}
33273329

@@ -3330,7 +3332,9 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
33303332
{
33313333
u32 delta = seq - tp->rcv_nxt;
33323334

3335+
u64_stats_update_begin(&tp->syncp);
33333336
tp->bytes_received += delta;
3337+
u64_stats_update_end(&tp->syncp);
33343338
tp->rcv_nxt = seq;
33353339
}
33363340

0 commit comments

Comments
 (0)