-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path0040-tcp-introduce-per-route-feature-RTAX_FEATURE_ECN_LOW.patch
More file actions
120 lines (105 loc) · 4.11 KB
/
0040-tcp-introduce-per-route-feature-RTAX_FEATURE_ECN_LOW.patch
File metadata and controls
120 lines (105 loc) · 4.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
From 41d3789fb47d6293d8babeaecae4f3fe8adca5d3 Mon Sep 17 00:00:00 2001
From: David Morley <morleyd@google.com>
Date: Fri, 14 Jul 2023 11:07:56 -0400
Subject: [PATCH 15/18] tcp: introduce per-route feature RTAX_FEATURE_ECN_LOW
Define and implement a new per-route feature, RTAX_FEATURE_ECN_LOW.
This feature indicates that the given destination network is a
low-latency ECN environment, meaning both that ECN CE marks are
applied by the network using a low-latency marking threshold and also
that TCP endpoints provide precise per-data-segment ECN feedback in
ACKs (where the ACK ECE flag echoes the received CE status of all
newly-acknowledged data segments). This feature indication can be used
by congestion control algorithms to decide how to interpret ECN
signals over the given destination network.
This feature is appropriate for datacenter-style ECN marking, such as
the ECN marking approach expected by DCTCP or BBR congestion control
modules.
Signed-off-by: David Morley <morleyd@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Tested-by: David Morley <morleyd@google.com>
Change-Id: I6bc06e9c6cb426fbae7243fc71c9a8c18175f5d3
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/net/tcp.h | 10 ++++++++++
include/uapi/linux/rtnetlink.h | 4 +++-
net/ipv4/tcp_minisocks.c | 2 ++
net/ipv4/tcp_output.c | 6 ++++--
4 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6416cd66631b..5398a2b09e25 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -372,6 +372,7 @@ static inline void tcp_dec_quickack_mode(struct sock *sk)
#define TCP_ECN_QUEUE_CWR 2
#define TCP_ECN_DEMAND_CWR 4
#define TCP_ECN_SEEN 8
+#define TCP_ECN_LOW 16
enum tcp_tw_status {
TCP_TW_SUCCESS = 0,
@@ -723,6 +724,15 @@ static inline void tcp_fast_path_check(struct sock *sk)
tcp_fast_path_on(tp);
}
+static inline void tcp_set_ecn_low_from_dst(struct sock *sk,
+ const struct dst_entry *dst)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (dst_feature(dst, RTAX_FEATURE_ECN_LOW))
+ tp->ecn_flags |= TCP_ECN_LOW;
+}
+
u32 tcp_delack_max(const struct sock *sk);
/* Compute the actual rto_min value */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 51c13cf9c5ae..de8dcba26bec 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -506,9 +506,11 @@ enum {
#define RTAX_FEATURE_SACK (1 << 1)
#define RTAX_FEATURE_TIMESTAMP (1 << 2)
#define RTAX_FEATURE_ALLFRAG (1 << 3)
+#define RTAX_FEATURE_ECN_LOW (1 << 4)
#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | RTAX_FEATURE_SACK | \
- RTAX_FEATURE_TIMESTAMP | RTAX_FEATURE_ALLFRAG)
+ RTAX_FEATURE_TIMESTAMP | RTAX_FEATURE_ALLFRAG \
+ | RTAX_FEATURE_ECN_LOW)
struct rta_session {
__u8 proto;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b98d476f1594..ca5c89cc7dc8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -439,6 +439,8 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
bool ca_got_dst = false;
+ tcp_set_ecn_low_from_dst(sk, dst);
+
if (ca_key != TCP_CA_UNSPEC) {
const struct tcp_congestion_ops *ca;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 62a6dc1d870c..1e8e9dc5fd20 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -332,10 +332,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
+ const struct dst_entry *dst = __sk_dst_get(sk);
if (!use_ecn) {
- const struct dst_entry *dst = __sk_dst_get(sk);
-
if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
use_ecn = true;
}
@@ -347,6 +346,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
tp->ecn_flags = TCP_ECN_OK;
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
INET_ECN_xmit(sk);
+
+ if (dst)
+ tcp_set_ecn_low_from_dst(sk, dst);
}
}
--
2.39.2