Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tcp: ECT_1_NEGOTIATION and NEEDS_ACCECN identifiers

This patch adds two flags for congestion control (CC) modules, both
related to AccECN negotiation. First, a new flag (TCP_CONG_NEEDS_ACCECN)
indicates that the CC expects to negotiate AccECN functionality using the
ECE, CWR and AE flags in the TCP header.

Second, ECT(0) has so far always been used in the IP header during ECN
negotiation. This patch enables the CC to control whether ECT(0) or ECT(1)
is used on a per-segment basis. A new flag (TCP_CONG_ECT_1_NEGOTIATION)
selects ECT(1) instead of ECT(0) as the ECT value in the IP header while
the CA is not yet initialized for the connection.

The details of AccECN negotiation can be found in IETF RFC 9768.

Co-developed-by: Olivier Tilmans <olivier.tilmans@nokia.com>
Signed-off-by: Olivier Tilmans <olivier.tilmans@nokia.com>
Signed-off-by: Ilpo Järvinen <ij@kernel.org>
Signed-off-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260131222515.8485-5-chia-yu.chang@nokia-bell-labs.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Chia-Yu Chang and committed by
Paolo Abeni
100f946b 6f74bc8b

+52 -10
+17 -3
include/net/inet_ecn.h
··· 51 51 return outer; 52 52 } 53 53 54 + /* Apply either ECT(0) or ECT(1) */ 55 + static inline void __INET_ECN_xmit(struct sock *sk, bool use_ect_1) 56 + { 57 + __u8 ect = use_ect_1 ? INET_ECN_ECT_1 : INET_ECN_ECT_0; 58 + 59 + /* Mask the complete byte in case the connection alternates between 60 + * ECT(0) and ECT(1). 61 + */ 62 + inet_sk(sk)->tos &= ~INET_ECN_MASK; 63 + inet_sk(sk)->tos |= ect; 64 + if (inet6_sk(sk)) { 65 + inet6_sk(sk)->tclass &= ~INET_ECN_MASK; 66 + inet6_sk(sk)->tclass |= ect; 67 + } 68 + } 69 + 54 70 static inline void INET_ECN_xmit(struct sock *sk) 55 71 { 56 - inet_sk(sk)->tos |= INET_ECN_ECT_0; 57 - if (inet6_sk(sk) != NULL) 58 - inet6_sk(sk)->tclass |= INET_ECN_ECT_0; 72 + __INET_ECN_xmit(sk, false); 59 73 } 60 74 61 75 static inline void INET_ECN_dontxmit(struct sock *sk)
+20 -1
include/net/tcp.h
··· 1215 1215 #define TCP_CONG_NON_RESTRICTED BIT(0) 1216 1216 /* Requires ECN/ECT set on all packets */ 1217 1217 #define TCP_CONG_NEEDS_ECN BIT(1) 1218 - #define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) 1218 + /* Require successfully negotiated AccECN capability */ 1219 + #define TCP_CONG_NEEDS_ACCECN BIT(2) 1220 + /* Use ECT(1) instead of ECT(0) while the CA is uninitialized */ 1221 + #define TCP_CONG_ECT_1_NEGOTIATION BIT(3) 1222 + #define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN | \ 1223 + TCP_CONG_NEEDS_ACCECN | TCP_CONG_ECT_1_NEGOTIATION) 1219 1224 1220 1225 union tcp_cc_info; 1221 1226 ··· 1359 1354 const struct inet_connection_sock *icsk = inet_csk(sk); 1360 1355 1361 1356 return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN; 1357 + } 1358 + 1359 + static inline bool tcp_ca_needs_accecn(const struct sock *sk) 1360 + { 1361 + const struct inet_connection_sock *icsk = inet_csk(sk); 1362 + 1363 + return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ACCECN; 1364 + } 1365 + 1366 + static inline bool tcp_ca_ect_1_negotiation(const struct sock *sk) 1367 + { 1368 + const struct inet_connection_sock *icsk = inet_csk(sk); 1369 + 1370 + return icsk->icsk_ca_ops->flags & TCP_CONG_ECT_1_NEGOTIATION; 1362 1371 } 1363 1372 1364 1373 static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
+10 -3
include/net/tcp_ecn.h
··· 31 31 TCP_ACCECN_OPTION_FULL = 2, 32 32 }; 33 33 34 + /* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */ 35 + static inline void INET_ECN_xmit_ect_1_negotiation(struct sock *sk) 36 + { 37 + __INET_ECN_xmit(sk, tcp_ca_ect_1_negotiation(sk)); 38 + } 39 + 34 40 static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp) 35 41 { 36 42 /* Do not set CWR if in AccECN mode! */ ··· 567 561 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; 568 562 else if (tcp_ca_needs_ecn(sk) || 569 563 tcp_bpf_ca_needs_ecn(sk)) 570 - INET_ECN_xmit(sk); 564 + INET_ECN_xmit_ect_1_negotiation(sk); 571 565 572 566 if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) { 573 567 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE; ··· 585 579 bool use_ecn, use_accecn; 586 580 u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn); 587 581 588 - use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN; 582 + use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN || 583 + tcp_ca_needs_accecn(sk); 589 584 use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN || 590 585 tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN || 591 586 tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn; ··· 602 595 603 596 if (use_ecn) { 604 597 if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) 605 - INET_ECN_xmit(sk); 598 + INET_ECN_xmit_ect_1_negotiation(sk); 606 599 607 600 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; 608 601 if (use_accecn) {
+3 -2
net/ipv4/tcp_cong.c
··· 16 16 #include <linux/gfp.h> 17 17 #include <linux/jhash.h> 18 18 #include <net/tcp.h> 19 + #include <net/tcp_ecn.h> 19 20 #include <trace/events/tcp.h> 20 21 21 22 static DEFINE_SPINLOCK(tcp_cong_list_lock); ··· 228 227 229 228 memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); 230 229 if (ca->flags & TCP_CONG_NEEDS_ECN) 231 - INET_ECN_xmit(sk); 230 + INET_ECN_xmit_ect_1_negotiation(sk); 232 231 else 233 232 INET_ECN_dontxmit(sk); 234 233 } ··· 258 257 memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); 259 258 260 259 if (ca->flags & TCP_CONG_NEEDS_ECN) 261 - INET_ECN_xmit(sk); 260 + INET_ECN_xmit_ect_1_negotiation(sk); 262 261 else 263 262 INET_ECN_dontxmit(sk); 264 263
+2 -1
net/ipv4/tcp_input.c
··· 7495 7495 u32 ecn_ok_dst; 7496 7496 7497 7497 if (tcp_accecn_syn_requested(th) && 7498 - READ_ONCE(net->ipv4.sysctl_tcp_ecn) >= 3) { 7498 + (READ_ONCE(net->ipv4.sysctl_tcp_ecn) >= 3 || 7499 + tcp_ca_needs_accecn(listen_sk))) { 7499 7500 inet_rsk(req)->ecn_ok = 1; 7500 7501 tcp_rsk(req)->accecn_ok = 1; 7501 7502 tcp_rsk(req)->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield &