Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'accecn-protocol-patch-series'

TCP preparations for AccECN support

Just code reshuffling, no functional changes.

Link: https://patch.msgid.link/20250911110642.87529-1-chia-yu.chang@nokia-bell-labs.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+149 -129
+2 -2
include/linux/tcp.h
···
 285  285   	 * Header prediction flags
 286  286   	 * 0x5?10 << 16 + snd_wnd in net byte order
 287  287   	 */
      288 + 	u8 nonagle : 4,/* Disable Nagle algorithm? */
      289 + 		rate_app_limited:1; /* rate_{delivered,interval_us} limited? */
 288  290   	__be32 pred_flags;
 289  291   	u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
 290  292   	u64 tcp_mstamp; /* most recent packet received/sent */
···
 305  303   	 * Options received (usually on last packet, some only on SYN packets).
 306  304   	 */
 307  305   	struct tcp_options_received rx_opt;
 308      - 	u8 nonagle : 4,/* Disable Nagle algorithm? */
 309      - 		rate_app_limited:1; /* rate_{delivered,interval_us} limited? */
 310  306   	__cacheline_group_end(tcp_sock_write_txrx);
 311  307
 312  308   	/* RX read-write hotpath cache lines */
+27 -27
include/net/tcp.h
···
 821  821   	return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
 822  822   }
 823  823
 824      - static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
 825      - {
 826      - 	/* mptcp hooks are only on the slow path */
 827      - 	if (sk_is_mptcp((struct sock *)tp))
 828      - 		return;
 829      -
 830      - 	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
 831      - 			       ntohl(TCP_FLAG_ACK) |
 832      - 			       snd_wnd);
 833      - }
 834      -
 835      - static inline void tcp_fast_path_on(struct tcp_sock *tp)
 836      - {
 837      - 	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
 838      - }
 839      -
 840      - static inline void tcp_fast_path_check(struct sock *sk)
 841      - {
 842      - 	struct tcp_sock *tp = tcp_sk(sk);
 843      -
 844      - 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
 845      - 	    tp->rcv_wnd &&
 846      - 	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
 847      - 	    !tp->urg_data)
 848      - 		tcp_fast_path_on(tp);
 849      - }
 850      -
 851  824   u32 tcp_delack_max(const struct sock *sk);
 852  825
 853  826   /* Compute the actual rto_min value */
···
1778 1805   				  rx_opt->ts_recent_stamp + TCP_PAWS_MSL))
1779 1806   		return false;
1780 1807   	return true;
     1808 + }
     1809 +
     1810 + static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
     1811 + {
     1812 + 	/* mptcp hooks are only on the slow path */
     1813 + 	if (sk_is_mptcp((struct sock *)tp))
     1814 + 		return;
     1815 +
     1816 + 	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
     1817 + 			       ntohl(TCP_FLAG_ACK) |
     1818 + 			       snd_wnd);
     1819 + }
     1820 +
     1821 + static inline void tcp_fast_path_on(struct tcp_sock *tp)
     1822 + {
     1823 + 	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
     1824 + }
     1825 +
     1826 + static inline void tcp_fast_path_check(struct sock *sk)
     1827 + {
     1828 + 	struct tcp_sock *tp = tcp_sk(sk);
     1829 +
     1830 + 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
     1831 + 	    tp->rcv_wnd &&
     1832 + 	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
     1833 + 	    !tp->urg_data)
     1834 + 		tcp_fast_path_on(tp);
1781 1835   }
1782 1836
1783 1837   bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
+116
include/net/tcp_ecn.h
···
        1 + /* SPDX-License-Identifier: GPL-2.0-or-later */
        2 + #ifndef _TCP_ECN_H
        3 + #define _TCP_ECN_H
        4 +
        5 + #include <linux/tcp.h>
        6 + #include <linux/skbuff.h>
        7 +
        8 + #include <net/inet_connection_sock.h>
        9 + #include <net/sock.h>
       10 + #include <net/tcp.h>
       11 + #include <net/inet_ecn.h>
       12 +
       13 + static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp)
       14 + {
       15 + 	if (tcp_ecn_mode_rfc3168(tp))
       16 + 		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
       17 + }
       18 +
       19 + static inline void tcp_ecn_accept_cwr(struct sock *sk,
       20 + 				      const struct sk_buff *skb)
       21 + {
       22 + 	if (tcp_hdr(skb)->cwr) {
       23 + 		tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
       24 +
       25 + 		/* If the sender is telling us it has entered CWR, then its
       26 + 		 * cwnd may be very low (even just 1 packet), so we should ACK
       27 + 		 * immediately.
       28 + 		 */
       29 + 		if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
       30 + 			inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
       31 + 	}
       32 + }
       33 +
       34 + static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
       35 + {
       36 + 	tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
       37 + }
       38 +
       39 + static inline void tcp_ecn_rcv_synack(struct tcp_sock *tp,
       40 + 				      const struct tcphdr *th)
       41 + {
       42 + 	if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || th->cwr))
       43 + 		tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
       44 + }
       45 +
       46 + static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp,
       47 + 				   const struct tcphdr *th)
       48 + {
       49 + 	if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr))
       50 + 		tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
       51 + }
       52 +
       53 + static inline bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp,
       54 + 					const struct tcphdr *th)
       55 + {
       56 + 	if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp))
       57 + 		return true;
       58 + 	return false;
       59 + }
       60 +
       61 + /* Packet ECN state for a SYN-ACK */
       62 + static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
       63 + {
       64 + 	const struct tcp_sock *tp = tcp_sk(sk);
       65 +
       66 + 	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
       67 + 	if (tcp_ecn_disabled(tp))
       68 + 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
       69 + 	else if (tcp_ca_needs_ecn(sk) ||
       70 + 		 tcp_bpf_ca_needs_ecn(sk))
       71 + 		INET_ECN_xmit(sk);
       72 + }
       73 +
       74 + /* Packet ECN state for a SYN. */
       75 + static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
       76 + {
       77 + 	struct tcp_sock *tp = tcp_sk(sk);
       78 + 	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
       79 + 	bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
       80 + 		tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
       81 +
       82 + 	if (!use_ecn) {
       83 + 		const struct dst_entry *dst = __sk_dst_get(sk);
       84 +
       85 + 		if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
       86 + 			use_ecn = true;
       87 + 	}
       88 +
       89 + 	tp->ecn_flags = 0;
       90 +
       91 + 	if (use_ecn) {
       92 + 		if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
       93 + 			INET_ECN_xmit(sk);
       94 +
       95 + 		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
       96 + 		tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
       97 + 	}
       98 + }
       99 +
      100 + static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
      101 + {
      102 + 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
      103 + 		/* tp->ecn_flags are cleared at a later point in time when
      104 + 		 * SYN ACK is ultimatively being received.
      105 + 		 */
      106 + 		TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
      107 + }
      108 +
      109 + static inline void
      110 + tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
      111 + {
      112 + 	if (inet_rsk(req)->ecn_ok)
      113 + 		th->ece = 1;
      114 + }
      115 +
      116 + #endif /* _LINUX_TCP_ECN_H */
+2 -2
net/ipv4/tcp.c
···
5145 5145   	/* 32bit arches with 8byte alignment on u64 fields might need padding
5146 5146   	 * before tcp_clock_cache.
5147 5147   	 */
5148      - 	CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 92 + 4);
     5148 + 	CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 91 + 4);
5149 5149
5150 5150   	/* RX read-write hotpath cache lines */
5151 5151   	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
···
5162 5162   	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked);
5163 5163   	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_est);
5164 5164   	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcvq_space);
5165      - 	CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 99);
     5165 + 	CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 96);
5166 5166   }
5167 5167
5168 5168   void __init tcp_init(void)
+1 -44
net/ipv4/tcp_input.c
···
  72   72   #include <linux/prefetch.h>
  73   73   #include <net/dst.h>
  74   74   #include <net/tcp.h>
       75 + #include <net/tcp_ecn.h>
  75   76   #include <net/proto_memory.h>
  76   77   #include <net/inet_common.h>
  77   78   #include <linux/ipsec.h>
···
 340  339   		(icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
 341  340   }
 342  341
 343      - static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
 344      - {
 345      - 	if (tcp_ecn_mode_rfc3168(tp))
 346      - 		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
 347      - }
 348      -
 349      - static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
 350      - {
 351      - 	if (tcp_hdr(skb)->cwr) {
 352      - 		tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
 353      -
 354      - 		/* If the sender is telling us it has entered CWR, then its
 355      - 		 * cwnd may be very low (even just 1 packet), so we should ACK
 356      - 		 * immediately.
 357      - 		 */
 358      - 		if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
 359      - 			inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
 360      - 	}
 361      - }
 362      -
 363      - static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
 364      - {
 365      - 	tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
 366      - }
 367      -
 368  342   static void tcp_data_ecn_check(struct sock *sk, const struct sk_buff *skb)
 369  343   {
 370  344   	struct tcp_sock *tp = tcp_sk(sk);
···
 373  397   		tp->ecn_flags |= TCP_ECN_SEEN;
 374  398   		break;
 375  399   	}
 376      - }
 377      -
 378      - static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
 379      - {
 380      - 	if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || th->cwr))
 381      - 		tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
 382      - }
 383      -
 384      - static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
 385      - {
 386      - 	if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr))
 387      - 		tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
 388      - }
 389      -
 390      - static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
 391      - {
 392      - 	if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp))
 393      - 		return true;
 394      - 	return false;
 395  400   }
 396  401
 397  402   static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count)
+1 -54
net/ipv4/tcp_output.c
···
  38   38   #define pr_fmt(fmt) "TCP: " fmt
  39   39
  40   40   #include <net/tcp.h>
       41 + #include <net/tcp_ecn.h>
  41   42   #include <net/mptcp.h>
  42   43   #include <net/proto_memory.h>
  43   44
···
 318  317   	}
 319  318
 320  319   	return new_win;
 321      - }
 322      -
 323      - /* Packet ECN state for a SYN-ACK */
 324      - static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
 325      - {
 326      - 	const struct tcp_sock *tp = tcp_sk(sk);
 327      -
 328      - 	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
 329      - 	if (tcp_ecn_disabled(tp))
 330      - 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
 331      - 	else if (tcp_ca_needs_ecn(sk) ||
 332      - 		 tcp_bpf_ca_needs_ecn(sk))
 333      - 		INET_ECN_xmit(sk);
 334      - }
 335      -
 336      - /* Packet ECN state for a SYN. */
 337      - static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
 338      - {
 339      - 	struct tcp_sock *tp = tcp_sk(sk);
 340      - 	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
 341      - 	bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
 342      - 		tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
 343      -
 344      - 	if (!use_ecn) {
 345      - 		const struct dst_entry *dst = __sk_dst_get(sk);
 346      -
 347      - 		if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
 348      - 			use_ecn = true;
 349      - 	}
 350      -
 351      - 	tp->ecn_flags = 0;
 352      -
 353      - 	if (use_ecn) {
 354      - 		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
 355      - 		tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
 356      - 		if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
 357      - 			INET_ECN_xmit(sk);
 358      - 	}
 359      - }
 360      -
 361      - static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
 362      - {
 363      - 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
 364      - 		/* tp->ecn_flags are cleared at a later point in time when
 365      - 		 * SYN ACK is ultimatively being received.
 366      - 		 */
 367      - 		TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
 368      - }
 369      -
 370      - static void
 371      - tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
 372      - {
 373      - 	if (inet_rsk(req)->ecn_ok)
 374      - 		th->ece = 1;
 375  320   }
 376  321
 377  322   /* Set up ECN state for a packet on a ESTABLISHED socket that is about to