Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'tcp-receiver-changes'

Eric Dumazet says:

====================
tcp: receiver changes

Before accepting an incoming packet:

- Make sure not to accept a packet beyond the advertised RWIN.
When such a packet is rejected, increment a new SNMP counter (LINUX_MIB_BEYOND_WINDOW).

- Out-of-order (ooo) packets should update rcv_mss and tp->scaling_ratio.

- Make sure not to accept a packet beyond the sk_rcvbuf limit.

This series includes three associated packetdrill tests.
====================

Link: https://patch.msgid.link/20250711114006.480026-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+152 -14
+1
Documentation/networking/net_cachelines/snmp.rst
··· 36 36 unsigned_long LINUX_MIB_TIMEWAITKILLED 37 37 unsigned_long LINUX_MIB_PAWSACTIVEREJECTED 38 38 unsigned_long LINUX_MIB_PAWSESTABREJECTED 39 + unsigned_long LINUX_MIB_BEYOND_WINDOW 39 40 unsigned_long LINUX_MIB_TSECR_REJECTED 40 41 unsigned_long LINUX_MIB_PAWS_OLD_ACK 41 42 unsigned_long LINUX_MIB_PAWS_TW_REJECTED
+8 -1
include/net/dropreason-core.h
··· 45 45 FN(TCP_LISTEN_OVERFLOW) \ 46 46 FN(TCP_OLD_SEQUENCE) \ 47 47 FN(TCP_INVALID_SEQUENCE) \ 48 + FN(TCP_INVALID_END_SEQUENCE) \ 48 49 FN(TCP_INVALID_ACK_SEQUENCE) \ 49 50 FN(TCP_RESET) \ 50 51 FN(TCP_INVALID_SYN) \ ··· 304 303 SKB_DROP_REASON_TCP_LISTEN_OVERFLOW, 305 304 /** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */ 306 305 SKB_DROP_REASON_TCP_OLD_SEQUENCE, 307 - /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */ 306 + /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field. */ 308 307 SKB_DROP_REASON_TCP_INVALID_SEQUENCE, 308 + /** 309 + * @SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE: 310 + * Not acceptable END_SEQ field. 311 + * Corresponds to LINUX_MIB_BEYOND_WINDOW. 312 + */ 313 + SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE, 309 314 /** 310 315 * @SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ 311 316 * field because ack sequence is not in the window between snd_una
+1 -1
include/net/sock.h
··· 1553 1553 } 1554 1554 1555 1555 static inline bool 1556 - sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) 1556 + sk_rmem_schedule(struct sock *sk, const struct sk_buff *skb, int size) 1557 1557 { 1558 1558 return __sk_rmem_schedule(sk, size, skb_pfmemalloc(skb)); 1559 1559 }
+1
include/uapi/linux/snmp.h
··· 186 186 LINUX_MIB_TIMEWAITKILLED, /* TimeWaitKilled */ 187 187 LINUX_MIB_PAWSACTIVEREJECTED, /* PAWSActiveRejected */ 188 188 LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */ 189 + LINUX_MIB_BEYOND_WINDOW, /* BeyondWindow */ 189 190 LINUX_MIB_TSECRREJECTED, /* TSEcrRejected */ 190 191 LINUX_MIB_PAWS_OLD_ACK, /* PAWSOldAck */ 191 192 LINUX_MIB_PAWS_TW_REJECTED, /* PAWSTimewait */
+1
net/ipv4/proc.c
··· 189 189 SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED), 190 190 SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED), 191 191 SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED), 192 + SNMP_MIB_ITEM("BeyondWindow", LINUX_MIB_BEYOND_WINDOW), 192 193 SNMP_MIB_ITEM("TSEcrRejected", LINUX_MIB_TSECRREJECTED), 193 194 SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK), 194 195 SNMP_MIB_ITEM("PAWSTimewait", LINUX_MIB_PAWS_TW_REJECTED),
+36 -12
net/ipv4/tcp_input.c
··· 4391 4391 * (borrowed from freebsd) 4392 4392 */ 4393 4393 4394 - static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp, 4394 + static enum skb_drop_reason tcp_sequence(const struct sock *sk, 4395 4395 u32 seq, u32 end_seq) 4396 4396 { 4397 + const struct tcp_sock *tp = tcp_sk(sk); 4398 + 4397 4399 if (before(end_seq, tp->rcv_wup)) 4398 4400 return SKB_DROP_REASON_TCP_OLD_SEQUENCE; 4399 4401 4400 - if (after(seq, tp->rcv_nxt + tcp_receive_window(tp))) 4401 - return SKB_DROP_REASON_TCP_INVALID_SEQUENCE; 4402 + if (after(end_seq, tp->rcv_nxt + tcp_receive_window(tp))) { 4403 + if (after(seq, tp->rcv_nxt + tcp_receive_window(tp))) 4404 + return SKB_DROP_REASON_TCP_INVALID_SEQUENCE; 4405 + 4406 + /* Only accept this packet if receive queue is empty. */ 4407 + if (skb_queue_len(&sk->sk_receive_queue)) 4408 + return SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE; 4409 + } 4402 4410 4403 4411 return SKB_NOT_DROPPED_YET; 4404 4412 } ··· 4888 4880 static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb); 4889 4881 static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb); 4890 4882 4891 - static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, 4883 + /* Check if this incoming skb can be added to socket receive queues 4884 + * while satisfying sk->sk_rcvbuf limit. 
4885 + */ 4886 + static bool tcp_can_ingest(const struct sock *sk, const struct sk_buff *skb) 4887 + { 4888 + unsigned int new_mem = atomic_read(&sk->sk_rmem_alloc) + skb->truesize; 4889 + 4890 + return new_mem <= sk->sk_rcvbuf; 4891 + } 4892 + 4893 + static int tcp_try_rmem_schedule(struct sock *sk, const struct sk_buff *skb, 4892 4894 unsigned int size) 4893 4895 { 4894 - if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 4896 + if (!tcp_can_ingest(sk, skb) || 4895 4897 !sk_rmem_schedule(sk, skb, size)) { 4896 4898 4897 4899 if (tcp_prune_queue(sk, skb) < 0) ··· 4933 4915 return; 4934 4916 } 4935 4917 4918 + tcp_measure_rcv_mss(sk, skb); 4936 4919 /* Disable header prediction. */ 4937 4920 tp->pred_flags = 0; 4938 4921 inet_csk_schedule_ack(sk); ··· 5517 5498 tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE); 5518 5499 tp->ooo_last_skb = rb_to_skb(prev); 5519 5500 if (!prev || goal <= 0) { 5520 - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && 5501 + if (tcp_can_ingest(sk, skb) && 5521 5502 !tcp_under_memory_pressure(sk)) 5522 5503 break; 5523 5504 goal = sk->sk_rcvbuf >> 3; ··· 5551 5532 5552 5533 NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED); 5553 5534 5554 - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 5535 + if (!tcp_can_ingest(sk, in_skb)) 5555 5536 tcp_clamp_window(sk); 5556 5537 else if (tcp_under_memory_pressure(sk)) 5557 5538 tcp_adjust_rcv_ssthresh(sk); 5558 5539 5559 - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) 5540 + if (tcp_can_ingest(sk, in_skb)) 5560 5541 return 0; 5561 5542 5562 5543 tcp_collapse_ofo_queue(sk); ··· 5566 5547 NULL, 5567 5548 tp->copied_seq, tp->rcv_nxt); 5568 5549 5569 - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) 5550 + if (tcp_can_ingest(sk, in_skb)) 5570 5551 return 0; 5571 5552 5572 5553 /* Collapsing did not help, destructive actions follow. 
··· 5574 5555 5575 5556 tcp_prune_ofo_queue(sk, in_skb); 5576 5557 5577 - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) 5558 + if (tcp_can_ingest(sk, in_skb)) 5578 5559 return 0; 5579 5560 5580 5561 /* If we are really being abused, tell the caller to silently ··· 5900 5881 5901 5882 step1: 5902 5883 /* Step 1: check sequence number */ 5903 - reason = tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); 5884 + reason = tcp_sequence(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); 5904 5885 if (reason) { 5905 5886 /* RFC793, page 37: "In all states except SYN-SENT, all reset 5906 5887 * (RST) segments are validated by checking their SEQ-fields." ··· 5911 5892 if (!th->rst) { 5912 5893 if (th->syn) 5913 5894 goto syn_challenge; 5895 + NET_INC_STATS(sock_net(sk), LINUX_MIB_BEYOND_WINDOW); 5914 5896 if (!tcp_oow_rate_limited(sock_net(sk), skb, 5915 5897 LINUX_MIB_TCPACKSKIPPEDSEQ, 5916 5898 &tp->last_oow_ack_time)) ··· 6130 6110 if (tcp_checksum_complete(skb)) 6131 6111 goto csum_error; 6132 6112 6113 + if (after(TCP_SKB_CB(skb)->end_seq, 6114 + tp->rcv_nxt + tcp_receive_window(tp))) 6115 + goto validate; 6116 + 6133 6117 if ((int)skb->truesize > sk->sk_forward_alloc) 6134 6118 goto step5; 6135 6119 ··· 6189 6165 /* 6190 6166 * Standard slow path. 6191 6167 */ 6192 - 6168 + validate: 6193 6169 if (!tcp_validate_incoming(sk, skb, th, 1)) 6194 6170 return; 6195 6171
+27
tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + --mss=1000 4 + 5 + `./defaults.sh 6 + sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` 7 + 8 + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 9 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 10 + +0 bind(3, ..., ...) = 0 11 + +0 listen(3, 1) = 0 12 + 13 + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> 14 + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> 15 + +.1 < . 1:1(0) ack 1 win 257 16 + 17 + +0 accept(3, ..., ...) = 4 18 + 19 + +0 < . 2001:11001(9000) ack 1 win 257 20 + +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001> 21 + 22 + // check that ooo packet properly updates tcpi_rcv_mss 23 + +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }% 24 + 25 + +0 < . 11001:21001(10000) ack 1 win 257 26 + +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001> 27 +
+44
tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + --mss=1000 4 + 5 + `./defaults.sh` 6 + 7 + 0 `nstat -n` 8 + 9 + // Establish a connection. 10 + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 11 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 12 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0 13 + +0 bind(3, ..., ...) = 0 14 + +0 listen(3, 1) = 0 15 + 16 + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> 17 + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0> 18 + +.1 < . 1:1(0) ack 1 win 257 19 + 20 + +0 accept(3, ..., ...) = 4 21 + 22 + +0 < P. 1:4001(4000) ack 1 win 257 23 + +0 > . 1:1(0) ack 4001 win 5000 24 + 25 + // packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW 26 + +0 < P. 4001:54001(50000) ack 1 win 257 27 + +0 > . 1:1(0) ack 4001 win 5000 28 + 29 + // ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW 30 + +1 < P. 5001:55001(50000) ack 1 win 257 31 + +0 > . 1:1(0) ack 4001 win 5000 32 + 33 + // SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW 34 + +0 < P. 70001:80001(10000) ack 1 win 257 35 + +0 > . 1:1(0) ack 4001 win 5000 36 + 37 + +0 read(4, ..., 100000) = 4000 38 + 39 + // If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd 40 + +0 < P. 4001:54001(50000) ack 1 win 257 41 + +.040 > . 1:1(0) ack 54001 win 0 42 + 43 + // Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times. 44 + +0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "`
+33
tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + --mss=1000 4 + 5 + `./defaults.sh` 6 + 7 + 0 `nstat -n` 8 + 9 + // Establish a connection. 10 + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 11 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 12 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0 13 + +0 bind(3, ..., ...) = 0 14 + +0 listen(3, 1) = 0 15 + 16 + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> 17 + +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0> 18 + +.1 < . 1:1(0) ack 1 win 257 19 + 20 + +0 accept(3, ..., ...) = 4 21 + 22 + +0 < P. 1:20001(20000) ack 1 win 257 23 + +.04 > . 1:1(0) ack 20001 win 18000 24 + 25 + +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0 26 + +0 < P. 20001:80001(60000) ack 1 win 257 27 + +0 > . 1:1(0) ack 20001 win 18000 28 + 29 + +0 read(4, ..., 20000) = 20000 30 + // A too big packet is accepted if the receive queue is empty 31 + +0 < P. 20001:80001(60000) ack 1 win 257 32 + +0 > . 1:1(0) ack 80001 win 0 33 +