Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tcp: be less liberal in TSEcr received while in SYN_RECV state

Yong-Hao Zou mentioned that Linux was not as strict as other OSes during
the three-way handshake (3WHS) for flows using the TCP TS option (RFC 7323).

As hinted by an old comment in tcp_check_req(),
we can check that the TSEcr value in the incoming packet corresponds
to one of the SYNACK TSval values we have sent.

In this patch, I record the oldest and most recent values
that SYNACK packets have used.

Send a challenge ACK if we receive a TSEcr outside
of this range, and increment a new SNMP counter.

nstat -az | grep TSEcrRejected
TcpExtTSEcrRejected 0 0.0

Due to the TCP fastopen implementation, do not apply these checks
to fastopen flows yet.

v2: No longer use req->num_timeout, but treq->snt_tsval_first
to detect when first SYNACK is prepared. This means
we make sure to not send an initial zero TSval.
Make sure MPTCP and TCP selftests are passing.
Change MIB name to TcpExtTSEcrRejected

v1: https://lore.kernel.org/netdev/CADVnQykD8i4ArpSZaPKaoNxLJ2if2ts9m4As+=Jvdkrgx1qMHw@mail.gmail.com/T/

Reported-by: Yong-Hao Zou <yonghaoz1994@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250225171048.3105061-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Eric Dumazet and committed by
Jakub Kicinski
3ba07527 91c8d8e4

+28 -11
+1
Documentation/networking/net_cachelines/snmp.rst
··· 36 36 unsigned_long LINUX_MIB_TIMEWAITKILLED 37 37 unsigned_long LINUX_MIB_PAWSACTIVEREJECTED 38 38 unsigned_long LINUX_MIB_PAWSESTABREJECTED 39 + unsigned_long LINUX_MIB_TSECRREJECTED 39 40 unsigned_long LINUX_MIB_DELAYEDACKLOST 40 41 unsigned_long LINUX_MIB_LISTENOVERFLOWS 41 42 unsigned_long LINUX_MIB_LISTENDROPS
+2
include/linux/tcp.h
··· 160 160 u32 rcv_isn; 161 161 u32 snt_isn; 162 162 u32 ts_off; 163 + u32 snt_tsval_first; 164 + u32 snt_tsval_last; 163 165 u32 last_oow_ack_time; /* last SYNACK */ 164 166 u32 rcv_nxt; /* the ack # by SYNACK. For 165 167 * FastOpen it's the seq#
+1
include/uapi/linux/snmp.h
··· 186 186 LINUX_MIB_TIMEWAITKILLED, /* TimeWaitKilled */ 187 187 LINUX_MIB_PAWSACTIVEREJECTED, /* PAWSActiveRejected */ 188 188 LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */ 189 + LINUX_MIB_TSECRREJECTED, /* TSEcrRejected */ 189 190 LINUX_MIB_PAWS_OLD_ACK, /* PAWSOldAck */ 190 191 LINUX_MIB_DELAYEDACKS, /* DelayedACKs */ 191 192 LINUX_MIB_DELAYEDACKLOCKED, /* DelayedACKLocked */
+1
net/ipv4/proc.c
··· 189 189 SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED), 190 190 SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED), 191 191 SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED), 192 + SNMP_MIB_ITEM("TSEcrRejected", LINUX_MIB_TSECRREJECTED), 192 193 SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK), 193 194 SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS), 194 195 SNMP_MIB_ITEM("DelayedACKLocked", LINUX_MIB_DELAYEDACKLOCKED),
+1
net/ipv4/syncookies.c
··· 279 279 ireq->smc_ok = 0; 280 280 281 281 treq->snt_synack = 0; 282 + treq->snt_tsval_first = 0; 282 283 treq->tfo_listener = false; 283 284 treq->txhash = net_tx_rndhash(); 284 285 treq->rcv_isn = ntohl(th->seq) - 1;
+1
net/ipv4/tcp_input.c
··· 7081 7081 tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; 7082 7082 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; 7083 7083 tcp_rsk(req)->snt_synack = 0; 7084 + tcp_rsk(req)->snt_tsval_first = 0; 7084 7085 tcp_rsk(req)->last_oow_ack_time = 0; 7085 7086 req->mss = rx_opt->mss_clamp; 7086 7087 req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
+15 -11
net/ipv4/tcp_minisocks.c
··· 663 663 struct sock *child; 664 664 const struct tcphdr *th = tcp_hdr(skb); 665 665 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); 666 + bool tsecr_reject = false; 666 667 bool paws_reject = false; 667 668 bool own_req; 668 669 ··· 673 672 674 673 if (tmp_opt.saw_tstamp) { 675 674 tmp_opt.ts_recent = READ_ONCE(req->ts_recent); 676 - if (tmp_opt.rcv_tsecr) 675 + if (tmp_opt.rcv_tsecr) { 676 + if (inet_rsk(req)->tstamp_ok && !fastopen) 677 + tsecr_reject = !between(tmp_opt.rcv_tsecr, 678 + tcp_rsk(req)->snt_tsval_first, 679 + READ_ONCE(tcp_rsk(req)->snt_tsval_last)); 677 680 tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off; 681 + } 678 682 /* We do not store true stamp, but it is not required, 679 683 * it can be estimated (approximately) 680 684 * from another data. ··· 794 788 tcp_rsk(req)->snt_isn + 1)) 795 789 return sk; 796 790 797 - /* Also, it would be not so bad idea to check rcv_tsecr, which 798 - * is essentially ACK extension and too early or too late values 799 - * should cause reset in unsynchronized states. 800 - */ 801 - 802 791 /* RFC793: "first check sequence number". */ 803 792 804 - if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, 805 - TCP_SKB_CB(skb)->end_seq, 806 - tcp_rsk(req)->rcv_nxt, 807 - tcp_rsk(req)->rcv_nxt + 808 - tcp_synack_window(req))) { 793 + if (paws_reject || tsecr_reject || 794 + !tcp_in_window(TCP_SKB_CB(skb)->seq, 795 + TCP_SKB_CB(skb)->end_seq, 796 + tcp_rsk(req)->rcv_nxt, 797 + tcp_rsk(req)->rcv_nxt + 798 + tcp_synack_window(req))) { 809 799 /* Out of window: send ACK and drop. */ 810 800 if (!(flg & TCP_FLAG_RST) && 811 801 !tcp_oow_rate_limited(sock_net(sk), skb, ··· 810 808 req->rsk_ops->send_ack(sk, skb, req); 811 809 if (paws_reject) 812 810 NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); 811 + else if (tsecr_reject) 812 + NET_INC_STATS(sock_net(sk), LINUX_MIB_TSECRREJECTED); 813 813 return NULL; 814 814 } 815 815
+6
net/ipv4/tcp_output.c
··· 943 943 opts->options |= OPTION_TS; 944 944 opts->tsval = tcp_skb_timestamp_ts(tcp_rsk(req)->req_usec_ts, skb) + 945 945 tcp_rsk(req)->ts_off; 946 + if (!tcp_rsk(req)->snt_tsval_first) { 947 + if (!opts->tsval) 948 + opts->tsval = ~0U; 949 + tcp_rsk(req)->snt_tsval_first = opts->tsval; 950 + } 951 + WRITE_ONCE(tcp_rsk(req)->snt_tsval_last, opts->tsval); 946 952 opts->tsecr = READ_ONCE(req->ts_recent); 947 953 remaining -= TCPOLEN_TSTAMP_ALIGNED; 948 954 }