Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'tcp-ts-usec-resolution'

Eric Dumazet says:

====================
tcp: add optional usec resolution to TCP TS

As discussed in various public places in 2016, Google adopted
usec resolution in RFC 7323 TS values, at Van Jacobson's suggestion.

Goals were:

1) better observability of delays in networking stacks/fabrics.

2) better disambiguation of events based on TSval/ecr values.

3) building block for congestion control modules needing usec resolution.

Back then we implemented a scheme based on private SYN options
to safely negotiate the feature.

For upstream submission, we chose to use a much simpler route
attribute because this feature is probably going to be used
in private networks.

ip route add 10/8 ... features tcp_usec_ts

References:

https://www.ietf.org/proceedings/97/slides/slides-97-tcpm-tcp-options-for-low-latency-00.pdf
https://datatracker.ietf.org/doc/draft-wang-tcpm-low-latency-opt/

The first two patches fix old minor bugs and might be taken
by stable teams (thanks to appropriate Fixes: tags).
====================

Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

+190 -101
+1 -1
drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
··· 2259 2259 2260 2260 if (tp->snd_una != snd_una) { 2261 2261 tp->snd_una = snd_una; 2262 - tp->rcv_tstamp = tcp_time_stamp(tp); 2262 + tp->rcv_tstamp = tcp_jiffies32; 2263 2263 if (tp->snd_una == tp->snd_nxt && 2264 2264 !csk_flag_nochk(csk, CSK_TX_FAILOVER)) 2265 2265 csk_reset_flag(csk, CSK_TX_WAIT_IDLE);
+8 -1
include/linux/tcp.h
··· 152 152 u64 snt_synack; /* first SYNACK sent time */ 153 153 bool tfo_listener; 154 154 bool is_mptcp; 155 + s8 req_usec_ts; 155 156 #if IS_ENABLED(CONFIG_MPTCP) 156 157 bool drop_req; 157 158 #endif ··· 258 257 u8 compressed_ack; 259 258 u8 dup_ack_counter:2, 260 259 tlp_retrans:1, /* TLP is a retransmission */ 261 - unused:5; 260 + tcp_usec_ts:1, /* TSval values in usec */ 261 + unused:4; 262 262 u32 chrono_start; /* Start time in jiffies of a TCP chrono */ 263 263 u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ 264 264 u8 chrono_type:2, /* current chronograph type */ ··· 577 575 void tcp_sock_set_quickack(struct sock *sk, int val); 578 576 int tcp_sock_set_syncnt(struct sock *sk, int val); 579 577 int tcp_sock_set_user_timeout(struct sock *sk, int val); 578 + 579 + static inline bool dst_tcp_usec_ts(const struct dst_entry *dst) 580 + { 581 + return dst_feature(dst, RTAX_FEATURE_TCP_USEC_TS); 582 + } 580 583 581 584 #endif /* _LINUX_TCP_H */
+2 -1
include/net/inet_timewait_sock.h
··· 67 67 /* And these are ours. */ 68 68 unsigned int tw_transparent : 1, 69 69 tw_flowlabel : 20, 70 - tw_pad : 3, /* 3 bits hole */ 70 + tw_usec_ts : 1, 71 + tw_pad : 2, /* 2 bits hole */ 71 72 tw_tos : 8; 72 73 u32 tw_txhash; 73 74 u32 tw_priority;
+43 -16
include/net/tcp.h
··· 166 166 #define MAX_TCP_KEEPCNT 127 167 167 #define MAX_TCP_SYNCNT 127 168 168 169 - #define TCP_PAWS_24DAYS (60 * 60 * 24 * 24) 169 + /* Ensure that TCP PAWS checks are relaxed after ~2147 seconds 170 + * to avoid overflows. This assumes a clock smaller than 1 Mhz. 171 + * Default clock is 1 Khz, tcp_usec_ts uses 1 Mhz. 172 + */ 173 + #define TCP_PAWS_WRAP (INT_MAX / USEC_PER_SEC) 174 + 170 175 #define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated 171 176 * after this time. It should be equal 172 177 * (or greater than) TCP_TIMEWAIT_LEN ··· 803 798 return div_u64(tcp_clock_ns(), NSEC_PER_USEC); 804 799 } 805 800 806 - /* This should only be used in contexts where tp->tcp_mstamp is up to date */ 807 - static inline u32 tcp_time_stamp(const struct tcp_sock *tp) 801 + static inline u64 tcp_clock_ms(void) 808 802 { 809 - return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ); 803 + return div_u64(tcp_clock_ns(), NSEC_PER_MSEC); 810 804 } 811 805 812 - /* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */ 813 - static inline u32 tcp_ns_to_ts(u64 ns) 806 + /* TCP Timestamp included in TS option (RFC 1323) can either use ms 807 + * or usec resolution. Each socket carries a flag to select one or other 808 + * resolution, as the route attribute could change anytime. 809 + * Each flow must stick to initial resolution. 810 + */ 811 + static inline u32 tcp_clock_ts(bool usec_ts) 814 812 { 815 - return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ); 813 + return usec_ts ? 
tcp_clock_us() : tcp_clock_ms(); 816 814 } 817 815 818 - /* Could use tcp_clock_us() / 1000, but this version uses a single divide */ 819 - static inline u32 tcp_time_stamp_raw(void) 816 + static inline u32 tcp_time_stamp_ms(const struct tcp_sock *tp) 820 817 { 821 - return tcp_ns_to_ts(tcp_clock_ns()); 818 + return div_u64(tp->tcp_mstamp, USEC_PER_MSEC); 819 + } 820 + 821 + static inline u32 tcp_time_stamp_ts(const struct tcp_sock *tp) 822 + { 823 + if (tp->tcp_usec_ts) 824 + return tp->tcp_mstamp; 825 + return tcp_time_stamp_ms(tp); 822 826 } 823 827 824 828 void tcp_mstamp_refresh(struct tcp_sock *tp); ··· 837 823 return max_t(s64, t1 - t0, 0); 838 824 } 839 825 840 - static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) 841 - { 842 - return tcp_ns_to_ts(skb->skb_mstamp_ns); 843 - } 844 - 845 826 /* provide the departure time in us unit */ 846 827 static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) 847 828 { 848 829 return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC); 849 830 } 850 831 832 + /* Provide skb TSval in usec or ms unit */ 833 + static inline u32 tcp_skb_timestamp_ts(bool usec_ts, const struct sk_buff *skb) 834 + { 835 + if (usec_ts) 836 + return tcp_skb_timestamp_us(skb); 837 + 838 + return div_u64(skb->skb_mstamp_ns, NSEC_PER_MSEC); 839 + } 840 + 841 + static inline u32 tcp_tw_tsval(const struct tcp_timewait_sock *tcptw) 842 + { 843 + return tcp_clock_ts(tcptw->tw_sk.tw_usec_ts) + tcptw->tw_ts_offset; 844 + } 845 + 846 + static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq) 847 + { 848 + return tcp_clock_ts(treq->req_usec_ts) + treq->ts_off; 849 + } 851 850 852 851 #define tcp_flag_byte(th) (((u_int8_t *)th)[13]) 853 852 ··· 1626 1599 if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win) 1627 1600 return true; 1628 1601 if (unlikely(!time_before32(ktime_get_seconds(), 1629 - rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))) 1602 + rx_opt->ts_recent_stamp + TCP_PAWS_WRAP))) 1630 1603 return true; 1631 1604 /* 
1632 1605 * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
+10 -6
include/uapi/linux/rtnetlink.h
··· 502 502 503 503 #define RTAX_MAX (__RTAX_MAX - 1) 504 504 505 - #define RTAX_FEATURE_ECN (1 << 0) 506 - #define RTAX_FEATURE_SACK (1 << 1) 507 - #define RTAX_FEATURE_TIMESTAMP (1 << 2) 508 - #define RTAX_FEATURE_ALLFRAG (1 << 3) 505 + #define RTAX_FEATURE_ECN (1 << 0) 506 + #define RTAX_FEATURE_SACK (1 << 1) /* unused */ 507 + #define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */ 508 + #define RTAX_FEATURE_ALLFRAG (1 << 3) 509 + #define RTAX_FEATURE_TCP_USEC_TS (1 << 4) 509 510 510 - #define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | RTAX_FEATURE_SACK | \ 511 - RTAX_FEATURE_TIMESTAMP | RTAX_FEATURE_ALLFRAG) 511 + #define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \ 512 + RTAX_FEATURE_SACK | \ 513 + RTAX_FEATURE_TIMESTAMP | \ 514 + RTAX_FEATURE_ALLFRAG | \ 515 + RTAX_FEATURE_TCP_USEC_TS) 512 516 513 517 struct rta_session { 514 518 __u8 proto;
+1
include/uapi/linux/tcp.h
··· 170 170 #define TCPI_OPT_ECN 8 /* ECN was negociated at TCP session init */ 171 171 #define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ 172 172 #define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ 173 + #define TCPI_OPT_USEC_TS 64 /* usec timestamps */ 173 174 174 175 /* 175 176 * Sender's congestion state indicating normal or abnormal situations
+19 -13
net/ipv4/syncookies.c
··· 41 41 * requested/supported by the syn/synack exchange. 42 42 */ 43 43 #define TSBITS 6 44 - #define TSMASK (((__u32)1 << TSBITS) - 1) 45 44 46 45 static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, 47 46 u32 count, int c) ··· 51 52 count, &syncookie_secret[c]); 52 53 } 53 54 55 + /* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */ 56 + static u64 tcp_ns_to_ts(bool usec_ts, u64 val) 57 + { 58 + if (usec_ts) 59 + return div_u64(val, NSEC_PER_USEC); 60 + 61 + return div_u64(val, NSEC_PER_MSEC); 62 + } 54 63 55 64 /* 56 65 * when syncookies are in effect and tcp timestamps are enabled we encode ··· 69 62 */ 70 63 u64 cookie_init_timestamp(struct request_sock *req, u64 now) 71 64 { 72 - struct inet_request_sock *ireq; 73 - u32 ts, ts_now = tcp_ns_to_ts(now); 65 + const struct inet_request_sock *ireq = inet_rsk(req); 66 + u64 ts, ts_now = tcp_ns_to_ts(false, now); 74 67 u32 options = 0; 75 - 76 - ireq = inet_rsk(req); 77 68 78 69 options = ireq->wscale_ok ? ireq->snd_wscale : TS_OPT_WSCALE_MASK; 79 70 if (ireq->sack_ok) ··· 79 74 if (ireq->ecn_ok) 80 75 options |= TS_OPT_ECN; 81 76 82 - ts = ts_now & ~TSMASK; 77 + ts = (ts_now >> TSBITS) << TSBITS; 83 78 ts |= options; 84 - if (ts > ts_now) { 85 - ts >>= TSBITS; 86 - ts--; 87 - ts <<= TSBITS; 88 - ts |= options; 89 - } 90 - return (u64)ts * (NSEC_PER_SEC / TCP_TS_HZ); 79 + if (ts > ts_now) 80 + ts -= (1UL << TSBITS); 81 + 82 + if (tcp_rsk(req)->req_usec_ts) 83 + return ts * NSEC_PER_USEC; 84 + return ts * NSEC_PER_MSEC; 91 85 } 92 86 93 87 ··· 306 302 treq->af_specific = af_ops; 307 303 308 304 treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; 305 + treq->req_usec_ts = -1; 306 + 309 307 #if IS_ENABLED(CONFIG_MPTCP) 310 308 treq->is_mptcp = sk_is_mptcp(sk); 311 309 if (treq->is_mptcp) {
+18 -8
net/ipv4/tcp.c
··· 3629 3629 tp->fastopen_no_cookie = val; 3630 3630 break; 3631 3631 case TCP_TIMESTAMP: 3632 - if (!tp->repair) 3632 + if (!tp->repair) { 3633 3633 err = -EPERM; 3634 - else 3635 - WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw()); 3634 + break; 3635 + } 3636 + /* val is an opaque field, 3637 + * and low order bit contains usec_ts enable bit. 3638 + * Its a best effort, and we do not care if user makes an error. 3639 + */ 3640 + tp->tcp_usec_ts = val & 1; 3641 + WRITE_ONCE(tp->tsoffset, val - tcp_clock_ts(tp->tcp_usec_ts)); 3636 3642 break; 3637 3643 case TCP_REPAIR_WINDOW: 3638 3644 err = tcp_repair_set_window(tp, optval, optlen); ··· 3760 3754 info->tcpi_options |= TCPI_OPT_ECN_SEEN; 3761 3755 if (tp->syn_data_acked) 3762 3756 info->tcpi_options |= TCPI_OPT_SYN_DATA; 3757 + if (tp->tcp_usec_ts) 3758 + info->tcpi_options |= TCPI_OPT_USEC_TS; 3763 3759 3764 3760 info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); 3765 3761 info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato, ··· 3825 3817 info->tcpi_total_rto = tp->total_rto; 3826 3818 info->tcpi_total_rto_recoveries = tp->total_rto_recoveries; 3827 3819 info->tcpi_total_rto_time = tp->total_rto_time; 3828 - if (tp->rto_stamp) { 3829 - info->tcpi_total_rto_time += tcp_time_stamp_raw() - 3830 - tp->rto_stamp; 3831 - } 3820 + if (tp->rto_stamp) 3821 + info->tcpi_total_rto_time += tcp_clock_ms() - tp->rto_stamp; 3832 3822 3833 3823 unlock_sock_fast(sk, slow); 3834 3824 } ··· 4151 4145 break; 4152 4146 4153 4147 case TCP_TIMESTAMP: 4154 - val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset); 4148 + val = tcp_clock_ts(tp->tcp_usec_ts) + READ_ONCE(tp->tsoffset); 4149 + if (tp->tcp_usec_ts) 4150 + val |= 1; 4151 + else 4152 + val &= ~1; 4155 4153 break; 4156 4154 case TCP_NOTSENT_LOWAT: 4157 4155 val = READ_ONCE(tp->notsent_lowat);
+27 -21
net/ipv4/tcp_input.c
··· 693 693 tp->rcv_rtt_est.time = tp->tcp_mstamp; 694 694 } 695 695 696 + static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp) 697 + { 698 + u32 delta, delta_us; 699 + 700 + delta = tcp_time_stamp_ts(tp) - tp->rx_opt.rcv_tsecr; 701 + if (tp->tcp_usec_ts) 702 + return delta; 703 + 704 + if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { 705 + if (!delta) 706 + delta = 1; 707 + delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); 708 + return delta_us; 709 + } 710 + return -1; 711 + } 712 + 696 713 static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, 697 714 const struct sk_buff *skb) 698 715 { ··· 721 704 722 705 if (TCP_SKB_CB(skb)->end_seq - 723 706 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) { 724 - u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; 725 - u32 delta_us; 707 + s32 delta = tcp_rtt_tsopt_us(tp); 726 708 727 - if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { 728 - if (!delta) 729 - delta = 1; 730 - delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); 731 - tcp_rcv_rtt_update(tp, delta_us, 0); 732 - } 709 + if (delta >= 0) 710 + tcp_rcv_rtt_update(tp, delta, 0); 733 711 } 734 712 } 735 713 ··· 2454 2442 const struct sk_buff *skb) 2455 2443 { 2456 2444 return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) && 2457 - tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb)); 2445 + tcp_tsopt_ecr_before(tp, tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb)); 2458 2446 } 2459 2447 2460 2448 /* Nothing was retransmitted or returned timestamp is less ··· 2868 2856 static void tcp_update_rto_time(struct tcp_sock *tp) 2869 2857 { 2870 2858 if (tp->rto_stamp) { 2871 - tp->total_rto_time += tcp_time_stamp(tp) - tp->rto_stamp; 2859 + tp->total_rto_time += tcp_time_stamp_ms(tp) - tp->rto_stamp; 2872 2860 tp->rto_stamp = 0; 2873 2861 } 2874 2862 } ··· 3158 3146 * left edge of the send window. 3159 3147 * See draft-ietf-tcplw-high-performance-00, section 3.3. 
3160 3148 */ 3161 - if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && 3162 - flag & FLAG_ACKED) { 3163 - u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; 3149 + if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && 3150 + tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED) 3151 + seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp); 3164 3152 3165 - if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { 3166 - if (!delta) 3167 - delta = 1; 3168 - seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ); 3169 - ca_rtt_us = seq_rtt_us; 3170 - } 3171 - } 3172 3153 rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */ 3173 3154 if (seq_rtt_us < 0) 3174 3155 return false; ··· 6298 6293 6299 6294 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && 6300 6295 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, 6301 - tcp_time_stamp(tp))) { 6296 + tcp_time_stamp_ts(tp))) { 6302 6297 NET_INC_STATS(sock_net(sk), 6303 6298 LINUX_MIB_PAWSACTIVEREJECTED); 6304 6299 goto reset_and_undo; ··· 7047 7042 req->syncookie = want_cookie; 7048 7043 tcp_rsk(req)->af_specific = af_ops; 7049 7044 tcp_rsk(req)->ts_off = 0; 7045 + tcp_rsk(req)->req_usec_ts = -1; 7050 7046 #if IS_ENABLED(CONFIG_MPTCP) 7051 7047 tcp_rsk(req)->is_mptcp = 0; 7052 7048 #endif
+3 -2
net/ipv4/tcp_ipv4.c
··· 296 296 rt = NULL; 297 297 goto failure; 298 298 } 299 + tp->tcp_usec_ts = dst_tcp_usec_ts(&rt->dst); 299 300 /* OK, now commit destination to socket. */ 300 301 sk->sk_gso_type = SKB_GSO_TCPV4; 301 302 sk_setup_caps(sk, &rt->dst); ··· 955 954 tcp_v4_send_ack(sk, skb, 956 955 tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 957 956 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 958 - tcp_time_stamp_raw() + tcptw->tw_ts_offset, 957 + tcp_tw_tsval(tcptw), 959 958 tcptw->tw_ts_recent, 960 959 tw->tw_bound_dev_if, 961 960 tcp_twsk_md5_key(tcptw), ··· 989 988 tcp_v4_send_ack(sk, skb, seq, 990 989 tcp_rsk(req)->rcv_nxt, 991 990 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 992 - tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 991 + tcp_rsk_tsval(tcp_rsk(req)), 993 992 READ_ONCE(req->ts_recent), 994 993 0, 995 994 tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
+1 -1
net/ipv4/tcp_lp.c
··· 272 272 { 273 273 struct tcp_sock *tp = tcp_sk(sk); 274 274 struct lp *lp = inet_csk_ca(sk); 275 - u32 now = tcp_time_stamp(tp); 275 + u32 now = tcp_time_stamp_ts(tp); 276 276 u32 delta; 277 277 278 278 if (sample->rtt_us > 0)
+14 -5
net/ipv4/tcp_minisocks.c
··· 300 300 tcptw->tw_ts_recent = tp->rx_opt.ts_recent; 301 301 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; 302 302 tcptw->tw_ts_offset = tp->tsoffset; 303 + tw->tw_usec_ts = tp->tcp_usec_ts; 303 304 tcptw->tw_last_oow_ack_time = 0; 304 305 tcptw->tw_tx_delay = tp->tcp_tx_delay; 305 306 tw->tw_txhash = sk->sk_txhash; ··· 555 554 newtp->max_window = newtp->snd_wnd; 556 555 557 556 if (newtp->rx_opt.tstamp_ok) { 557 + newtp->tcp_usec_ts = treq->req_usec_ts; 558 558 newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent); 559 559 newtp->rx_opt.ts_recent_stamp = ktime_get_seconds(); 560 560 newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; 561 561 } else { 562 + newtp->tcp_usec_ts = 0; 562 563 newtp->rx_opt.ts_recent_stamp = 0; 563 564 newtp->tcp_header_len = sizeof(struct tcphdr); 564 565 } 565 566 if (req->num_timeout) { 566 - newtp->undo_marker = treq->snt_isn; 567 - newtp->retrans_stamp = div_u64(treq->snt_synack, 568 - USEC_PER_SEC / TCP_TS_HZ); 569 567 newtp->total_rto = req->num_timeout; 570 - newtp->total_rto_recoveries = 1; 571 - newtp->total_rto_time = tcp_time_stamp_raw() - 568 + newtp->undo_marker = treq->snt_isn; 569 + if (newtp->tcp_usec_ts) { 570 + newtp->retrans_stamp = treq->snt_synack; 571 + newtp->total_rto_time = (u32)(tcp_clock_us() - 572 + newtp->retrans_stamp) / USEC_PER_MSEC; 573 + } else { 574 + newtp->retrans_stamp = div_u64(treq->snt_synack, 575 + USEC_PER_SEC / TCP_TS_HZ); 576 + newtp->total_rto_time = tcp_clock_ms() - 572 577 newtp->retrans_stamp; 578 + } 579 + newtp->total_rto_recoveries = 1; 573 580 } 574 581 newtp->tsoffset = treq->ts_off; 575 582 #ifdef CONFIG_TCP_MD5SIG
+9 -5
net/ipv4/tcp_output.c
··· 799 799 800 800 if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { 801 801 opts->options |= OPTION_TS; 802 - opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; 802 + opts->tsval = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) + tp->tsoffset; 803 803 opts->tsecr = tp->rx_opt.ts_recent; 804 804 remaining -= TCPOLEN_TSTAMP_ALIGNED; 805 805 } ··· 884 884 } 885 885 if (likely(ireq->tstamp_ok)) { 886 886 opts->options |= OPTION_TS; 887 - opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off; 887 + opts->tsval = tcp_skb_timestamp_ts(tcp_rsk(req)->req_usec_ts, skb) + 888 + tcp_rsk(req)->ts_off; 888 889 opts->tsecr = READ_ONCE(req->ts_recent); 889 890 remaining -= TCPOLEN_TSTAMP_ALIGNED; 890 891 } ··· 944 943 945 944 if (likely(tp->rx_opt.tstamp_ok)) { 946 945 opts->options |= OPTION_TS; 947 - opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0; 946 + opts->tsval = skb ? tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) + 947 + tp->tsoffset : 0; 948 948 opts->tsecr = tp->rx_opt.ts_recent; 949 949 size += TCPOLEN_TSTAMP_ALIGNED; 950 950 } ··· 3381 3379 3382 3380 /* Save stamp of the first (attempted) retransmit. 
*/ 3383 3381 if (!tp->retrans_stamp) 3384 - tp->retrans_stamp = tcp_skb_timestamp(skb); 3382 + tp->retrans_stamp = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb); 3385 3383 3386 3384 if (tp->undo_retrans < 0) 3387 3385 tp->undo_retrans = 0; ··· 3667 3665 mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); 3668 3666 3669 3667 memset(&opts, 0, sizeof(opts)); 3668 + if (tcp_rsk(req)->req_usec_ts < 0) 3669 + tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); 3670 3670 now = tcp_clock_ns(); 3671 3671 #ifdef CONFIG_SYN_COOKIES 3672 3672 if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) ··· 3965 3961 3966 3962 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); 3967 3963 tcp_mstamp_refresh(tp); 3968 - tp->retrans_stamp = tcp_time_stamp(tp); 3964 + tp->retrans_stamp = tcp_time_stamp_ts(tp); 3969 3965 tcp_connect_queue_skb(sk, buff); 3970 3966 tcp_ecn_send_syn(sk, buff); 3971 3967 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
+28 -16
net/ipv4/tcp_timer.c
··· 26 26 static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) 27 27 { 28 28 struct inet_connection_sock *icsk = inet_csk(sk); 29 - u32 elapsed, start_ts, user_timeout; 29 + const struct tcp_sock *tp = tcp_sk(sk); 30 + u32 elapsed, user_timeout; 30 31 s32 remaining; 31 32 32 - start_ts = tcp_sk(sk)->retrans_stamp; 33 33 user_timeout = READ_ONCE(icsk->icsk_user_timeout); 34 34 if (!user_timeout) 35 35 return icsk->icsk_rto; 36 - elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts; 36 + 37 + elapsed = tcp_time_stamp_ts(tp) - tp->retrans_stamp; 38 + if (tp->tcp_usec_ts) 39 + elapsed /= USEC_PER_MSEC; 40 + 37 41 remaining = user_timeout - elapsed; 38 42 if (remaining <= 0) 39 43 return 1; /* user timeout has passed; fire ASAP */ ··· 216 212 unsigned int boundary, 217 213 unsigned int timeout) 218 214 { 219 - unsigned int start_ts; 215 + struct tcp_sock *tp = tcp_sk(sk); 216 + unsigned int start_ts, delta; 220 217 221 218 if (!inet_csk(sk)->icsk_retransmits) 222 219 return false; 223 220 224 - start_ts = tcp_sk(sk)->retrans_stamp; 221 + start_ts = tp->retrans_stamp; 225 222 if (likely(timeout == 0)) { 226 223 unsigned int rto_base = TCP_RTO_MIN; 227 224 ··· 231 226 timeout = tcp_model_timeout(sk, boundary, rto_base); 232 227 } 233 228 234 - return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0; 229 + if (tp->tcp_usec_ts) { 230 + /* delta maybe off up to a jiffy due to timer granularity. */ 231 + delta = tp->tcp_mstamp - start_ts + jiffies_to_usecs(1); 232 + return (s32)(delta - timeout * USEC_PER_MSEC) >= 0; 233 + } 234 + return (s32)(tcp_time_stamp_ts(tp) - start_ts - timeout) >= 0; 235 235 } 236 236 237 237 /* A write timeout has occurred. Process the after effects. 
*/ ··· 432 422 433 423 if (!icsk->icsk_retransmits) { 434 424 tp->total_rto_recoveries++; 435 - tp->rto_stamp = tcp_time_stamp(tp); 425 + tp->rto_stamp = tcp_time_stamp_ms(tp); 436 426 } 437 427 icsk->icsk_retransmits++; 438 428 tp->total_rto++; ··· 472 462 req->num_timeout++; 473 463 tcp_update_rto_stats(sk); 474 464 if (!tp->retrans_stamp) 475 - tp->retrans_stamp = tcp_time_stamp(tp); 465 + tp->retrans_stamp = tcp_time_stamp_ts(tp); 476 466 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 477 467 req->timeout << req->num_timeout, TCP_RTO_MAX); 478 468 } 479 469 480 470 static bool tcp_rtx_probe0_timed_out(const struct sock *sk, 481 - const struct sk_buff *skb) 471 + const struct sk_buff *skb, 472 + u32 rtx_delta) 482 473 { 483 474 const struct tcp_sock *tp = tcp_sk(sk); 484 475 const int timeout = TCP_RTO_MAX * 2; 485 - u32 rcv_delta, rtx_delta; 476 + u32 rcv_delta; 486 477 487 478 rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp; 488 479 if (rcv_delta <= timeout) 489 480 return false; 490 481 491 - rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) - 492 - (tp->retrans_stamp ?: tcp_skb_timestamp(skb))); 493 - 494 - return rtx_delta > timeout; 482 + return msecs_to_jiffies(rtx_delta) > timeout; 495 483 } 496 484 497 485 /** ··· 542 534 struct inet_sock *inet = inet_sk(sk); 543 535 u32 rtx_delta; 544 536 545 - rtx_delta = tcp_time_stamp(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp(skb)); 537 + rtx_delta = tcp_time_stamp_ts(tp) - (tp->retrans_stamp ?: 538 + tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb)); 539 + if (tp->tcp_usec_ts) 540 + rtx_delta /= USEC_PER_MSEC; 541 + 546 542 if (sk->sk_family == AF_INET) { 547 543 net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n", 548 544 &inet->inet_daddr, ntohs(inet->inet_dport), ··· 563 551 rtx_delta); 564 552 } 565 553 #endif 566 - if (tcp_rtx_probe0_timed_out(sk, skb)) { 554 + if (tcp_rtx_probe0_timed_out(sk, skb, rtx_delta)) { 567 555 tcp_write_err(sk); 568 556 goto 
out; 569 557 }
+3 -2
net/ipv6/tcp_ipv6.c
··· 286 286 goto failure; 287 287 } 288 288 289 + tp->tcp_usec_ts = dst_tcp_usec_ts(dst); 289 290 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; 290 291 291 292 if (!saddr) { ··· 1097 1096 1098 1097 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 1099 1098 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1100 - tcp_time_stamp_raw() + tcptw->tw_ts_offset, 1099 + tcp_tw_tsval(tcptw), 1101 1100 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), 1102 1101 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, 1103 1102 tw->tw_txhash); ··· 1124 1123 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1125 1124 tcp_rsk(req)->rcv_nxt, 1126 1125 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 1127 - tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 1126 + tcp_rsk_tsval(tcp_rsk(req)), 1128 1127 READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, 1129 1128 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), 1130 1129 ipv6_get_dsfield(ipv6_hdr(skb)), 0,
+1 -1
net/netfilter/nf_synproxy_core.c
··· 153 153 struct synproxy_options *opts) 154 154 { 155 155 opts->tsecr = opts->tsval; 156 - opts->tsval = tcp_time_stamp_raw() & ~0x3f; 156 + opts->tsval = tcp_clock_ms() & ~0x3f; 157 157 158 158 if (opts->options & NF_SYNPROXY_OPT_WSCALE) { 159 159 opts->tsval |= opts->wscale;
+2 -2
tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
··· 177 177 return ns / (NSEC_PER_SEC / TCP_TS_HZ); 178 178 } 179 179 180 - static __always_inline __u32 tcp_time_stamp_raw(void) 180 + static __always_inline __u32 tcp_clock_ms(void) 181 181 { 182 182 return tcp_ns_to_ts(tcp_clock_ns()); 183 183 } ··· 274 274 if (!loop_ctx.option_timestamp) 275 275 return false; 276 276 277 - cookie = tcp_time_stamp_raw() & ~TSMASK; 277 + cookie = tcp_clock_ms() & ~TSMASK; 278 278 cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK; 279 279 if (loop_ctx.option_sack) 280 280 cookie |= TS_OPT_SACK;