Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tcp: better handle TCP_TX_DELAY on established flows

Some applications use the TCP_TX_DELAY socket option after the TCP flow
is established.

Some metrics need to be updated, otherwise TCP might take time to
adapt to the new (emulated) RTT.

This patch adjusts tp->srtt_us, tp->rtt_min, icsk_rto
and sk->sk_pacing_rate.

This is best effort, and for instance icsk_rto is reset
without taking backoff into account.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251013145926.833198-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Eric Dumazet and committed by
Jakub Kicinski
1c51450f 6378e25e

+31 -6
+2
include/net/tcp.h
··· 461 461 void tcp_enter_loss(struct sock *sk); 462 462 void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag); 463 463 void tcp_clear_retrans(struct tcp_sock *tp); 464 + void tcp_update_pacing_rate(struct sock *sk); 465 + void tcp_set_rto(struct sock *sk); 464 466 void tcp_update_metrics(struct sock *sk); 465 467 void tcp_init_metrics(struct sock *sk); 466 468 void tcp_metrics_init(void);
+27 -4
net/ipv4/tcp.c
··· 3583 3583 DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled); 3584 3584 EXPORT_IPV6_MOD(tcp_tx_delay_enabled); 3585 3585 3586 - static void tcp_enable_tx_delay(void) 3586 + static void tcp_enable_tx_delay(struct sock *sk, int val) 3587 3587 { 3588 - if (!static_branch_unlikely(&tcp_tx_delay_enabled)) { 3588 + struct tcp_sock *tp = tcp_sk(sk); 3589 + s32 delta = (val - tp->tcp_tx_delay) << 3; 3590 + 3591 + if (val && !static_branch_unlikely(&tcp_tx_delay_enabled)) { 3589 3592 static int __tcp_tx_delay_enabled = 0; 3590 3593 3591 3594 if (cmpxchg(&__tcp_tx_delay_enabled, 0, 1) == 0) { 3592 3595 static_branch_enable(&tcp_tx_delay_enabled); 3593 3596 pr_info("TCP_TX_DELAY enabled\n"); 3594 3597 } 3598 + } 3599 + /* If we change tcp_tx_delay on a live flow, adjust tp->srtt_us, 3600 + * tp->rtt_min, icsk_rto and sk->sk_pacing_rate. 3601 + * This is best effort. 3602 + */ 3603 + if (delta && sk->sk_state == TCP_ESTABLISHED) { 3604 + s64 srtt = (s64)tp->srtt_us + delta; 3605 + 3606 + tp->srtt_us = clamp_t(s64, srtt, 1, ~0U); 3607 + 3608 + /* Note: does not deal with non zero icsk_backoff */ 3609 + tcp_set_rto(sk); 3610 + 3611 + minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U); 3612 + 3613 + tcp_update_pacing_rate(sk); 3595 3614 } 3596 3615 } 3597 3616 ··· 4138 4119 tp->recvmsg_inq = val; 4139 4120 break; 4140 4121 case TCP_TX_DELAY: 4141 - if (val) 4142 - tcp_enable_tx_delay(); 4122 + /* tp->srtt_us is u32, and is shifted by 3 */ 4123 + if (val < 0 || val >= (1U << (31 - 3))) { 4124 + err = -EINVAL; 4125 + break; 4126 + } 4127 + tcp_enable_tx_delay(sk, val); 4143 4128 WRITE_ONCE(tp->tcp_tx_delay, val); 4144 4129 break; 4145 4130 default:
+2 -2
net/ipv4/tcp_input.c
··· 1095 1095 tp->srtt_us = max(1U, srtt); 1096 1096 } 1097 1097 1098 - static void tcp_update_pacing_rate(struct sock *sk) 1098 + void tcp_update_pacing_rate(struct sock *sk) 1099 1099 { 1100 1100 const struct tcp_sock *tp = tcp_sk(sk); 1101 1101 u64 rate; ··· 1132 1132 /* Calculate rto without backoff. This is the second half of Van Jacobson's 1133 1133 * routine referred to above. 1134 1134 */ 1135 - static void tcp_set_rto(struct sock *sk) 1135 + void tcp_set_rto(struct sock *sk) 1136 1136 { 1137 1137 const struct tcp_sock *tp = tcp_sk(sk); 1138 1138 /* Old crap is replaced with new one. 8)