Merge branch 'tcp-make-tcp_ack-faster' · tjh.dev/kernel@c0f38f3

-3

include/net/tcp.h

··· 2514 2514 extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, 2515 2515 u32 reo_wnd); 2516 2516 extern bool tcp_rack_mark_lost(struct sock *sk); 2517 - extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, 2518 - u64 xmit_time); 2519 2517 extern void tcp_rack_reo_timeout(struct sock *sk); 2520 - extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs); 2521 2518 2522 2519 /* tcp_plb.c */ 2523 2520

+77 -2

net/ipv4/tcp_input.c

··· 1558 1558 return in_sack; 1559 1559 } 1560 1560 1561 + /* Record the most recently (re)sent time among the (s)acked packets 1562 + * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from 1563 + * draft-cheng-tcpm-rack-00.txt 1564 + */ 1565 + static void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, 1566 + u32 end_seq, u64 xmit_time) 1567 + { 1568 + u32 rtt_us; 1569 + 1570 + rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); 1571 + if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) { 1572 + /* If the sacked packet was retransmitted, it's ambiguous 1573 + * whether the retransmission or the original (or the prior 1574 + * retransmission) was sacked. 1575 + * 1576 + * If the original is lost, there is no ambiguity. Otherwise 1577 + * we assume the original can be delayed up to aRTT + min_rtt. 1578 + * the aRTT term is bounded by the fast recovery or timeout, 1579 + * so it's at least one RTT (i.e., retransmission is at least 1580 + * an RTT later). 1581 + */ 1582 + return; 1583 + } 1584 + tp->rack.advanced = 1; 1585 + tp->rack.rtt_us = rtt_us; 1586 + if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp, 1587 + end_seq, tp->rack.end_seq)) { 1588 + tp->rack.mstamp = xmit_time; 1589 + tp->rack.end_seq = end_seq; 1590 + } 1591 + } 1592 + 1561 1593 /* Mark the given newly-SACKed range as such, adjusting counters and hints. */ 1562 1594 static u8 tcp_sacktag_one(struct sock *sk, 1563 1595 struct tcp_sacktag_state *state, u8 sacked, ··· 4181 4149 return delivered; 4182 4150 } 4183 4151 4152 + /* Updates the RACK's reo_wnd based on DSACK and no. of recoveries. 4153 + * 4154 + * If a DSACK is received that seems like it may have been due to reordering 4155 + * triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded 4156 + * by srtt), since there is possibility that spurious retransmission was 4157 + * due to reordering delay longer than reo_wnd. 4158 + * 4159 + * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16) 4160 + * no. of successful recoveries (accounts for full DSACK-based loss 4161 + * recovery undo). After that, reset it to default (min_rtt/4). 4162 + * 4163 + * At max, reo_wnd is incremented only once per rtt. So that the new 4164 + * DSACK on which we are reacting, is due to the spurious retx (approx) 4165 + * after the reo_wnd has been updated last time. 4166 + * 4167 + * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than 4168 + * absolute value to account for change in rtt. 4169 + */ 4170 + static void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) 4171 + { 4172 + struct tcp_sock *tp = tcp_sk(sk); 4173 + 4174 + if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & 4175 + TCP_RACK_STATIC_REO_WND) || 4176 + !rs->prior_delivered) 4177 + return; 4178 + 4179 + /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */ 4180 + if (before(rs->prior_delivered, tp->rack.last_delivered)) 4181 + tp->rack.dsack_seen = 0; 4182 + 4183 + /* Adjust the reo_wnd if update is pending */ 4184 + if (tp->rack.dsack_seen) { 4185 + tp->rack.reo_wnd_steps = min_t(u32, 0xFF, 4186 + tp->rack.reo_wnd_steps + 1); 4187 + tp->rack.dsack_seen = 0; 4188 + tp->rack.last_delivered = tp->delivered; 4189 + tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH; 4190 + } else if (!tp->rack.reo_wnd_persist) { 4191 + tp->rack.reo_wnd_steps = 1; 4192 + } 4193 + } 4194 + 4184 4195 /* This routine deals with incoming acks, but not outgoing ones. */ 4185 4196 static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) 4186 4197 { ··· 4358 4283 4359 4284 tcp_in_ack_event(sk, flag); 4360 4285 4361 - if (tp->tlp_high_seq) 4286 + if (unlikely(tp->tlp_high_seq)) 4362 4287 tcp_process_tlp_ack(sk, ack, flag); 4363 4288 4364 4289 if (tcp_ack_is_dubious(sk, flag)) { ··· 4408 4333 */ 4409 4334 tcp_ack_probe(sk); 4410 4335 4411 - if (tp->tlp_high_seq) 4336 + if (unlikely(tp->tlp_high_seq)) 4412 4337 tcp_process_tlp_ack(sk, ack, flag); 4413 4338 return 1; 4414 4339

-75

net/ipv4/tcp_recovery.c

··· 111 111 return !!timeout; 112 112 } 113 113 114 - /* Record the most recently (re)sent time among the (s)acked packets 115 - * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from 116 - * draft-cheng-tcpm-rack-00.txt 117 - */ 118 - void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, 119 - u64 xmit_time) 120 - { 121 - u32 rtt_us; 122 - 123 - rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); 124 - if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) { 125 - /* If the sacked packet was retransmitted, it's ambiguous 126 - * whether the retransmission or the original (or the prior 127 - * retransmission) was sacked. 128 - * 129 - * If the original is lost, there is no ambiguity. Otherwise 130 - * we assume the original can be delayed up to aRTT + min_rtt. 131 - * the aRTT term is bounded by the fast recovery or timeout, 132 - * so it's at least one RTT (i.e., retransmission is at least 133 - * an RTT later). 134 - */ 135 - return; 136 - } 137 - tp->rack.advanced = 1; 138 - tp->rack.rtt_us = rtt_us; 139 - if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp, 140 - end_seq, tp->rack.end_seq)) { 141 - tp->rack.mstamp = xmit_time; 142 - tp->rack.end_seq = end_seq; 143 - } 144 - } 145 - 146 114 /* We have waited long enough to accommodate reordering. Mark the expired 147 115 * packets lost and retransmit them. 148 116 */ ··· 132 164 } 133 165 if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS) 134 166 tcp_rearm_rto(sk); 135 - } 136 - 137 - /* Updates the RACK's reo_wnd based on DSACK and no. of recoveries. 138 - * 139 - * If a DSACK is received that seems like it may have been due to reordering 140 - * triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded 141 - * by srtt), since there is possibility that spurious retransmission was 142 - * due to reordering delay longer than reo_wnd. 143 - * 144 - * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16) 145 - * no. of successful recoveries (accounts for full DSACK-based loss 146 - * recovery undo). After that, reset it to default (min_rtt/4). 147 - * 148 - * At max, reo_wnd is incremented only once per rtt. So that the new 149 - * DSACK on which we are reacting, is due to the spurious retx (approx) 150 - * after the reo_wnd has been updated last time. 151 - * 152 - * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than 153 - * absolute value to account for change in rtt. 154 - */ 155 - void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) 156 - { 157 - struct tcp_sock *tp = tcp_sk(sk); 158 - 159 - if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & 160 - TCP_RACK_STATIC_REO_WND) || 161 - !rs->prior_delivered) 162 - return; 163 - 164 - /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */ 165 - if (before(rs->prior_delivered, tp->rack.last_delivered)) 166 - tp->rack.dsack_seen = 0; 167 - 168 - /* Adjust the reo_wnd if update is pending */ 169 - if (tp->rack.dsack_seen) { 170 - tp->rack.reo_wnd_steps = min_t(u32, 0xFF, 171 - tp->rack.reo_wnd_steps + 1); 172 - tp->rack.dsack_seen = 0; 173 - tp->rack.last_delivered = tp->delivered; 174 - tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH; 175 - } else if (!tp->rack.reo_wnd_persist) { 176 - tp->rack.reo_wnd_steps = 1; 177 - } 178 167 } 179 168 180 169 /* RFC6582 NewReno recovery for non-SACK connection. It simply retransmits

Configure Feed

Configure Feed