Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

rxrpc: Store the DATA serial in the txqueue and use this in RTT calc

Store the serial number set on a DATA packet at the point of transmission
in the rxrpc_txqueue struct and, when an ACK is received, match the
reference number in the ACK by trawling the txqueue rather than sharing an
RTT table with ACK RTT. This can be done as part of Tx queue rotation.

This means we have a lot more RTT samples available, and the search is
faster because all the serial numbers are packed together into a few
cachelines rather than being hung off different txbufs.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Link: https://patch.msgid.link/20241204074710.990092-25-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by David Howells and committed by Jakub Kicinski
dcdff0d8 9b052c6b

+79 -47
+5 -9
include/trace/events/rxrpc.h
··· 337 337 E_(rxrpc_rtt_tx_ping, "PING") 338 338 339 339 #define rxrpc_rtt_rx_traces \ 340 - EM(rxrpc_rtt_rx_other_ack, "OACK") \ 340 + EM(rxrpc_rtt_rx_data_ack, "DACK") \ 341 341 EM(rxrpc_rtt_rx_obsolete, "OBSL") \ 342 342 EM(rxrpc_rtt_rx_lost, "LOST") \ 343 - EM(rxrpc_rtt_rx_ping_response, "PONG") \ 344 - E_(rxrpc_rtt_rx_requested_ack, "RACK") 343 + E_(rxrpc_rtt_rx_ping_response, "PONG") 345 344 346 345 #define rxrpc_timer_traces \ 347 346 EM(rxrpc_timer_trace_delayed_ack, "DelayAck ") \ ··· 1694 1695 ); 1695 1696 1696 1697 TRACE_EVENT(rxrpc_congest, 1697 - TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, 1698 - rxrpc_serial_t ack_serial), 1698 + TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary), 1699 1699 1700 - TP_ARGS(call, summary, ack_serial), 1700 + TP_ARGS(call, summary), 1701 1701 1702 1702 TP_STRUCT__entry( 1703 1703 __field(unsigned int, call) ··· 1704 1706 __field(rxrpc_seq_t, hard_ack) 1705 1707 __field(rxrpc_seq_t, top) 1706 1708 __field(rxrpc_seq_t, lowest_nak) 1707 - __field(rxrpc_serial_t, ack_serial) 1708 1709 __field(u16, nr_sacks) 1709 1710 __field(u16, nr_snacks) 1710 1711 __field(u16, cwnd) ··· 1719 1722 __entry->hard_ack = call->acks_hard_ack; 1720 1723 __entry->top = call->tx_top; 1721 1724 __entry->lowest_nak = call->acks_lowest_nak; 1722 - __entry->ack_serial = ack_serial; 1723 1725 __entry->nr_sacks = call->acks_nr_sacks; 1724 1726 __entry->nr_snacks = call->acks_nr_snacks; 1725 1727 __entry->cwnd = call->cong_cwnd; ··· 1730 1734 1731 1735 TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u A=%u+%u/%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s", 1732 1736 __entry->call, 1733 - __entry->ack_serial, 1737 + __entry->sum.acked_serial, 1734 1738 __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), 1735 1739 __entry->hard_ack, 1736 1740 __print_symbolic(__entry->ca_state, rxrpc_ca_states),
+4
net/rxrpc/ar-internal.h
··· 769 769 * Summary of a new ACK and the changes it made to the Tx buffer packet states. 770 770 */ 771 771 struct rxrpc_ack_summary { 772 + rxrpc_serial_t acked_serial; /* Serial number ACK'd */ 772 773 u16 in_flight; /* Number of unreceived transmissions */ 773 774 u16 nr_new_hacks; /* Number of rotated new ACKs */ 774 775 u16 nr_new_sacks; /* Number of new soft ACKs in packet */ ··· 778 777 bool new_low_snack:1; /* T if new low soft NACK found */ 779 778 bool retrans_timeo:1; /* T if reTx due to timeout happened */ 780 779 bool need_retransmit:1; /* T if we need transmission */ 780 + bool rtt_sample_avail:1; /* T if RTT sample available */ 781 781 u8 /*enum rxrpc_congest_change*/ change; 782 782 }; 783 783 ··· 861 859 unsigned long segment_acked; /* Bit-per-buf: Set if ACK'd */ 862 860 unsigned long segment_lost; /* Bit-per-buf: Set if declared lost */ 863 861 unsigned long segment_retransmitted; /* Bit-per-buf: Set if retransmitted */ 862 + unsigned long rtt_samples; /* Bit-per-buf: Set if available for RTT */ 864 863 865 864 /* The arrays we want to pack into as few cache lines as possible. */ 866 865 struct { 867 866 #define RXRPC_NR_TXQUEUE BITS_PER_LONG 868 867 #define RXRPC_TXQ_MASK (RXRPC_NR_TXQUEUE - 1) 869 868 struct rxrpc_txbuf *bufs[RXRPC_NR_TXQUEUE]; 869 + unsigned int segment_serial[RXRPC_NR_TXQUEUE]; 870 870 unsigned int segment_xmit_ts[RXRPC_NR_TXQUEUE]; 871 871 } ____cacheline_aligned; 872 872 };
+4 -4
net/rxrpc/call_event.c
··· 159 159 rxrpc_seq_t stop = earliest(tq_top, call->tx_transmitted); 160 160 161 161 _debug("unrep %x-%x", start, stop); 162 - for (rxrpc_seq_t seq = start; before(seq, stop); seq++) { 163 - struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK]; 162 + for (rxrpc_seq_t seq = start; before_eq(seq, stop); seq++) { 163 + rxrpc_serial_t serial = tq->segment_serial[seq & RXRPC_TXQ_MASK]; 164 164 165 165 if (ping_response && 166 - before(txb->serial, call->acks_highest_serial)) 166 + before(serial, call->acks_highest_serial)) 167 167 break; /* Wasn't accounted for by a more recent ping. */ 168 168 req.tq = tq; 169 169 req.seq = seq; ··· 198 198 199 199 _debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now)); 200 200 call->resend_at = resend_at; 201 - trace_rxrpc_timer_set(call, resend_at - req.now, 201 + trace_rxrpc_timer_set(call, ktime_sub(resend_at, req.now), 202 202 rxrpc_timer_trace_resend_reset); 203 203 } else { 204 204 call->resend_at = KTIME_MAX;
+61 -33
net/rxrpc/input.c
··· 30 30 * Do TCP-style congestion management [RFC 5681]. 31 31 */ 32 32 static void rxrpc_congestion_management(struct rxrpc_call *call, 33 - struct sk_buff *skb, 34 - struct rxrpc_ack_summary *summary, 35 - rxrpc_serial_t acked_serial) 33 + struct rxrpc_ack_summary *summary) 36 34 { 37 35 summary->change = rxrpc_cong_no_change; 38 36 summary->in_flight = (call->tx_top - call->tx_bottom) - call->acks_nr_sacks; ··· 42 44 if (call->cong_cwnd >= call->cong_ssthresh && 43 45 call->cong_ca_state == RXRPC_CA_SLOW_START) { 44 46 call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; 45 - call->cong_tstamp = skb->tstamp; 47 + call->cong_tstamp = call->acks_latest_ts; 46 48 call->cong_cumul_acks = 0; 47 49 } 48 50 } ··· 60 62 call->cong_cwnd += 1; 61 63 if (call->cong_cwnd >= call->cong_ssthresh) { 62 64 call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; 63 - call->cong_tstamp = skb->tstamp; 65 + call->cong_tstamp = call->acks_latest_ts; 64 66 } 65 67 goto out; 66 68 ··· 73 75 */ 74 76 if (call->peer->rtt_count == 0) 75 77 goto out; 76 - if (ktime_before(skb->tstamp, 78 + if (ktime_before(call->acks_latest_ts, 77 79 ktime_add_us(call->cong_tstamp, 78 80 call->peer->srtt_us >> 3))) 79 81 goto out_no_clear_ca; 80 82 summary->change = rxrpc_cong_rtt_window_end; 81 - call->cong_tstamp = skb->tstamp; 83 + call->cong_tstamp = call->acks_latest_ts; 82 84 if (call->cong_cumul_acks >= call->cong_cwnd) 83 85 call->cong_cwnd++; 84 86 goto out; ··· 135 137 summary->change = rxrpc_cong_cleared_nacks; 136 138 call->cong_dup_acks = 0; 137 139 call->cong_extra = 0; 138 - call->cong_tstamp = skb->tstamp; 140 + call->cong_tstamp = call->acks_latest_ts; 139 141 if (call->cong_cwnd < call->cong_ssthresh) 140 142 call->cong_ca_state = RXRPC_CA_SLOW_START; 141 143 else ··· 145 147 out_no_clear_ca: 146 148 if (call->cong_cwnd >= RXRPC_TX_MAX_WINDOW) 147 149 call->cong_cwnd = RXRPC_TX_MAX_WINDOW; 148 - trace_rxrpc_congest(call, summary, acked_serial); 150 + trace_rxrpc_congest(call, summary); 149 151 
return; 150 152 151 153 packet_loss_detected: ··· 193 195 } 194 196 195 197 /* 198 + * Add an RTT sample derived from an ACK'd DATA packet. 199 + */ 200 + static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call, 201 + struct rxrpc_ack_summary *summary, 202 + struct rxrpc_txqueue *tq, 203 + int ix, 204 + rxrpc_serial_t ack_serial) 205 + { 206 + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_data_ack, -1, 207 + summary->acked_serial, ack_serial, 208 + ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]), 209 + call->acks_latest_ts); 210 + summary->rtt_sample_avail = false; 211 + __clear_bit(ix, &tq->rtt_samples); /* Prevent repeat RTT sample */ 212 + } 213 + 214 + /* 196 215 * Apply a hard ACK by advancing the Tx window. 197 216 */ 198 217 static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, 199 - struct rxrpc_ack_summary *summary) 218 + struct rxrpc_ack_summary *summary, 219 + rxrpc_serial_t ack_serial) 200 220 { 201 221 struct rxrpc_txqueue *tq = call->tx_queue; 202 222 rxrpc_seq_t seq = call->tx_bottom + 1; ··· 251 235 set_bit(RXRPC_CALL_TX_LAST, &call->flags); 252 236 rot_last = true; 253 237 } 238 + 239 + if (summary->rtt_sample_avail && 240 + summary->acked_serial == tq->segment_serial[ix] && 241 + test_bit(ix, &tq->rtt_samples)) 242 + rxrpc_add_data_rtt_sample(call, summary, tq, ix, ack_serial); 254 243 255 244 if (ix == tq->nr_reported_acks) { 256 245 /* Packet directly hard ACK'd. */ ··· 369 348 } 370 349 371 350 if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { 372 - if (!rxrpc_rotate_tx_window(call, top, &summary)) { 351 + if (!rxrpc_rotate_tx_window(call, top, &summary, 0)) { 373 352 rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply); 374 353 return false; 375 354 } ··· 822 801 #endif 823 802 824 803 /* 804 + * Deal with RTT samples from soft ACKs. 
805 + */ 806 + static void rxrpc_input_soft_rtt(struct rxrpc_call *call, 807 + struct rxrpc_ack_summary *summary, 808 + struct rxrpc_txqueue *tq, 809 + rxrpc_serial_t ack_serial) 810 + { 811 + for (int ix = 0; ix < RXRPC_NR_TXQUEUE; ix++) 812 + if (summary->acked_serial == tq->segment_serial[ix]) 813 + return rxrpc_add_data_rtt_sample(call, summary, tq, ix, ack_serial); 814 + } 815 + 816 + /* 825 817 * Process a batch of soft ACKs specific to a transmission queue segment. 826 818 */ 827 819 static void rxrpc_input_soft_ack_tq(struct rxrpc_call *call, ··· 943 909 944 910 _debug("bound %16lx %u", extracted, nr); 945 911 912 + if (summary->rtt_sample_avail) 913 + rxrpc_input_soft_rtt(call, summary, tq, sp->hdr.serial); 946 914 rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE, 947 915 seq - RXRPC_NR_TXQUEUE, &lowest_nak); 948 916 extracted = ~0UL; ··· 1016 980 struct rxrpc_ack_summary summary = { 0 }; 1017 981 struct rxrpc_acktrailer trailer; 1018 982 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 1019 - rxrpc_serial_t ack_serial, acked_serial; 983 + rxrpc_serial_t ack_serial; 1020 984 rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; 1021 985 int nr_acks, offset, ioffset; 1022 986 ··· 1025 989 offset = sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); 1026 990 1027 991 ack_serial = sp->hdr.serial; 1028 - acked_serial = sp->ack.acked_serial; 1029 992 first_soft_ack = sp->ack.first_ack; 1030 993 prev_pkt = sp->ack.prev_ack; 1031 994 nr_acks = sp->ack.nr_acks; 1032 995 hard_ack = first_soft_ack - 1; 996 + summary.acked_serial = sp->ack.acked_serial; 1033 997 summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ? 
1034 998 sp->ack.reason : RXRPC_ACK__INVALID); 1035 999 ··· 1037 1001 rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]); 1038 1002 prefetch(call->tx_queue); 1039 1003 1040 - if (acked_serial != 0) { 1041 - switch (summary.ack_reason) { 1042 - case RXRPC_ACK_PING_RESPONSE: 1043 - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, 1044 - rxrpc_rtt_rx_ping_response); 1045 - break; 1046 - case RXRPC_ACK_REQUESTED: 1047 - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, 1048 - rxrpc_rtt_rx_requested_ack); 1049 - break; 1050 - default: 1051 - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, 1052 - rxrpc_rtt_rx_other_ack); 1053 - break; 1054 - } 1004 + if (summary.acked_serial != 0) { 1005 + if (summary.ack_reason == RXRPC_ACK_PING_RESPONSE) 1006 + rxrpc_complete_rtt_probe(call, skb->tstamp, summary.acked_serial, 1007 + ack_serial, rxrpc_rtt_rx_ping_response); 1008 + else 1009 + summary.rtt_sample_avail = true; 1055 1010 } 1056 1011 1057 1012 /* If we get an EXCEEDS_WINDOW ACK from the server, it probably ··· 1095 1068 case RXRPC_ACK_PING: 1096 1069 break; 1097 1070 default: 1098 - if (acked_serial && after(acked_serial, call->acks_highest_serial)) 1099 - call->acks_highest_serial = acked_serial; 1071 + if (summary.acked_serial && 1072 + after(summary.acked_serial, call->acks_highest_serial)) 1073 + call->acks_highest_serial = summary.acked_serial; 1100 1074 break; 1101 1075 } 1102 1076 ··· 1126 1098 return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow); 1127 1099 1128 1100 if (after(hard_ack, call->tx_bottom)) { 1129 - if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { 1101 + if (rxrpc_rotate_tx_window(call, hard_ack, &summary, ack_serial)) { 1130 1102 rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack); 1131 1103 goto send_response; 1132 1104 } ··· 1144 1116 rxrpc_propose_ping(call, ack_serial, 1145 1117 rxrpc_propose_ack_ping_for_lost_reply); 1146 1118 1147 - 
rxrpc_congestion_management(call, skb, &summary, acked_serial); 1119 + rxrpc_congestion_management(call, &summary); 1148 1120 if (summary.need_retransmit) 1149 1121 rxrpc_resend(call, ack_serial, summary.ack_reason == RXRPC_ACK_PING_RESPONSE); 1150 1122 ··· 1164 1136 { 1165 1137 struct rxrpc_ack_summary summary = { 0 }; 1166 1138 1167 - if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) 1139 + if (rxrpc_rotate_tx_window(call, call->tx_top, &summary, 0)) 1168 1140 rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall); 1169 1141 } 1170 1142
+5 -1
net/rxrpc/output.c
··· 436 436 trace_rxrpc_req_ack(call->debug_id, txb->seq, why); 437 437 if (why != rxrpc_reqack_no_srv_last) { 438 438 flags |= RXRPC_REQUEST_ACK; 439 - rxrpc_begin_rtt_probe(call, serial, req->now, rxrpc_rtt_tx_data); 439 + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, -1, serial); 440 440 call->peer->rtt_last_req = req->now; 441 441 } 442 442 dont_set_request_ack: ··· 508 508 509 509 _debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq); 510 510 tq->segment_xmit_ts[ix] = xmit_ts; 511 + tq->segment_serial[ix] = serial; 512 + if (i + 1 == req->n) 513 + /* Only sample the last subpacket in a jumbo. */ 514 + __set_bit(ix, &tq->rtt_samples); 511 515 len += rxrpc_prepare_data_subpacket(call, req, txb, serial, i); 512 516 serial++; 513 517 seq++;