Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

rxrpc: Implement progressive transmission queue struct

We need to scan the buffers in the transmission queue occasionally when
processing ACKs, but the transmission queue is currently a linked list of
transmission buffers which, when we eventually expand the Tx window to 8192
packets, will be very slow to walk.

Instead, pull the fields we need to examine a lot (last sent time,
retransmitted flag) into a new struct rxrpc_txqueue and make each one hold
an array of 32 or 64 packets.

The transmission queue is then a list of these structs, each pointing to a
contiguous set of packets. Scanning is then a lot faster as the flags and
timestamps are concentrated in the CPU dcache.

The transmission timestamps are stored as a number of microseconds from a
base ktime to reduce memory requirements. This should be fine provided we
manage to transmit an entire buffer within an hour.

This will make implementing RACK-TLP [RFC8985] easier as it will be less
costly to scan the transmission buffers.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Link: https://patch.msgid.link/20241204074710.990092-19-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

David Howells and committed by
Jakub Kicinski
b341a026 6396b48a

+468 -266
+83 -15
include/trace/events/rxrpc.h
··· 297 297 298 298 #define rxrpc_txqueue_traces \ 299 299 EM(rxrpc_txqueue_await_reply, "AWR") \ 300 - EM(rxrpc_txqueue_dequeue, "DEQ") \ 301 300 EM(rxrpc_txqueue_end, "END") \ 302 301 EM(rxrpc_txqueue_queue, "QUE") \ 303 302 EM(rxrpc_txqueue_queue_last, "QLS") \ ··· 481 482 EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \ 482 483 E_(rxrpc_txbuf_see_unacked, "SEE UNACKED") 483 484 485 + #define rxrpc_tq_traces \ 486 + EM(rxrpc_tq_alloc, "ALLOC") \ 487 + EM(rxrpc_tq_cleaned, "CLEAN") \ 488 + EM(rxrpc_tq_decant, "DCNT ") \ 489 + EM(rxrpc_tq_decant_advance, "DCNT>") \ 490 + EM(rxrpc_tq_queue, "QUEUE") \ 491 + EM(rxrpc_tq_queue_dup, "QUE!!") \ 492 + EM(rxrpc_tq_rotate, "ROT ") \ 493 + EM(rxrpc_tq_rotate_and_free, "ROT-F") \ 494 + EM(rxrpc_tq_rotate_and_keep, "ROT-K") \ 495 + EM(rxrpc_tq_transmit, "XMIT ") \ 496 + E_(rxrpc_tq_transmit_advance, "XMIT>") 497 + 484 498 #define rxrpc_pmtud_reduce_traces \ 485 499 EM(rxrpc_pmtud_reduce_ack, "Ack ") \ 486 500 EM(rxrpc_pmtud_reduce_icmp, "Icmp ") \ ··· 530 518 enum rxrpc_sack_trace { rxrpc_sack_traces } __mode(byte); 531 519 enum rxrpc_skb_trace { rxrpc_skb_traces } __mode(byte); 532 520 enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte); 521 + enum rxrpc_tq_trace { rxrpc_tq_traces } __mode(byte); 533 522 enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte); 534 523 enum rxrpc_txbuf_trace { rxrpc_txbuf_traces } __mode(byte); 535 524 enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte); ··· 567 554 rxrpc_sack_traces; 568 555 rxrpc_skb_traces; 569 556 rxrpc_timer_traces; 557 + rxrpc_tq_traces; 570 558 rxrpc_tx_points; 571 559 rxrpc_txbuf_traces; 572 560 rxrpc_txqueue_traces; ··· 895 881 __field(rxrpc_seq_t, acks_hard_ack) 896 882 __field(rxrpc_seq_t, tx_bottom) 897 883 __field(rxrpc_seq_t, tx_top) 898 - __field(rxrpc_seq_t, tx_prepared) 884 + __field(rxrpc_seq_t, send_top) 899 885 __field(int, tx_winsize) 900 886 ), 901 887 ··· 905 891 __entry->acks_hard_ack = call->acks_hard_ack; 906 892 __entry->tx_bottom = 
call->tx_bottom; 907 893 __entry->tx_top = call->tx_top; 908 - __entry->tx_prepared = call->tx_prepared; 894 + __entry->send_top = call->send_top; 909 895 __entry->tx_winsize = call->tx_winsize; 910 896 ), 911 897 ··· 916 902 __entry->acks_hard_ack, 917 903 __entry->tx_top - __entry->tx_bottom, 918 904 __entry->tx_top - __entry->acks_hard_ack, 919 - __entry->tx_prepared - __entry->tx_bottom, 905 + __entry->send_top - __entry->tx_top, 920 906 __entry->tx_winsize) 921 907 ); 922 908 923 909 TRACE_EVENT(rxrpc_transmit, 924 - TP_PROTO(struct rxrpc_call *call, int space), 910 + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t send_top, int space), 925 911 926 - TP_ARGS(call, space), 912 + TP_ARGS(call, send_top, space), 927 913 928 914 TP_STRUCT__entry( 929 915 __field(unsigned int, call) ··· 939 925 940 926 TP_fast_assign( 941 927 __entry->call = call->debug_id; 942 - __entry->seq = call->tx_bottom; 928 + __entry->seq = call->tx_top + 1; 943 929 __entry->space = space; 944 930 __entry->tx_winsize = call->tx_winsize; 945 931 __entry->cong_cwnd = call->cong_cwnd; 946 932 __entry->cong_extra = call->cong_extra; 947 - __entry->prepared = call->tx_prepared - call->tx_bottom; 933 + __entry->prepared = send_top - call->tx_bottom; 948 934 __entry->in_flight = call->tx_top - call->acks_hard_ack; 949 935 __entry->pmtud_jumbo = call->peer->pmtud_jumbo; 950 936 ), ··· 959 945 __entry->prepared, 960 946 __entry->in_flight, 961 947 __entry->pmtud_jumbo) 948 + ); 949 + 950 + TRACE_EVENT(rxrpc_tx_rotate, 951 + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, rxrpc_seq_t to), 952 + 953 + TP_ARGS(call, seq, to), 954 + 955 + TP_STRUCT__entry( 956 + __field(unsigned int, call) 957 + __field(rxrpc_seq_t, seq) 958 + __field(rxrpc_seq_t, to) 959 + __field(rxrpc_seq_t, top) 960 + ), 961 + 962 + TP_fast_assign( 963 + __entry->call = call->debug_id; 964 + __entry->seq = seq; 965 + __entry->to = to; 966 + __entry->top = call->tx_top; 967 + ), 968 + 969 + TP_printk("c=%08x q=%08x-%08x-%08x", 970 + 
__entry->call, 971 + __entry->seq, 972 + __entry->to, 973 + __entry->top) 962 974 ); 963 975 964 976 TRACE_EVENT(rxrpc_rx_data, ··· 1661 1621 ); 1662 1622 1663 1623 TRACE_EVENT(rxrpc_retransmit, 1664 - TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, 1665 - rxrpc_serial_t serial, ktime_t expiry), 1624 + TP_PROTO(struct rxrpc_call *call, 1625 + struct rxrpc_send_data_req *req, 1626 + struct rxrpc_txbuf *txb, ktime_t expiry), 1666 1627 1667 - TP_ARGS(call, seq, serial, expiry), 1628 + TP_ARGS(call, req, txb, expiry), 1668 1629 1669 1630 TP_STRUCT__entry( 1670 1631 __field(unsigned int, call) ··· 1676 1635 1677 1636 TP_fast_assign( 1678 1637 __entry->call = call->debug_id; 1679 - __entry->seq = seq; 1680 - __entry->serial = serial; 1638 + __entry->seq = req->seq; 1639 + __entry->serial = txb->serial; 1681 1640 __entry->expiry = expiry; 1682 1641 ), 1683 1642 ··· 1755 1714 __entry->cwnd = call->cong_cwnd; 1756 1715 __entry->extra = call->cong_extra; 1757 1716 __entry->hard_ack = call->acks_hard_ack; 1758 - __entry->prepared = call->tx_prepared - call->tx_bottom; 1717 + __entry->prepared = call->send_top - call->tx_bottom; 1759 1718 __entry->since_last_tx = ktime_sub(now, call->tx_last_sent); 1760 - __entry->has_data = !list_empty(&call->tx_sendmsg); 1719 + __entry->has_data = call->tx_bottom != call->tx_top; 1761 1720 ), 1762 1721 1763 1722 TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu d=%u", ··· 2063 2022 __entry->seq, 2064 2023 __print_symbolic(__entry->what, rxrpc_txbuf_traces), 2065 2024 __entry->ref) 2025 + ); 2026 + 2027 + TRACE_EVENT(rxrpc_tq, 2028 + TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq, 2029 + rxrpc_seq_t seq, enum rxrpc_tq_trace trace), 2030 + 2031 + TP_ARGS(call, tq, seq, trace), 2032 + 2033 + TP_STRUCT__entry( 2034 + __field(unsigned int, call_debug_id) 2035 + __field(rxrpc_seq_t, qbase) 2036 + __field(rxrpc_seq_t, seq) 2037 + __field(enum rxrpc_tq_trace, trace) 2038 + ), 2039 + 2040 + TP_fast_assign( 2041 + 
__entry->call_debug_id = call->debug_id; 2042 + __entry->qbase = tq ? tq->qbase : call->tx_qbase; 2043 + __entry->seq = seq; 2044 + __entry->trace = trace; 2045 + ), 2046 + 2047 + TP_printk("c=%08x bq=%08x q=%08x %s", 2048 + __entry->call_debug_id, 2049 + __entry->qbase, 2050 + __entry->seq, 2051 + __print_symbolic(__entry->trace, rxrpc_tq_traces)) 2066 2052 ); 2067 2053 2068 2054 TRACE_EVENT(rxrpc_poke_call,
+39 -8
net/rxrpc/ar-internal.h
··· 30 30 struct key_preparsed_payload; 31 31 struct rxrpc_connection; 32 32 struct rxrpc_txbuf; 33 + struct rxrpc_txqueue; 33 34 34 35 /* 35 36 * Mark applied to socket buffers in skb->mark. skb->priority is used ··· 692 691 unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ 693 692 unsigned short rx_pkt_len; /* Current recvmsg packet len */ 694 693 694 + /* Sendmsg data tracking. */ 695 + rxrpc_seq_t send_top; /* Highest Tx slot filled by sendmsg. */ 696 + struct rxrpc_txqueue *send_queue; /* Queue that sendmsg is writing into */ 697 + 695 698 /* Transmitted data tracking. */ 696 699 spinlock_t tx_lock; /* Transmit queue lock */ 697 - struct list_head tx_sendmsg; /* Sendmsg prepared packets */ 698 - struct list_head tx_buffer; /* Buffer of transmissible packets */ 700 + struct rxrpc_txqueue *tx_queue; /* Start of transmission buffers */ 701 + struct rxrpc_txqueue *tx_qtail; /* End of transmission buffers */ 702 + rxrpc_seq_t tx_qbase; /* First slot in tx_queue */ 699 703 rxrpc_seq_t tx_bottom; /* First packet in buffer */ 700 704 rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */ 701 - rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */ 702 705 rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ 703 706 u16 tx_backoff; /* Delay to insert due to Tx failure (ms) */ 704 707 u8 tx_winsize; /* Maximum size of Tx window */ ··· 820 815 * Buffer of data to be output as a packet. 821 816 */ 822 817 struct rxrpc_txbuf { 823 - struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */ 824 - struct list_head tx_link; /* Link in live Enc queue or Tx queue */ 825 - ktime_t last_sent; /* Time at which last transmitted */ 826 818 refcount_t ref; 827 819 rxrpc_seq_t seq; /* Sequence number of this packet */ 828 820 rxrpc_serial_t serial; /* Last serial number transmitted with */ ··· 850 848 { 851 849 return !rxrpc_sending_to_server(txb); 852 850 } 851 + 852 + /* 853 + * Transmit queue element, including RACK [RFC8985] per-segment metadata. 
The 854 + * transmission timestamp is in usec from the base. 855 + */ 856 + struct rxrpc_txqueue { 857 + /* Start with the members we want to prefetch. */ 858 + struct rxrpc_txqueue *next; 859 + ktime_t xmit_ts_base; 860 + rxrpc_seq_t qbase; 861 + 862 + /* The arrays we want to pack into as few cache lines as possible. */ 863 + struct { 864 + #define RXRPC_NR_TXQUEUE BITS_PER_LONG 865 + #define RXRPC_TXQ_MASK (RXRPC_NR_TXQUEUE - 1) 866 + struct rxrpc_txbuf *bufs[RXRPC_NR_TXQUEUE]; 867 + unsigned int segment_xmit_ts[RXRPC_NR_TXQUEUE]; 868 + } ____cacheline_aligned; 869 + }; 870 + 871 + /* 872 + * Data transmission request. 873 + */ 874 + struct rxrpc_send_data_req { 875 + ktime_t now; /* Current time */ 876 + struct rxrpc_txqueue *tq; /* Tx queue segment holding first DATA */ 877 + rxrpc_seq_t seq; /* Sequence of first data */ 878 + int n; /* Number of DATA packets to glue into jumbo */ 879 + bool did_send; /* T if did actually send */ 880 + }; 853 881 854 882 #include <trace/events/rxrpc.h> 855 883 ··· 937 905 enum rxrpc_propose_ack_trace why); 938 906 void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, 939 907 enum rxrpc_propose_ack_trace); 940 - void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *); 941 908 void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb); 942 909 943 910 bool rxrpc_input_call_event(struct rxrpc_call *call); ··· 1222 1191 rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why); 1223 1192 void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call); 1224 1193 int rxrpc_send_abort_packet(struct rxrpc_call *); 1194 + void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req); 1225 1195 void rxrpc_send_conn_abort(struct rxrpc_connection *conn); 1226 1196 void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); 1227 1197 void rxrpc_send_keepalive(struct rxrpc_peer *); 1228 - void rxrpc_transmit_data(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n); 1229 1198 1230 1199 /* 
1231 1200 * peer_event.c
+127 -75
net/rxrpc/call_event.c
··· 63 63 } 64 64 65 65 /* 66 + * Retransmit one or more packets. 67 + */ 68 + static void rxrpc_retransmit_data(struct rxrpc_call *call, 69 + struct rxrpc_send_data_req *req, 70 + ktime_t rto) 71 + { 72 + struct rxrpc_txqueue *tq = req->tq; 73 + unsigned int ix = req->seq & RXRPC_TXQ_MASK; 74 + struct rxrpc_txbuf *txb = tq->bufs[ix]; 75 + ktime_t xmit_ts, resend_at; 76 + 77 + _enter("%x,%x,%x,%x", tq->qbase, req->seq, ix, txb->debug_id); 78 + 79 + xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]); 80 + resend_at = ktime_add(xmit_ts, rto); 81 + trace_rxrpc_retransmit(call, req, txb, 82 + ktime_sub(resend_at, req->now)); 83 + 84 + txb->flags |= RXRPC_TXBUF_RESENT; 85 + rxrpc_send_data_packet(call, req); 86 + rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); 87 + 88 + req->tq = NULL; 89 + req->n = 0; 90 + req->did_send = true; 91 + req->now = ktime_get_real(); 92 + } 93 + 94 + /* 66 95 * Perform retransmission of NAK'd and unack'd packets. 67 96 */ 68 97 void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) 69 98 { 99 + struct rxrpc_send_data_req req = { 100 + .now = ktime_get_real(), 101 + }; 70 102 struct rxrpc_ackpacket *ack = NULL; 71 103 struct rxrpc_skb_priv *sp; 104 + struct rxrpc_txqueue *tq; 72 105 struct rxrpc_txbuf *txb; 73 - rxrpc_seq_t transmitted = call->tx_transmitted; 106 + rxrpc_seq_t transmitted = call->tx_transmitted, seq; 74 107 ktime_t next_resend = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC); 75 - ktime_t resend_at = KTIME_MAX, now, delay; 108 + ktime_t resend_at = KTIME_MAX, delay; 76 109 bool unacked = false, did_send = false; 77 - unsigned int i; 110 + unsigned int qix; 78 111 79 112 _enter("{%d,%d}", call->acks_hard_ack, call->tx_top); 80 113 81 - now = ktime_get_real(); 82 - 83 - if (list_empty(&call->tx_buffer)) 114 + if (call->tx_bottom == call->tx_top) 84 115 goto no_resend; 85 116 86 117 trace_rxrpc_resend(call, ack_skb); 87 - txb = list_first_entry(&call->tx_buffer, struct 
rxrpc_txbuf, call_link); 118 + tq = call->tx_queue; 119 + seq = call->tx_bottom; 88 120 89 - /* Scan the soft ACK table without dropping the lock and resend any 90 - * explicitly NAK'd packets. 91 - */ 121 + /* Scan the soft ACK table and resend any explicitly NAK'd packets. */ 92 122 if (ack_skb) { 93 123 sp = rxrpc_skb(ack_skb); 94 124 ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); 95 125 96 - for (i = 0; i < sp->ack.nr_acks; i++) { 97 - rxrpc_seq_t seq; 126 + for (int i = 0; i < sp->ack.nr_acks; i++) { 127 + rxrpc_seq_t aseq; 98 128 99 129 if (ack->acks[i] & 1) 100 130 continue; 101 - seq = sp->ack.first_ack + i; 102 - if (after(txb->seq, transmitted)) 103 - break; 104 - if (after(txb->seq, seq)) 105 - continue; /* A new hard ACK probably came in */ 106 - list_for_each_entry_from(txb, &call->tx_buffer, call_link) { 107 - if (txb->seq == seq) 108 - goto found_txb; 109 - } 110 - goto no_further_resend; 131 + aseq = sp->ack.first_ack + i; 132 + while (after_eq(aseq, tq->qbase + RXRPC_NR_TXQUEUE)) 133 + tq = tq->next; 134 + seq = aseq; 135 + qix = seq - tq->qbase; 136 + txb = tq->bufs[qix]; 137 + if (after(seq, transmitted)) 138 + goto no_further_resend; 111 139 112 - found_txb: 113 - resend_at = ktime_add(txb->last_sent, rto); 140 + resend_at = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[qix]); 141 + resend_at = ktime_add(resend_at, rto); 114 142 if (after(txb->serial, call->acks_highest_serial)) { 115 - if (ktime_after(resend_at, now) && 143 + if (ktime_after(resend_at, req.now) && 116 144 ktime_before(resend_at, next_resend)) 117 145 next_resend = resend_at; 118 146 continue; /* Ack point not yet reached */ ··· 148 120 149 121 rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked); 150 122 151 - trace_rxrpc_retransmit(call, txb->seq, txb->serial, 152 - ktime_sub(resend_at, now)); 123 + req.tq = tq; 124 + req.seq = seq; 125 + req.n = 1; 126 + rxrpc_retransmit_data(call, &req, rto); 153 127 154 - txb->flags |= RXRPC_TXBUF_RESENT; 155 - 
rxrpc_transmit_data(call, txb, 1); 156 - did_send = true; 157 - now = ktime_get_real(); 158 - 159 - if (list_is_last(&txb->call_link, &call->tx_buffer)) 128 + if (after_eq(seq, call->tx_top)) 160 129 goto no_further_resend; 161 - txb = list_next_entry(txb, call_link); 162 130 } 163 131 } 164 132 ··· 163 139 * ACK'd or NACK'd in due course, so don't worry about it here; here we 164 140 * need to consider retransmitting anything beyond that point. 165 141 */ 166 - if (after_eq(call->acks_prev_seq, call->tx_transmitted)) 142 + seq = call->acks_prev_seq; 143 + if (after_eq(seq, call->tx_transmitted)) 167 144 goto no_further_resend; 145 + seq++; 168 146 169 - list_for_each_entry_from(txb, &call->tx_buffer, call_link) { 170 - resend_at = ktime_add(txb->last_sent, rto); 147 + while (after_eq(seq, tq->qbase + RXRPC_NR_TXQUEUE)) 148 + tq = tq->next; 171 149 172 - if (before_eq(txb->seq, call->acks_prev_seq)) 150 + while (before_eq(seq, call->tx_transmitted)) { 151 + qix = seq - tq->qbase; 152 + if (qix >= RXRPC_NR_TXQUEUE) { 153 + tq = tq->next; 173 154 continue; 174 - if (after(txb->seq, call->tx_transmitted)) 175 - break; /* Not transmitted yet */ 155 + } 156 + txb = tq->bufs[qix]; 157 + resend_at = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[qix]); 158 + resend_at = ktime_add(resend_at, rto); 176 159 177 160 if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE && 178 161 before(txb->serial, ntohl(ack->serial))) 179 162 goto do_resend; /* Wasn't accounted for by a more recent ping. 
*/ 180 163 181 - if (ktime_after(resend_at, now)) { 164 + if (ktime_after(resend_at, req.now)) { 182 165 if (ktime_before(resend_at, next_resend)) 183 166 next_resend = resend_at; 167 + seq++; 184 168 continue; 185 169 } 186 170 187 171 do_resend: 188 172 unacked = true; 189 173 190 - txb->flags |= RXRPC_TXBUF_RESENT; 191 - rxrpc_transmit_data(call, txb, 1); 192 - did_send = true; 193 - rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); 194 - now = ktime_get_real(); 174 + req.tq = tq; 175 + req.seq = seq; 176 + req.n = 1; 177 + rxrpc_retransmit_data(call, &req, rto); 178 + seq++; 195 179 } 196 180 197 181 no_further_resend: ··· 207 175 if (resend_at < KTIME_MAX) { 208 176 delay = rxrpc_get_rto_backoff(call->peer, did_send); 209 177 resend_at = ktime_add(resend_at, delay); 210 - trace_rxrpc_timer_set(call, resend_at - now, rxrpc_timer_trace_resend_reset); 178 + trace_rxrpc_timer_set(call, resend_at - req.now, 179 + rxrpc_timer_trace_resend_reset); 211 180 } 212 181 call->resend_at = resend_at; 213 182 ··· 219 186 * that an ACK got lost somewhere. Send a ping to find out instead of 220 187 * retransmitting data. 221 188 */ 222 - if (!did_send) { 189 + if (!req.did_send) { 223 190 ktime_t next_ping = ktime_add_us(call->acks_latest_ts, 224 191 call->peer->srtt_us >> 3); 225 192 226 - if (ktime_sub(next_ping, now) <= 0) 193 + if (ktime_sub(next_ping, req.now) <= 0) 227 194 rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, 228 195 rxrpc_propose_ack_ping_for_0_retrans); 229 196 } ··· 273 240 } 274 241 275 242 /* 276 - * Decant some if the sendmsg prepared queue into the transmission buffer. 243 + * Transmit some as-yet untransmitted data. 
277 244 */ 278 - static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) 245 + static void rxrpc_transmit_fresh_data(struct rxrpc_call *call) 279 246 { 280 247 int space = rxrpc_tx_window_space(call); 281 248 282 249 if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { 283 - if (list_empty(&call->tx_sendmsg)) 250 + if (call->send_top == call->tx_top) 284 251 return; 285 252 rxrpc_expose_client_call(call); 286 253 } 287 254 288 255 while (space > 0) { 289 - struct rxrpc_txbuf *head = NULL, *txb; 290 - int count = 0, limit = min(space, 1); 256 + struct rxrpc_send_data_req req = { 257 + .now = ktime_get_real(), 258 + .seq = call->tx_transmitted + 1, 259 + .n = 0, 260 + }; 261 + struct rxrpc_txqueue *tq; 262 + struct rxrpc_txbuf *txb; 263 + rxrpc_seq_t send_top, seq; 264 + int limit = min(space, 1); 291 265 292 - if (list_empty(&call->tx_sendmsg)) 266 + /* Order send_top before the contents of the new txbufs and 267 + * txqueue pointers 268 + */ 269 + send_top = smp_load_acquire(&call->send_top); 270 + if (call->tx_top == send_top) 293 271 break; 294 272 295 - trace_rxrpc_transmit(call, space); 273 + trace_rxrpc_transmit(call, send_top, space); 296 274 297 - spin_lock(&call->tx_lock); 275 + tq = call->tx_qtail; 276 + seq = call->tx_top; 277 + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_decant); 278 + 298 279 do { 299 - txb = list_first_entry(&call->tx_sendmsg, 300 - struct rxrpc_txbuf, call_link); 301 - if (!head) 302 - head = txb; 303 - list_move_tail(&txb->call_link, &call->tx_buffer); 304 - count++; 280 + int ix; 281 + 282 + seq++; 283 + ix = seq & RXRPC_TXQ_MASK; 284 + if (!ix) { 285 + tq = tq->next; 286 + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_decant_advance); 287 + } 288 + if (!req.tq) 289 + req.tq = tq; 290 + txb = tq->bufs[ix]; 291 + req.n++; 305 292 if (!txb->jumboable) 306 293 break; 307 - } while (count < limit && !list_empty(&call->tx_sendmsg)); 294 + } while (req.n < limit && before(seq, send_top)); 308 295 309 - spin_unlock(&call->tx_lock); 310 - 311 - 
call->tx_top = txb->seq; 312 - if (txb->flags & RXRPC_LAST_PACKET) 296 + if (txb->flags & RXRPC_LAST_PACKET) { 313 297 rxrpc_close_tx_phase(call); 298 + tq = NULL; 299 + } 300 + call->tx_qtail = tq; 301 + call->tx_top = seq; 314 302 315 - space -= count; 316 - rxrpc_transmit_data(call, head, count); 303 + space -= req.n; 304 + rxrpc_send_data_packet(call, &req); 317 305 } 318 306 } 319 307 ··· 342 288 { 343 289 switch (__rxrpc_call_state(call)) { 344 290 case RXRPC_CALL_SERVER_ACK_REQUEST: 345 - if (list_empty(&call->tx_sendmsg)) 291 + if (call->tx_bottom == READ_ONCE(call->send_top)) 346 292 return; 347 293 rxrpc_begin_service_reply(call); 348 294 fallthrough; ··· 351 297 case RXRPC_CALL_CLIENT_SEND_REQUEST: 352 298 if (!rxrpc_tx_window_space(call)) 353 299 return; 354 - if (list_empty(&call->tx_sendmsg)) { 300 + if (call->tx_bottom == READ_ONCE(call->send_top)) { 355 301 rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow); 356 302 return; 357 303 } 358 - rxrpc_decant_prepared_tx(call); 304 + rxrpc_transmit_fresh_data(call); 359 305 break; 360 306 default: 361 307 return; ··· 557 503 call->peer->pmtud_pending) 558 504 rxrpc_send_probe_for_pmtud(call); 559 505 } 560 - if (call->acks_hard_ack != call->tx_bottom) 561 - rxrpc_shrink_call_tx_buffer(call); 562 506 _leave(""); 563 507 return true; 564 508
+21 -17
net/rxrpc/call_object.c
··· 146 146 INIT_LIST_HEAD(&call->recvmsg_link); 147 147 INIT_LIST_HEAD(&call->sock_link); 148 148 INIT_LIST_HEAD(&call->attend_link); 149 - INIT_LIST_HEAD(&call->tx_sendmsg); 150 - INIT_LIST_HEAD(&call->tx_buffer); 151 149 skb_queue_head_init(&call->rx_queue); 152 150 skb_queue_head_init(&call->recvmsg_queue); 153 151 skb_queue_head_init(&call->rx_oos_queue); ··· 530 532 } 531 533 532 534 /* 533 - * Clean up the Rx skb ring. 535 + * Clean up the transmission buffers. 534 536 */ 535 - static void rxrpc_cleanup_ring(struct rxrpc_call *call) 537 + static void rxrpc_cleanup_tx_buffers(struct rxrpc_call *call) 538 + { 539 + struct rxrpc_txqueue *tq, *next; 540 + 541 + for (tq = call->tx_queue; tq; tq = next) { 542 + next = tq->next; 543 + for (int i = 0; i < RXRPC_NR_TXQUEUE; i++) 544 + if (tq->bufs[i]) 545 + rxrpc_put_txbuf(tq->bufs[i], rxrpc_txbuf_put_cleaned); 546 + trace_rxrpc_tq(call, tq, 0, rxrpc_tq_cleaned); 547 + kfree(tq); 548 + } 549 + } 550 + 551 + /* 552 + * Clean up the receive buffers. 
553 + */ 554 + static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call) 536 555 { 537 556 rxrpc_purge_queue(&call->recvmsg_queue); 538 557 rxrpc_purge_queue(&call->rx_queue); ··· 688 673 static void rxrpc_destroy_call(struct work_struct *work) 689 674 { 690 675 struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer); 691 - struct rxrpc_txbuf *txb; 692 676 693 677 del_timer_sync(&call->timer); 694 678 695 679 rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); 696 - rxrpc_cleanup_ring(call); 697 - while ((txb = list_first_entry_or_null(&call->tx_sendmsg, 698 - struct rxrpc_txbuf, call_link))) { 699 - list_del(&txb->call_link); 700 - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned); 701 - } 702 - while ((txb = list_first_entry_or_null(&call->tx_buffer, 703 - struct rxrpc_txbuf, call_link))) { 704 - list_del(&txb->call_link); 705 - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned); 706 - } 707 - 680 + rxrpc_cleanup_tx_buffers(call); 681 + rxrpc_cleanup_rx_buffers(call); 708 682 rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned); 709 683 rxrpc_put_connection(call->conn, rxrpc_conn_put_call); 710 684 rxrpc_deactivate_bundle(call->bundle);
+58 -16
net/rxrpc/input.c
··· 214 214 static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, 215 215 struct rxrpc_ack_summary *summary) 216 216 { 217 - struct rxrpc_txbuf *txb; 217 + struct rxrpc_txqueue *tq = call->tx_queue; 218 + rxrpc_seq_t seq = call->tx_bottom + 1; 218 219 bool rot_last = false; 219 220 220 - list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) { 221 - if (before_eq(txb->seq, call->acks_hard_ack)) 222 - continue; 223 - if (txb->flags & RXRPC_LAST_PACKET) { 221 + _enter("%x,%x,%x", call->tx_bottom, call->acks_hard_ack, to); 222 + 223 + trace_rxrpc_tx_rotate(call, seq, to); 224 + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate); 225 + 226 + /* We may have a left over fully-consumed buffer at the front that we 227 + * couldn't drop before (rotate_and_keep below). 228 + */ 229 + if (seq == call->tx_qbase + RXRPC_NR_TXQUEUE) { 230 + call->tx_qbase += RXRPC_NR_TXQUEUE; 231 + call->tx_queue = tq->next; 232 + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free); 233 + kfree(tq); 234 + tq = call->tx_queue; 235 + } 236 + 237 + do { 238 + unsigned int ix = seq - call->tx_qbase; 239 + 240 + _debug("tq=%x seq=%x i=%d f=%x", tq->qbase, seq, ix, tq->bufs[ix]->flags); 241 + if (tq->bufs[ix]->flags & RXRPC_LAST_PACKET) { 224 242 set_bit(RXRPC_CALL_TX_LAST, &call->flags); 225 243 rot_last = true; 226 244 } 227 - if (txb->seq == to) 228 - break; 245 + rxrpc_put_txbuf(tq->bufs[ix], rxrpc_txbuf_put_rotated); 246 + tq->bufs[ix] = NULL; 247 + 248 + WRITE_ONCE(call->tx_bottom, seq); 249 + WRITE_ONCE(call->acks_hard_ack, seq); 250 + trace_rxrpc_txqueue(call, (rot_last ? 
251 + rxrpc_txqueue_rotate_last : 252 + rxrpc_txqueue_rotate)); 253 + 254 + seq++; 255 + if (!(seq & RXRPC_TXQ_MASK)) { 256 + prefetch(tq->next); 257 + if (tq != call->tx_qtail) { 258 + call->tx_qbase += RXRPC_NR_TXQUEUE; 259 + call->tx_queue = tq->next; 260 + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free); 261 + kfree(tq); 262 + tq = call->tx_queue; 263 + } else { 264 + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_keep); 265 + tq = NULL; 266 + break; 267 + } 268 + } 269 + 270 + } while (before_eq(seq, to)); 271 + 272 + if (rot_last) { 273 + set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags); 274 + if (tq) { 275 + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free); 276 + kfree(tq); 277 + call->tx_queue = NULL; 278 + } 229 279 } 230 280 231 - if (rot_last) 232 - set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags); 233 - 234 - _enter("%x,%x,%x,%d", to, call->acks_hard_ack, call->tx_top, rot_last); 281 + _debug("%x,%x,%x,%d", to, call->acks_hard_ack, call->tx_top, rot_last); 235 282 236 283 if (call->acks_lowest_nak == call->acks_hard_ack) { 237 284 call->acks_lowest_nak = to; ··· 287 240 call->acks_lowest_nak = to; 288 241 } 289 242 290 - smp_store_release(&call->acks_hard_ack, to); 291 - 292 - trace_rxrpc_txqueue(call, (rot_last ? 293 - rxrpc_txqueue_rotate_last : 294 - rxrpc_txqueue_rotate)); 295 243 wake_up(&call->waitq); 296 244 return rot_last; 297 245 }
+82 -83
net/rxrpc/output.c
··· 375 375 /* 376 376 * Prepare a (sub)packet for transmission. 377 377 */ 378 - static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_txbuf *txb, 379 - rxrpc_serial_t serial, 380 - int subpkt, int nr_subpkts, 381 - ktime_t now) 378 + static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, 379 + struct rxrpc_send_data_req *req, 380 + struct rxrpc_txbuf *txb, 381 + rxrpc_serial_t serial, int subpkt) 382 382 { 383 383 struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; 384 384 struct rxrpc_jumbo_header *jumbo = (void *)(whdr + 1) - sizeof(*jumbo); ··· 386 386 struct rxrpc_connection *conn = call->conn; 387 387 struct kvec *kv = &call->local->kvec[subpkt]; 388 388 size_t len = txb->pkt_len; 389 - bool last, more; 389 + bool last; 390 390 u8 flags; 391 391 392 392 _enter("%x,%zd", txb->seq, len); ··· 401 401 flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; 402 402 last = txb->flags & RXRPC_LAST_PACKET; 403 403 404 - if (subpkt < nr_subpkts - 1) { 404 + if (subpkt < req->n - 1) { 405 405 len = RXRPC_JUMBO_DATALEN; 406 406 goto dont_set_request_ack; 407 407 } 408 - 409 - more = (!list_is_last(&txb->call_link, &call->tx_buffer) || 410 - !list_empty(&call->tx_sendmsg)); 411 408 412 409 /* If our RTT cache needs working on, request an ACK. Also request 413 410 * ACKs if a DATA packet appears to have been lost. 
··· 427 430 why = rxrpc_reqack_more_rtt; 428 431 else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) 429 432 why = rxrpc_reqack_old_rtt; 430 - else if (!last && !more) 433 + else if (!last && !after(READ_ONCE(call->send_top), txb->seq)) 431 434 why = rxrpc_reqack_app_stall; 432 435 else 433 436 goto dont_set_request_ack; ··· 436 439 trace_rxrpc_req_ack(call->debug_id, txb->seq, why); 437 440 if (why != rxrpc_reqack_no_srv_last) { 438 441 flags |= RXRPC_REQUEST_ACK; 439 - rxrpc_begin_rtt_probe(call, serial, now, rxrpc_rtt_tx_data); 440 - call->peer->rtt_last_req = now; 442 + rxrpc_begin_rtt_probe(call, serial, req->now, rxrpc_rtt_tx_data); 443 + call->peer->rtt_last_req = req->now; 441 444 } 442 445 dont_set_request_ack: 443 446 444 447 /* The jumbo header overlays the wire header in the txbuf. */ 445 - if (subpkt < nr_subpkts - 1) 448 + if (subpkt < req->n - 1) 446 449 flags |= RXRPC_JUMBO_PACKET; 447 450 else 448 451 flags &= ~RXRPC_JUMBO_PACKET; ··· 467 470 } 468 471 469 472 /* 473 + * Prepare a transmission queue object for initial transmission. Returns the 474 + * number of microseconds since the transmission queue base timestamp. 475 + */ 476 + static unsigned int rxrpc_prepare_txqueue(struct rxrpc_txqueue *tq, 477 + struct rxrpc_send_data_req *req) 478 + { 479 + if (!tq) 480 + return 0; 481 + if (tq->xmit_ts_base == KTIME_MIN) { 482 + tq->xmit_ts_base = req->now; 483 + return 0; 484 + } 485 + return ktime_to_us(ktime_sub(req->now, tq->xmit_ts_base)); 486 + } 487 + 488 + /* 470 489 * Prepare a (jumbo) packet for transmission. 
471 490 */ 472 - static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *head, int n) 491 + static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req) 473 492 { 474 - struct rxrpc_txbuf *txb = head; 493 + struct rxrpc_txqueue *tq = req->tq; 475 494 rxrpc_serial_t serial; 476 - ktime_t now = ktime_get_real(); 495 + unsigned int xmit_ts; 496 + rxrpc_seq_t seq = req->seq; 477 497 size_t len = 0; 478 498 479 - /* Each transmission of a Tx packet needs a new serial number */ 480 - serial = rxrpc_get_next_serials(call->conn, n); 499 + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit); 481 500 482 - for (int i = 0; i < n; i++) { 483 - txb->last_sent = now; 484 - len += rxrpc_prepare_data_subpacket(call, txb, serial, i, n, now); 501 + /* Each transmission of a Tx packet needs a new serial number */ 502 + serial = rxrpc_get_next_serials(call->conn, req->n); 503 + 504 + call->tx_last_sent = req->now; 505 + xmit_ts = rxrpc_prepare_txqueue(tq, req); 506 + prefetch(tq->next); 507 + 508 + for (int i = 0;;) { 509 + int ix = seq & RXRPC_TXQ_MASK; 510 + struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK]; 511 + 512 + _debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq); 513 + tq->segment_xmit_ts[ix] = xmit_ts; 514 + len += rxrpc_prepare_data_subpacket(call, req, txb, serial, i); 485 515 serial++; 486 - txb = list_next_entry(txb, call_link); 516 + seq++; 517 + i++; 518 + if (i >= req->n) 519 + break; 520 + if (!(seq & RXRPC_TXQ_MASK)) { 521 + tq = tq->next; 522 + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit_advance); 523 + xmit_ts = rxrpc_prepare_txqueue(tq, req); 524 + } 487 525 } 488 526 489 527 /* Set timeouts */ 490 - if (call->peer->rtt_count > 1) { 491 - ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); 492 - 493 - call->ack_lost_at = ktime_add(now, delay); 494 - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); 495 - } 496 - 497 528 if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, 
&call->flags)) { 498 529 ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo)); 499 530 500 - call->expect_rx_by = ktime_add(now, delay); 531 + call->expect_rx_by = ktime_add(req->now, delay); 501 532 trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx); 502 533 } 534 + if (call->resend_at == KTIME_MAX) { 535 + ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); 503 536 504 - rxrpc_set_keepalive(call, now); 537 + call->resend_at = ktime_add(req->now, delay); 538 + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend); 539 + } 540 + 541 + rxrpc_set_keepalive(call, req->now); 505 542 return len; 506 543 } 507 544 508 545 /* 509 - * send a packet through the transport endpoint 546 + * Send one or more packets through the transport endpoint 510 547 */ 511 - static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n) 548 + void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req) 512 549 { 513 550 struct rxrpc_connection *conn = call->conn; 514 551 enum rxrpc_tx_point frag; 552 + struct rxrpc_txqueue *tq = req->tq; 553 + struct rxrpc_txbuf *txb; 515 554 struct msghdr msg; 555 + rxrpc_seq_t seq = req->seq; 516 556 size_t len; 517 557 bool new_call = test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags); 518 558 int ret; 519 559 520 - _enter("%x,{%d}", txb->seq, txb->pkt_len); 560 + _enter("%x,%x-%x", tq->qbase, seq, seq + req->n - 1); 521 561 522 - len = rxrpc_prepare_data_packet(call, txb, n); 562 + len = rxrpc_prepare_data_packet(call, req); 563 + txb = tq->bufs[seq & RXRPC_TXQ_MASK]; 523 564 524 - iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, n, len); 565 + iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, req->n, len); 525 566 526 567 msg.msg_name = &call->peer->srx.transport; 527 568 msg.msg_namelen = call->peer->srx.transport_len; ··· 570 535 /* Send the packet with the don't fragment bit set unless we think it's 571 536 * too big or if this is a retransmission. 
572 537 */ 573 - if (txb->seq == call->tx_transmitted + 1 && 538 + if (seq == call->tx_transmitted + 1 && 574 539 len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) { 575 540 rxrpc_local_dont_fragment(conn->local, false); 576 541 frag = rxrpc_tx_point_call_data_frag; ··· 583 548 * retransmission algorithm doesn't try to resend what we haven't sent 584 549 * yet. 585 550 */ 586 - if (txb->seq == call->tx_transmitted + 1) 587 - call->tx_transmitted = txb->seq + n - 1; 551 + if (seq == call->tx_transmitted + 1) 552 + call->tx_transmitted = seq + req->n - 1; 588 553 589 554 if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { 590 555 static int lose; ··· 621 586 rxrpc_tx_backoff(call, ret); 622 587 623 588 if (ret < 0) { 624 - /* Cancel the call if the initial transmission fails, 625 - * particularly if that's due to network routing issues that 626 - * aren't going away anytime soon. The layer above can arrange 627 - * the retransmission. 589 + /* Cancel the call if the initial transmission fails or if we 590 + * hit due to network routing issues that aren't going away 591 + * anytime soon. The layer above can arrange the 592 + * retransmission. 628 593 */ 629 - if (new_call) 594 + if (new_call || 595 + ret == -ENETUNREACH || 596 + ret == -EHOSTUNREACH || 597 + ret == -ECONNREFUSED) 630 598 rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 631 599 RX_USER_ABORT, ret); 632 600 } 633 601 634 602 done: 635 603 _leave(" = %d [%u]", ret, call->peer->max_data); 636 - return ret; 637 604 } 638 605 639 606 /* ··· 809 772 810 773 peer->last_tx_at = ktime_get_seconds(); 811 774 _leave(""); 812 - } 813 - 814 - /* 815 - * Schedule an instant Tx resend. 816 - */ 817 - static inline void rxrpc_instant_resend(struct rxrpc_call *call, 818 - struct rxrpc_txbuf *txb) 819 - { 820 - if (!__rxrpc_call_is_complete(call)) 821 - kdebug("resend"); 822 - } 823 - 824 - /* 825 - * Transmit a packet, possibly gluing several subpackets together. 
826 - */ 827 - void rxrpc_transmit_data(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n) 828 - { 829 - int ret; 830 - 831 - ret = rxrpc_send_data_packet(call, txb, n); 832 - if (ret < 0) { 833 - switch (ret) { 834 - case -ENETUNREACH: 835 - case -EHOSTUNREACH: 836 - case -ECONNREFUSED: 837 - rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 838 - 0, ret); 839 - break; 840 - default: 841 - _debug("need instant resend %d", ret); 842 - rxrpc_instant_resend(call, txb); 843 - } 844 - } else { 845 - ktime_t delay = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC); 846 - 847 - call->resend_at = ktime_add(ktime_get_real(), delay); 848 - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend_tx); 849 - } 850 775 }
+57 -12
net/rxrpc/sendmsg.c
··· 98 98 99 99 if (_tx_win) 100 100 *_tx_win = tx_bottom; 101 - return call->tx_prepared - tx_bottom < 256; 101 + return call->send_top - tx_bottom < 256; 102 102 } 103 103 104 104 /* ··· 242 242 struct rxrpc_txbuf *txb, 243 243 rxrpc_notify_end_tx_t notify_end_tx) 244 244 { 245 + struct rxrpc_txqueue *sq = call->send_queue; 245 246 rxrpc_seq_t seq = txb->seq; 246 247 bool poke, last = txb->flags & RXRPC_LAST_PACKET; 247 - 248 + int ix = seq & RXRPC_TXQ_MASK; 248 249 rxrpc_inc_stat(call->rxnet, stat_tx_data); 249 250 250 - ASSERTCMP(txb->seq, ==, call->tx_prepared + 1); 251 - 252 - /* We have to set the timestamp before queueing as the retransmit 253 - * algorithm can see the packet as soon as we queue it. 254 - */ 255 - txb->last_sent = ktime_get_real(); 251 + ASSERTCMP(txb->seq, ==, call->send_top + 1); 256 252 257 253 if (last) 258 254 trace_rxrpc_txqueue(call, rxrpc_txqueue_queue_last); 259 255 else 260 256 trace_rxrpc_txqueue(call, rxrpc_txqueue_queue); 261 257 258 + if (WARN_ON_ONCE(sq->bufs[ix])) 259 + trace_rxrpc_tq(call, sq, seq, rxrpc_tq_queue_dup); 260 + else 261 + trace_rxrpc_tq(call, sq, seq, rxrpc_tq_queue); 262 + 262 263 /* Add the packet to the call's output buffer */ 263 264 spin_lock(&call->tx_lock); 264 - poke = list_empty(&call->tx_sendmsg); 265 - list_add_tail(&txb->call_link, &call->tx_sendmsg); 266 - call->tx_prepared = seq; 267 - if (last) 265 + poke = (READ_ONCE(call->tx_bottom) == call->send_top); 266 + sq->bufs[ix] = txb; 267 + /* Order send_top after the queue->next pointer and txb content. */ 268 + smp_store_release(&call->send_top, seq); 269 + if (last) { 268 270 rxrpc_notify_end_tx(rx, call, notify_end_tx); 271 + call->send_queue = NULL; 272 + } 269 273 spin_unlock(&call->tx_lock); 270 274 271 275 if (poke) 272 276 rxrpc_poke_call(call, rxrpc_call_poke_start); 277 + } 278 + 279 + /* 280 + * Allocate a new txqueue unit and add it to the transmission queue. 
281 + */ 282 + static int rxrpc_alloc_txqueue(struct sock *sk, struct rxrpc_call *call) 283 + { 284 + struct rxrpc_txqueue *tq; 285 + 286 + tq = kzalloc(sizeof(*tq), sk->sk_allocation); 287 + if (!tq) 288 + return -ENOMEM; 289 + 290 + tq->xmit_ts_base = KTIME_MIN; 291 + for (int i = 0; i < RXRPC_NR_TXQUEUE; i++) 292 + tq->segment_xmit_ts[i] = UINT_MAX; 293 + 294 + if (call->send_queue) { 295 + tq->qbase = call->send_top + 1; 296 + call->send_queue->next = tq; 297 + call->send_queue = tq; 298 + } else if (WARN_ON(call->tx_queue)) { 299 + kfree(tq); 300 + return -ENOMEM; 301 + } else { 302 + tq->qbase = 0; 303 + call->tx_qbase = 0; 304 + call->send_queue = tq; 305 + call->tx_qtail = tq; 306 + call->tx_queue = tq; 307 + } 308 + 309 + trace_rxrpc_tq(call, tq, call->send_top, rxrpc_tq_alloc); 310 + return 0; 273 311 } 274 312 275 313 /* ··· 383 345 384 346 if (!rxrpc_check_tx_space(call, NULL)) 385 347 goto wait_for_space; 348 + 349 + /* See if we need to begin/extend the Tx queue. */ 350 + if (!call->send_queue || !((call->send_top + 1) & RXRPC_TXQ_MASK)) { 351 + ret = rxrpc_alloc_txqueue(sk, call); 352 + if (ret < 0) 353 + goto maybe_error; 354 + } 386 355 387 356 /* Work out the maximum size of a packet. Assume that 388 357 * the security header is going to be in the padded
+1 -40
net/rxrpc/txbuf.c
··· 43 43 44 44 whdr = buf + hoff; 45 45 46 - INIT_LIST_HEAD(&txb->call_link); 47 - INIT_LIST_HEAD(&txb->tx_link); 48 46 refcount_set(&txb->ref, 1); 49 - txb->last_sent = KTIME_MIN; 50 47 txb->call_debug_id = call->debug_id; 51 48 txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); 52 49 txb->alloc_size = data_size; 53 50 txb->space = data_size; 54 51 txb->offset = sizeof(*whdr); 55 52 txb->flags = call->conn->out_clientflag; 56 - txb->seq = call->tx_prepared + 1; 53 + txb->seq = call->send_top + 1; 57 54 txb->nr_kvec = 1; 58 55 txb->kvec[0].iov_base = whdr; 59 56 txb->kvec[0].iov_len = sizeof(*whdr); ··· 111 114 filler = buf + sizeof(*whdr) + sizeof(*ack) + 1; 112 115 trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3; 113 116 114 - INIT_LIST_HEAD(&txb->call_link); 115 - INIT_LIST_HEAD(&txb->tx_link); 116 117 refcount_set(&txb->ref, 1); 117 118 txb->call_debug_id = call->debug_id; 118 119 txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); ··· 194 199 if (dead) 195 200 rxrpc_free_txbuf(txb); 196 201 } 197 - } 198 - 199 - /* 200 - * Shrink the transmit buffer. 
201 - */ 202 - void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call) 203 - { 204 - struct rxrpc_txbuf *txb; 205 - rxrpc_seq_t hard_ack = smp_load_acquire(&call->acks_hard_ack); 206 - bool wake = false; 207 - 208 - _enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top); 209 - 210 - while ((txb = list_first_entry_or_null(&call->tx_buffer, 211 - struct rxrpc_txbuf, call_link))) { 212 - hard_ack = call->acks_hard_ack; 213 - if (before(hard_ack, txb->seq)) 214 - break; 215 - 216 - if (txb->seq != call->tx_bottom + 1) 217 - rxrpc_see_txbuf(txb, rxrpc_txbuf_see_out_of_step); 218 - ASSERTCMP(txb->seq, ==, call->tx_bottom + 1); 219 - WRITE_ONCE(call->tx_bottom, call->tx_bottom + 1); 220 - list_del_rcu(&txb->call_link); 221 - 222 - trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue); 223 - 224 - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated); 225 - if (after(call->acks_hard_ack, call->tx_bottom + 128)) 226 - wake = true; 227 - } 228 - 229 - if (wake) 230 - wake_up(&call->waitq); 231 202 }