Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue

Tony Nguyen says:

====================
idpf: replace Tx flow scheduling buffer ring with buffer pool

Joshua Hay says:

This series fixes a stability issue in the flow scheduling Tx send/clean
path that results in a Tx timeout.

The existing guardrails in the Tx path were not sufficient to prevent
the driver from reusing completion tags that were still in flight (held
by the HW). This collision would cause the driver to erroneously clean
the wrong packet, thus leaving the descriptor ring in a bad state.

The main point of this fix is to replace the flow scheduling buffer ring
with a large pool/array of buffers. The completion tag is then simply
the index into this array. The driver tracks the free tags and pulls
the next free one from a refillq. The cleaning routines simply use the
completion tag from the completion descriptor to index into the array to
quickly find the buffers to clean.

All of the code to support this is added first to ensure traffic still
passes with each patch. The final patch then removes all of the
obsolete stashing code.

* '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
idpf: remove obsolete stashing code
idpf: stop Tx if there are insufficient buffer resources
idpf: replace flow scheduling buffer ring with buffer pool
idpf: simplify and fix splitq Tx packet rollback error path
idpf: improve when to set RE bit logic
idpf: add support for Tx refillqs in flow scheduling mode
====================

Link: https://patch.msgid.link/20250821180100.401955-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+371 -530
+57 -4
drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
··· 180 180 } 181 181 182 182 /** 183 + * idpf_tx_singleq_dma_map_error - handle TX DMA map errors 184 + * @txq: queue to send buffer on 185 + * @skb: send buffer 186 + * @first: original first buffer info buffer for packet 187 + * @idx: starting point on ring to unwind 188 + */ 189 + static void idpf_tx_singleq_dma_map_error(struct idpf_tx_queue *txq, 190 + struct sk_buff *skb, 191 + struct idpf_tx_buf *first, u16 idx) 192 + { 193 + struct libeth_sq_napi_stats ss = { }; 194 + struct libeth_cq_pp cp = { 195 + .dev = txq->dev, 196 + .ss = &ss, 197 + }; 198 + 199 + u64_stats_update_begin(&txq->stats_sync); 200 + u64_stats_inc(&txq->q_stats.dma_map_errs); 201 + u64_stats_update_end(&txq->stats_sync); 202 + 203 + /* clear dma mappings for failed tx_buf map */ 204 + for (;;) { 205 + struct idpf_tx_buf *tx_buf; 206 + 207 + tx_buf = &txq->tx_buf[idx]; 208 + libeth_tx_complete(tx_buf, &cp); 209 + if (tx_buf == first) 210 + break; 211 + if (idx == 0) 212 + idx = txq->desc_count; 213 + idx--; 214 + } 215 + 216 + if (skb_is_gso(skb)) { 217 + union idpf_tx_flex_desc *tx_desc; 218 + 219 + /* If we failed a DMA mapping for a TSO packet, we will have 220 + * used one additional descriptor for a context 221 + * descriptor. Reset that here. 
222 + */ 223 + tx_desc = &txq->flex_tx[idx]; 224 + memset(tx_desc, 0, sizeof(*tx_desc)); 225 + if (idx == 0) 226 + idx = txq->desc_count; 227 + idx--; 228 + } 229 + 230 + /* Update tail in case netdev_xmit_more was previously true */ 231 + idpf_tx_buf_hw_update(txq, idx, false); 232 + } 233 + 234 + /** 183 235 * idpf_tx_singleq_map - Build the Tx base descriptor 184 236 * @tx_q: queue to send buffer on 185 237 * @first: first buffer info buffer to use ··· 271 219 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 272 220 unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; 273 221 274 - if (dma_mapping_error(tx_q->dev, dma)) 275 - return idpf_tx_dma_map_error(tx_q, skb, first, i); 222 + if (unlikely(dma_mapping_error(tx_q->dev, dma))) 223 + return idpf_tx_singleq_dma_map_error(tx_q, skb, 224 + first, i); 276 225 277 226 /* record length, and DMA address */ 278 227 dma_unmap_len_set(tx_buf, len, size); ··· 415 362 { 416 363 struct idpf_tx_offload_params offload = { }; 417 364 struct idpf_tx_buf *first; 365 + u32 count, buf_count = 1; 418 366 int csum, tso, needed; 419 - unsigned int count; 420 367 __be16 protocol; 421 368 422 - count = idpf_tx_desc_count_required(tx_q, skb); 369 + count = idpf_tx_res_count_required(tx_q, skb, &buf_count); 423 370 if (unlikely(!count)) 424 371 return idpf_tx_drop_skb(tx_q, skb); 425 372
+281 -472
drivers/net/ethernet/intel/idpf/idpf_txrx.c
··· 8 8 #include "idpf_ptp.h" 9 9 #include "idpf_virtchnl.h" 10 10 11 - struct idpf_tx_stash { 12 - struct hlist_node hlist; 13 - struct libeth_sqe buf; 14 - }; 15 - 16 - #define idpf_tx_buf_compl_tag(buf) (*(u32 *)&(buf)->priv) 11 + #define idpf_tx_buf_next(buf) (*(u32 *)&(buf)->priv) 17 12 LIBETH_SQE_CHECK_PRIV(u32); 18 13 19 14 static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, 20 15 unsigned int count); 21 - 22 - /** 23 - * idpf_buf_lifo_push - push a buffer pointer onto stack 24 - * @stack: pointer to stack struct 25 - * @buf: pointer to buf to push 26 - * 27 - * Returns 0 on success, negative on failure 28 - **/ 29 - static int idpf_buf_lifo_push(struct idpf_buf_lifo *stack, 30 - struct idpf_tx_stash *buf) 31 - { 32 - if (unlikely(stack->top == stack->size)) 33 - return -ENOSPC; 34 - 35 - stack->bufs[stack->top++] = buf; 36 - 37 - return 0; 38 - } 39 - 40 - /** 41 - * idpf_buf_lifo_pop - pop a buffer pointer from stack 42 - * @stack: pointer to stack struct 43 - **/ 44 - static struct idpf_tx_stash *idpf_buf_lifo_pop(struct idpf_buf_lifo *stack) 45 - { 46 - if (unlikely(!stack->top)) 47 - return NULL; 48 - 49 - return stack->bufs[--stack->top]; 50 - } 51 16 52 17 /** 53 18 * idpf_tx_timeout - Respond to a Tx Hang ··· 42 77 static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) 43 78 { 44 79 struct libeth_sq_napi_stats ss = { }; 45 - struct idpf_buf_lifo *buf_stack; 46 - struct idpf_tx_stash *stash; 47 80 struct libeth_cq_pp cp = { 48 81 .dev = txq->dev, 49 82 .ss = &ss, 50 83 }; 51 - struct hlist_node *tmp; 52 - u32 i, tag; 84 + u32 i; 53 85 54 86 /* Buffers already cleared, nothing to do */ 55 87 if (!txq->tx_buf) 56 88 return; 57 89 58 90 /* Free all the Tx buffer sk_buffs */ 59 - for (i = 0; i < txq->desc_count; i++) 91 + for (i = 0; i < txq->buf_pool_size; i++) 60 92 libeth_tx_complete(&txq->tx_buf[i], &cp); 61 93 62 94 kfree(txq->tx_buf); 63 95 txq->tx_buf = NULL; 64 - 65 - if (!idpf_queue_has(FLOW_SCH_EN, txq)) 66 - return; 
67 - 68 - buf_stack = &txq->stash->buf_stack; 69 - if (!buf_stack->bufs) 70 - return; 71 - 72 - /* 73 - * If a Tx timeout occurred, there are potentially still bufs in the 74 - * hash table, free them here. 75 - */ 76 - hash_for_each_safe(txq->stash->sched_buf_hash, tag, tmp, stash, 77 - hlist) { 78 - if (!stash) 79 - continue; 80 - 81 - libeth_tx_complete(&stash->buf, &cp); 82 - hash_del(&stash->hlist); 83 - idpf_buf_lifo_push(buf_stack, stash); 84 - } 85 - 86 - for (i = 0; i < buf_stack->size; i++) 87 - kfree(buf_stack->bufs[i]); 88 - 89 - kfree(buf_stack->bufs); 90 - buf_stack->bufs = NULL; 91 96 } 92 97 93 98 /** ··· 73 138 74 139 if (!txq->desc_ring) 75 140 return; 141 + 142 + if (txq->refillq) 143 + kfree(txq->refillq->ring); 76 144 77 145 dmam_free_coherent(txq->dev, txq->size, txq->desc_ring, txq->dma); 78 146 txq->desc_ring = NULL; ··· 133 195 */ 134 196 static int idpf_tx_buf_alloc_all(struct idpf_tx_queue *tx_q) 135 197 { 136 - struct idpf_buf_lifo *buf_stack; 137 - int buf_size; 138 - int i; 139 - 140 198 /* Allocate book keeping buffers only. 
Buffers to be supplied to HW 141 199 * are allocated by kernel network stack and received as part of skb 142 200 */ 143 - buf_size = sizeof(struct idpf_tx_buf) * tx_q->desc_count; 144 - tx_q->tx_buf = kzalloc(buf_size, GFP_KERNEL); 201 + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) 202 + tx_q->buf_pool_size = U16_MAX; 203 + else 204 + tx_q->buf_pool_size = tx_q->desc_count; 205 + tx_q->tx_buf = kcalloc(tx_q->buf_pool_size, sizeof(*tx_q->tx_buf), 206 + GFP_KERNEL); 145 207 if (!tx_q->tx_buf) 146 208 return -ENOMEM; 147 - 148 - if (!idpf_queue_has(FLOW_SCH_EN, tx_q)) 149 - return 0; 150 - 151 - buf_stack = &tx_q->stash->buf_stack; 152 - 153 - /* Initialize tx buf stack for out-of-order completions if 154 - * flow scheduling offload is enabled 155 - */ 156 - buf_stack->bufs = kcalloc(tx_q->desc_count, sizeof(*buf_stack->bufs), 157 - GFP_KERNEL); 158 - if (!buf_stack->bufs) 159 - return -ENOMEM; 160 - 161 - buf_stack->size = tx_q->desc_count; 162 - buf_stack->top = tx_q->desc_count; 163 - 164 - for (i = 0; i < tx_q->desc_count; i++) { 165 - buf_stack->bufs[i] = kzalloc(sizeof(*buf_stack->bufs[i]), 166 - GFP_KERNEL); 167 - if (!buf_stack->bufs[i]) 168 - return -ENOMEM; 169 - } 170 209 171 210 return 0; 172 211 } ··· 159 244 struct idpf_tx_queue *tx_q) 160 245 { 161 246 struct device *dev = tx_q->dev; 247 + struct idpf_sw_queue *refillq; 162 248 int err; 163 249 164 250 err = idpf_tx_buf_alloc_all(tx_q); ··· 182 266 tx_q->next_to_use = 0; 183 267 tx_q->next_to_clean = 0; 184 268 idpf_queue_set(GEN_CHK, tx_q); 269 + 270 + if (!idpf_queue_has(FLOW_SCH_EN, tx_q)) 271 + return 0; 272 + 273 + refillq = tx_q->refillq; 274 + refillq->desc_count = tx_q->buf_pool_size; 275 + refillq->ring = kcalloc(refillq->desc_count, sizeof(u32), 276 + GFP_KERNEL); 277 + if (!refillq->ring) { 278 + err = -ENOMEM; 279 + goto err_alloc; 280 + } 281 + 282 + for (unsigned int i = 0; i < refillq->desc_count; i++) 283 + refillq->ring[i] = 284 + FIELD_PREP(IDPF_RFL_BI_BUFID_M, i) | 285 + 
FIELD_PREP(IDPF_RFL_BI_GEN_M, 286 + idpf_queue_has(GEN_CHK, refillq)); 287 + 288 + /* Go ahead and flip the GEN bit since this counts as filling 289 + * up the ring, i.e. we already ring wrapped. 290 + */ 291 + idpf_queue_change(GEN_CHK, refillq); 292 + 293 + tx_q->last_re = tx_q->desc_count - IDPF_TX_SPLITQ_RE_MIN_GAP; 185 294 186 295 return 0; 187 296 ··· 258 317 for (i = 0; i < vport->num_txq_grp; i++) { 259 318 for (j = 0; j < vport->txq_grps[i].num_txq; j++) { 260 319 struct idpf_tx_queue *txq = vport->txq_grps[i].txqs[j]; 261 - u8 gen_bits = 0; 262 - u16 bufidx_mask; 263 320 264 321 err = idpf_tx_desc_alloc(vport, txq); 265 322 if (err) { ··· 266 327 i); 267 328 goto err_out; 268 329 } 269 - 270 - if (!idpf_is_queue_model_split(vport->txq_model)) 271 - continue; 272 - 273 - txq->compl_tag_cur_gen = 0; 274 - 275 - /* Determine the number of bits in the bufid 276 - * mask and add one to get the start of the 277 - * generation bits 278 - */ 279 - bufidx_mask = txq->desc_count - 1; 280 - while (bufidx_mask >> 1) { 281 - txq->compl_tag_gen_s++; 282 - bufidx_mask = bufidx_mask >> 1; 283 - } 284 - txq->compl_tag_gen_s++; 285 - 286 - gen_bits = IDPF_TX_SPLITQ_COMPL_TAG_WIDTH - 287 - txq->compl_tag_gen_s; 288 - txq->compl_tag_gen_max = GETMAXVAL(gen_bits); 289 - 290 - /* Set bufid mask based on location of first 291 - * gen bit; it cannot simply be the descriptor 292 - * ring size-1 since we can have size values 293 - * where not all of those bits are set. 
294 - */ 295 - txq->compl_tag_bufid_m = 296 - GETMAXVAL(txq->compl_tag_gen_s); 297 330 } 298 331 299 332 if (!idpf_is_queue_model_split(vport->txq_model)) ··· 514 603 } 515 604 516 605 /** 517 - * idpf_rx_post_buf_refill - Post buffer id to refill queue 606 + * idpf_post_buf_refill - Post buffer id to refill queue 518 607 * @refillq: refill queue to post to 519 608 * @buf_id: buffer id to post 520 609 */ 521 - static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) 610 + static void idpf_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) 522 611 { 523 612 u32 nta = refillq->next_to_use; 524 613 525 614 /* store the buffer ID and the SW maintained GEN bit to the refillq */ 526 615 refillq->ring[nta] = 527 - FIELD_PREP(IDPF_RX_BI_BUFID_M, buf_id) | 528 - FIELD_PREP(IDPF_RX_BI_GEN_M, 616 + FIELD_PREP(IDPF_RFL_BI_BUFID_M, buf_id) | 617 + FIELD_PREP(IDPF_RFL_BI_GEN_M, 529 618 idpf_queue_has(GEN_CHK, refillq)); 530 619 531 620 if (unlikely(++nta == refillq->desc_count)) { ··· 906 995 struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; 907 996 908 997 for (j = 0; j < txq_grp->num_txq; j++) { 998 + if (flow_sch_en) { 999 + kfree(txq_grp->txqs[j]->refillq); 1000 + txq_grp->txqs[j]->refillq = NULL; 1001 + } 1002 + 909 1003 kfree(txq_grp->txqs[j]); 910 1004 txq_grp->txqs[j] = NULL; 911 1005 } ··· 920 1004 921 1005 kfree(txq_grp->complq); 922 1006 txq_grp->complq = NULL; 923 - 924 - if (flow_sch_en) 925 - kfree(txq_grp->stashes); 926 1007 } 927 1008 kfree(vport->txq_grps); 928 1009 vport->txq_grps = NULL; ··· 1280 1367 for (i = 0; i < vport->num_txq_grp; i++) { 1281 1368 struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; 1282 1369 struct idpf_adapter *adapter = vport->adapter; 1283 - struct idpf_txq_stash *stashes; 1284 1370 int j; 1285 1371 1286 1372 tx_qgrp->vport = vport; ··· 1290 1378 GFP_KERNEL); 1291 1379 if (!tx_qgrp->txqs[j]) 1292 1380 goto err_alloc; 1293 - } 1294 - 1295 - if (split && flow_sch_en) { 1296 - stashes = 
kcalloc(num_txq, sizeof(*stashes), 1297 - GFP_KERNEL); 1298 - if (!stashes) 1299 - goto err_alloc; 1300 - 1301 - tx_qgrp->stashes = stashes; 1302 1381 } 1303 1382 1304 1383 for (j = 0; j < tx_qgrp->num_txq; j++) { ··· 1311 1408 if (!flow_sch_en) 1312 1409 continue; 1313 1410 1314 - if (split) { 1315 - q->stash = &stashes[j]; 1316 - hash_init(q->stash->sched_buf_hash); 1317 - } 1318 - 1319 1411 idpf_queue_set(FLOW_SCH_EN, q); 1412 + 1413 + q->refillq = kzalloc(sizeof(*q->refillq), GFP_KERNEL); 1414 + if (!q->refillq) 1415 + goto err_alloc; 1416 + 1417 + idpf_queue_set(GEN_CHK, q->refillq); 1418 + idpf_queue_set(RFL_GEN_CHK, q->refillq); 1320 1419 } 1321 1420 1322 1421 if (!split) ··· 1602 1697 spin_unlock_bh(&tx_tstamp_caps->status_lock); 1603 1698 } 1604 1699 1605 - /** 1606 - * idpf_tx_clean_stashed_bufs - clean bufs that were stored for 1607 - * out of order completions 1608 - * @txq: queue to clean 1609 - * @compl_tag: completion tag of packet to clean (from completion descriptor) 1610 - * @cleaned: pointer to stats struct to track cleaned packets/bytes 1611 - * @budget: Used to determine if we are in netpoll 1612 - */ 1613 - static void idpf_tx_clean_stashed_bufs(struct idpf_tx_queue *txq, 1614 - u16 compl_tag, 1615 - struct libeth_sq_napi_stats *cleaned, 1616 - int budget) 1617 - { 1618 - struct idpf_tx_stash *stash; 1619 - struct hlist_node *tmp_buf; 1620 - struct libeth_cq_pp cp = { 1621 - .dev = txq->dev, 1622 - .ss = cleaned, 1623 - .napi = budget, 1624 - }; 1625 - 1626 - /* Buffer completion */ 1627 - hash_for_each_possible_safe(txq->stash->sched_buf_hash, stash, tmp_buf, 1628 - hlist, compl_tag) { 1629 - if (unlikely(idpf_tx_buf_compl_tag(&stash->buf) != compl_tag)) 1630 - continue; 1631 - 1632 - hash_del(&stash->hlist); 1633 - 1634 - if (stash->buf.type == LIBETH_SQE_SKB && 1635 - (skb_shinfo(stash->buf.skb)->tx_flags & SKBTX_IN_PROGRESS)) 1636 - idpf_tx_read_tstamp(txq, stash->buf.skb); 1637 - 1638 - libeth_tx_complete(&stash->buf, &cp); 1639 - 1640 - 
/* Push shadow buf back onto stack */ 1641 - idpf_buf_lifo_push(&txq->stash->buf_stack, stash); 1642 - } 1643 - } 1644 - 1645 - /** 1646 - * idpf_stash_flow_sch_buffers - store buffer parameters info to be freed at a 1647 - * later time (only relevant for flow scheduling mode) 1648 - * @txq: Tx queue to clean 1649 - * @tx_buf: buffer to store 1650 - */ 1651 - static int idpf_stash_flow_sch_buffers(struct idpf_tx_queue *txq, 1652 - struct idpf_tx_buf *tx_buf) 1653 - { 1654 - struct idpf_tx_stash *stash; 1655 - 1656 - if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) 1657 - return 0; 1658 - 1659 - stash = idpf_buf_lifo_pop(&txq->stash->buf_stack); 1660 - if (unlikely(!stash)) { 1661 - net_err_ratelimited("%s: No out-of-order TX buffers left!\n", 1662 - netdev_name(txq->netdev)); 1663 - 1664 - return -ENOMEM; 1665 - } 1666 - 1667 - /* Store buffer params in shadow buffer */ 1668 - stash->buf.skb = tx_buf->skb; 1669 - stash->buf.bytes = tx_buf->bytes; 1670 - stash->buf.packets = tx_buf->packets; 1671 - stash->buf.type = tx_buf->type; 1672 - stash->buf.nr_frags = tx_buf->nr_frags; 1673 - dma_unmap_addr_set(&stash->buf, dma, dma_unmap_addr(tx_buf, dma)); 1674 - dma_unmap_len_set(&stash->buf, len, dma_unmap_len(tx_buf, len)); 1675 - idpf_tx_buf_compl_tag(&stash->buf) = idpf_tx_buf_compl_tag(tx_buf); 1676 - 1677 - /* Add buffer to buf_hash table to be freed later */ 1678 - hash_add(txq->stash->sched_buf_hash, &stash->hlist, 1679 - idpf_tx_buf_compl_tag(&stash->buf)); 1680 - 1681 - tx_buf->type = LIBETH_SQE_EMPTY; 1682 - 1683 - return 0; 1684 - } 1685 - 1686 1700 #define idpf_tx_splitq_clean_bump_ntc(txq, ntc, desc, buf) \ 1687 1701 do { \ 1688 1702 if (unlikely(++(ntc) == (txq)->desc_count)) { \ ··· 1629 1805 * Separate packet completion events will be reported on the completion queue, 1630 1806 * and the buffers will be cleaned separately. The stats are not updated from 1631 1807 * this function when using flow-based scheduling. 
1632 - * 1633 - * Furthermore, in flow scheduling mode, check to make sure there are enough 1634 - * reserve buffers to stash the packet. If there are not, return early, which 1635 - * will leave next_to_clean pointing to the packet that failed to be stashed. 1636 - * 1637 - * Return: false in the scenario above, true otherwise. 1638 1808 */ 1639 - static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, 1809 + static void idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, 1640 1810 int napi_budget, 1641 1811 struct libeth_sq_napi_stats *cleaned, 1642 1812 bool descs_only) ··· 1644 1826 .napi = napi_budget, 1645 1827 }; 1646 1828 struct idpf_tx_buf *tx_buf; 1647 - bool clean_complete = true; 1829 + 1830 + if (descs_only) { 1831 + /* Bump ring index to mark as cleaned. */ 1832 + tx_q->next_to_clean = end; 1833 + return; 1834 + } 1648 1835 1649 1836 tx_desc = &tx_q->flex_tx[ntc]; 1650 1837 next_pending_desc = &tx_q->flex_tx[end]; ··· 1669 1846 break; 1670 1847 1671 1848 eop_idx = tx_buf->rs_idx; 1849 + libeth_tx_complete(tx_buf, &cp); 1672 1850 1673 - if (descs_only) { 1674 - if (IDPF_TX_BUF_RSV_UNUSED(tx_q) < tx_buf->nr_frags) { 1675 - clean_complete = false; 1676 - goto tx_splitq_clean_out; 1677 - } 1851 + /* unmap remaining buffers */ 1852 + while (ntc != eop_idx) { 1853 + idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, 1854 + tx_desc, tx_buf); 1678 1855 1679 - idpf_stash_flow_sch_buffers(tx_q, tx_buf); 1680 - 1681 - while (ntc != eop_idx) { 1682 - idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, 1683 - tx_desc, tx_buf); 1684 - idpf_stash_flow_sch_buffers(tx_q, tx_buf); 1685 - } 1686 - } else { 1856 + /* unmap any remaining paged data */ 1687 1857 libeth_tx_complete(tx_buf, &cp); 1688 - 1689 - /* unmap remaining buffers */ 1690 - while (ntc != eop_idx) { 1691 - idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, 1692 - tx_desc, tx_buf); 1693 - 1694 - /* unmap any remaining paged data */ 1695 - libeth_tx_complete(tx_buf, &cp); 1696 - } 1697 1858 } 1698 1859 1699 1860 
fetch_next_txq_desc: 1700 1861 idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, tx_desc, tx_buf); 1701 1862 } 1702 1863 1703 - tx_splitq_clean_out: 1704 1864 tx_q->next_to_clean = ntc; 1705 - 1706 - return clean_complete; 1707 1865 } 1708 1866 1709 - #define idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, buf) \ 1710 - do { \ 1711 - (buf)++; \ 1712 - (ntc)++; \ 1713 - if (unlikely((ntc) == (txq)->desc_count)) { \ 1714 - buf = (txq)->tx_buf; \ 1715 - ntc = 0; \ 1716 - } \ 1717 - } while (0) 1718 - 1719 1867 /** 1720 - * idpf_tx_clean_buf_ring - clean flow scheduling TX queue buffers 1868 + * idpf_tx_clean_bufs - clean flow scheduling TX queue buffers 1721 1869 * @txq: queue to clean 1722 - * @compl_tag: completion tag of packet to clean (from completion descriptor) 1870 + * @buf_id: packet's starting buffer ID, from completion descriptor 1723 1871 * @cleaned: pointer to stats struct to track cleaned packets/bytes 1724 1872 * @budget: Used to determine if we are in netpoll 1725 1873 * 1726 - * Cleans all buffers associated with the input completion tag either from the 1727 - * TX buffer ring or from the hash table if the buffers were previously 1728 - * stashed. Returns the byte/segment count for the cleaned packet associated 1729 - * this completion tag. 1874 + * Clean all buffers associated with the packet starting at buf_id. Returns the 1875 + * byte/segment count for the cleaned packet. 
1730 1876 */ 1731 - static bool idpf_tx_clean_buf_ring(struct idpf_tx_queue *txq, u16 compl_tag, 1732 - struct libeth_sq_napi_stats *cleaned, 1733 - int budget) 1877 + static void idpf_tx_clean_bufs(struct idpf_tx_queue *txq, u32 buf_id, 1878 + struct libeth_sq_napi_stats *cleaned, 1879 + int budget) 1734 1880 { 1735 - u16 idx = compl_tag & txq->compl_tag_bufid_m; 1736 1881 struct idpf_tx_buf *tx_buf = NULL; 1737 1882 struct libeth_cq_pp cp = { 1738 1883 .dev = txq->dev, 1739 1884 .ss = cleaned, 1740 1885 .napi = budget, 1741 1886 }; 1742 - u16 ntc, orig_idx = idx; 1743 1887 1744 - tx_buf = &txq->tx_buf[idx]; 1745 - 1746 - if (unlikely(tx_buf->type <= LIBETH_SQE_CTX || 1747 - idpf_tx_buf_compl_tag(tx_buf) != compl_tag)) 1748 - return false; 1749 - 1888 + tx_buf = &txq->tx_buf[buf_id]; 1750 1889 if (tx_buf->type == LIBETH_SQE_SKB) { 1751 1890 if (skb_shinfo(tx_buf->skb)->tx_flags & SKBTX_IN_PROGRESS) 1752 1891 idpf_tx_read_tstamp(txq, tx_buf->skb); 1753 1892 1754 1893 libeth_tx_complete(tx_buf, &cp); 1894 + idpf_post_buf_refill(txq->refillq, buf_id); 1755 1895 } 1756 1896 1757 - idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); 1897 + while (idpf_tx_buf_next(tx_buf) != IDPF_TXBUF_NULL) { 1898 + buf_id = idpf_tx_buf_next(tx_buf); 1758 1899 1759 - while (idpf_tx_buf_compl_tag(tx_buf) == compl_tag) { 1900 + tx_buf = &txq->tx_buf[buf_id]; 1760 1901 libeth_tx_complete(tx_buf, &cp); 1761 - idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); 1902 + idpf_post_buf_refill(txq->refillq, buf_id); 1762 1903 } 1763 - 1764 - /* 1765 - * It's possible the packet we just cleaned was an out of order 1766 - * completion, which means we can stash the buffers starting from 1767 - * the original next_to_clean and reuse the descriptors. We need 1768 - * to compare the descriptor ring next_to_clean packet's "first" buffer 1769 - * to the "first" buffer of the packet we just cleaned to determine if 1770 - * this is the case. 
Howevever, next_to_clean can point to either a 1771 - * reserved buffer that corresponds to a context descriptor used for the 1772 - * next_to_clean packet (TSO packet) or the "first" buffer (single 1773 - * packet). The orig_idx from the packet we just cleaned will always 1774 - * point to the "first" buffer. If next_to_clean points to a reserved 1775 - * buffer, let's bump ntc once and start the comparison from there. 1776 - */ 1777 - ntc = txq->next_to_clean; 1778 - tx_buf = &txq->tx_buf[ntc]; 1779 - 1780 - if (tx_buf->type == LIBETH_SQE_CTX) 1781 - idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, tx_buf); 1782 - 1783 - /* 1784 - * If ntc still points to a different "first" buffer, clean the 1785 - * descriptor ring and stash all of the buffers for later cleaning. If 1786 - * we cannot stash all of the buffers, next_to_clean will point to the 1787 - * "first" buffer of the packet that could not be stashed and cleaning 1788 - * will start there next time. 1789 - */ 1790 - if (unlikely(tx_buf != &txq->tx_buf[orig_idx] && 1791 - !idpf_tx_splitq_clean(txq, orig_idx, budget, cleaned, 1792 - true))) 1793 - return true; 1794 - 1795 - /* 1796 - * Otherwise, update next_to_clean to reflect the cleaning that was 1797 - * done above. 1798 - */ 1799 - txq->next_to_clean = idx; 1800 - 1801 - return true; 1802 1904 } 1803 1905 1804 1906 /** ··· 1742 1994 struct libeth_sq_napi_stats *cleaned, 1743 1995 int budget) 1744 1996 { 1745 - u16 compl_tag; 1997 + /* RS completion contains queue head for queue based scheduling or 1998 + * completion tag for flow based scheduling. 
1999 + */ 2000 + u16 rs_compl_val = le16_to_cpu(desc->q_head_compl_tag.q_head); 1746 2001 1747 2002 if (!idpf_queue_has(FLOW_SCH_EN, txq)) { 1748 - u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head); 1749 - 1750 - idpf_tx_splitq_clean(txq, head, budget, cleaned, false); 2003 + idpf_tx_splitq_clean(txq, rs_compl_val, budget, cleaned, false); 1751 2004 return; 1752 2005 } 1753 2006 1754 - compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag); 1755 - 1756 - /* If we didn't clean anything on the ring, this packet must be 1757 - * in the hash table. Go clean it there. 1758 - */ 1759 - if (!idpf_tx_clean_buf_ring(txq, compl_tag, cleaned, budget)) 1760 - idpf_tx_clean_stashed_bufs(txq, compl_tag, cleaned, budget); 2007 + idpf_tx_clean_bufs(txq, rs_compl_val, cleaned, budget); 1761 2008 } 1762 2009 1763 2010 /** ··· 1869 2126 /* Update BQL */ 1870 2127 nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); 1871 2128 1872 - dont_wake = !complq_ok || IDPF_TX_BUF_RSV_LOW(tx_q) || 1873 - np->state != __IDPF_VPORT_UP || 2129 + dont_wake = !complq_ok || np->state != __IDPF_VPORT_UP || 1874 2130 !netif_carrier_ok(tx_q->netdev); 1875 2131 /* Check if the TXQ needs to and can be restarted */ 1876 2132 __netif_txq_completed_wake(nq, tx_q->cleaned_pkts, tx_q->cleaned_bytes, ··· 1926 2184 desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag); 1927 2185 } 1928 2186 1929 - /* Global conditions to tell whether the txq (and related resources) 1930 - * has room to allow the use of "size" descriptors. 
2187 + /** 2188 + * idpf_tx_splitq_has_room - check if enough Tx splitq resources are available 2189 + * @tx_q: the queue to be checked 2190 + * @descs_needed: number of descriptors required for this packet 2191 + * @bufs_needed: number of Tx buffers required for this packet 2192 + * 2193 + * Return: 0 if no room available, 1 otherwise 1931 2194 */ 1932 - static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size) 2195 + static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 descs_needed, 2196 + u32 bufs_needed) 1933 2197 { 1934 - if (IDPF_DESC_UNUSED(tx_q) < size || 2198 + if (IDPF_DESC_UNUSED(tx_q) < descs_needed || 1935 2199 IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > 1936 2200 IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) || 1937 - IDPF_TX_BUF_RSV_LOW(tx_q)) 2201 + idpf_tx_splitq_get_free_bufs(tx_q->refillq) < bufs_needed) 1938 2202 return 0; 1939 2203 return 1; 1940 2204 } ··· 1949 2201 * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions 1950 2202 * @tx_q: the queue to be checked 1951 2203 * @descs_needed: number of descriptors required for this packet 2204 + * @bufs_needed: number of buffers needed for this packet 1952 2205 * 1953 - * Returns 0 if stop is not needed 2206 + * Return: 0 if stop is not needed 1954 2207 */ 1955 2208 static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, 1956 - unsigned int descs_needed) 2209 + u32 descs_needed, 2210 + u32 bufs_needed) 1957 2211 { 2212 + /* Since we have multiple resources to check for splitq, our 2213 + * start,stop_thrs becomes a boolean check instead of a count 2214 + * threshold. 
2215 + */ 1958 2216 if (netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, 1959 - idpf_txq_has_room(tx_q, descs_needed), 2217 + idpf_txq_has_room(tx_q, descs_needed, 2218 + bufs_needed), 1960 2219 1, 1)) 1961 2220 return 0; 1962 2221 ··· 2005 2250 } 2006 2251 2007 2252 /** 2008 - * idpf_tx_desc_count_required - calculate number of Tx descriptors needed 2253 + * idpf_tx_res_count_required - get number of Tx resources needed for this pkt 2009 2254 * @txq: queue to send buffer on 2010 2255 * @skb: send buffer 2256 + * @bufs_needed: (output) number of buffers needed for this skb. 2011 2257 * 2012 - * Returns number of data descriptors needed for this skb. 2258 + * Return: number of data descriptors and buffers needed for this skb. 2013 2259 */ 2014 - unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, 2015 - struct sk_buff *skb) 2260 + unsigned int idpf_tx_res_count_required(struct idpf_tx_queue *txq, 2261 + struct sk_buff *skb, 2262 + u32 *bufs_needed) 2016 2263 { 2017 2264 const struct skb_shared_info *shinfo; 2018 2265 unsigned int count = 0, i; ··· 2025 2268 return count; 2026 2269 2027 2270 shinfo = skb_shinfo(skb); 2271 + *bufs_needed += shinfo->nr_frags; 2028 2272 for (i = 0; i < shinfo->nr_frags; i++) { 2029 2273 unsigned int size; 2030 2274 ··· 2055 2297 } 2056 2298 2057 2299 /** 2058 - * idpf_tx_dma_map_error - handle TX DMA map errors 2059 - * @txq: queue to send buffer on 2060 - * @skb: send buffer 2061 - * @first: original first buffer info buffer for packet 2062 - * @idx: starting point on ring to unwind 2063 - */ 2064 - void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, 2065 - struct idpf_tx_buf *first, u16 idx) 2066 - { 2067 - struct libeth_sq_napi_stats ss = { }; 2068 - struct libeth_cq_pp cp = { 2069 - .dev = txq->dev, 2070 - .ss = &ss, 2071 - }; 2072 - 2073 - u64_stats_update_begin(&txq->stats_sync); 2074 - u64_stats_inc(&txq->q_stats.dma_map_errs); 2075 - u64_stats_update_end(&txq->stats_sync); 2076 - 2077 - 
/* clear dma mappings for failed tx_buf map */ 2078 - for (;;) { 2079 - struct idpf_tx_buf *tx_buf; 2080 - 2081 - tx_buf = &txq->tx_buf[idx]; 2082 - libeth_tx_complete(tx_buf, &cp); 2083 - if (tx_buf == first) 2084 - break; 2085 - if (idx == 0) 2086 - idx = txq->desc_count; 2087 - idx--; 2088 - } 2089 - 2090 - if (skb_is_gso(skb)) { 2091 - union idpf_tx_flex_desc *tx_desc; 2092 - 2093 - /* If we failed a DMA mapping for a TSO packet, we will have 2094 - * used one additional descriptor for a context 2095 - * descriptor. Reset that here. 2096 - */ 2097 - tx_desc = &txq->flex_tx[idx]; 2098 - memset(tx_desc, 0, sizeof(*tx_desc)); 2099 - if (idx == 0) 2100 - idx = txq->desc_count; 2101 - idx--; 2102 - } 2103 - 2104 - /* Update tail in case netdev_xmit_more was previously true */ 2105 - idpf_tx_buf_hw_update(txq, idx, false); 2106 - } 2107 - 2108 - /** 2109 2300 * idpf_tx_splitq_bump_ntu - adjust NTU and generation 2110 2301 * @txq: the tx ring to wrap 2111 2302 * @ntu: ring index to bump ··· 2063 2356 { 2064 2357 ntu++; 2065 2358 2066 - if (ntu == txq->desc_count) { 2359 + if (ntu == txq->desc_count) 2067 2360 ntu = 0; 2068 - txq->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(txq); 2069 - } 2070 2361 2071 2362 return ntu; 2363 + } 2364 + 2365 + /** 2366 + * idpf_tx_get_free_buf_id - get a free buffer ID from the refill queue 2367 + * @refillq: refill queue to get buffer ID from 2368 + * @buf_id: return buffer ID 2369 + * 2370 + * Return: true if a buffer ID was found, false if not 2371 + */ 2372 + static bool idpf_tx_get_free_buf_id(struct idpf_sw_queue *refillq, 2373 + u32 *buf_id) 2374 + { 2375 + u32 ntc = refillq->next_to_clean; 2376 + u32 refill_desc; 2377 + 2378 + refill_desc = refillq->ring[ntc]; 2379 + 2380 + if (unlikely(idpf_queue_has(RFL_GEN_CHK, refillq) != 2381 + !!(refill_desc & IDPF_RFL_BI_GEN_M))) 2382 + return false; 2383 + 2384 + *buf_id = FIELD_GET(IDPF_RFL_BI_BUFID_M, refill_desc); 2385 + 2386 + if (unlikely(++ntc == refillq->desc_count)) { 2387 + 
idpf_queue_change(RFL_GEN_CHK, refillq); 2388 + ntc = 0; 2389 + } 2390 + 2391 + refillq->next_to_clean = ntc; 2392 + 2393 + return true; 2394 + } 2395 + 2396 + /** 2397 + * idpf_tx_splitq_pkt_err_unmap - Unmap buffers and bump tail in case of error 2398 + * @txq: Tx queue to unwind 2399 + * @params: pointer to splitq params struct 2400 + * @first: starting buffer for packet to unmap 2401 + */ 2402 + static void idpf_tx_splitq_pkt_err_unmap(struct idpf_tx_queue *txq, 2403 + struct idpf_tx_splitq_params *params, 2404 + struct idpf_tx_buf *first) 2405 + { 2406 + struct idpf_sw_queue *refillq = txq->refillq; 2407 + struct libeth_sq_napi_stats ss = { }; 2408 + struct idpf_tx_buf *tx_buf = first; 2409 + struct libeth_cq_pp cp = { 2410 + .dev = txq->dev, 2411 + .ss = &ss, 2412 + }; 2413 + 2414 + u64_stats_update_begin(&txq->stats_sync); 2415 + u64_stats_inc(&txq->q_stats.dma_map_errs); 2416 + u64_stats_update_end(&txq->stats_sync); 2417 + 2418 + libeth_tx_complete(tx_buf, &cp); 2419 + while (idpf_tx_buf_next(tx_buf) != IDPF_TXBUF_NULL) { 2420 + tx_buf = &txq->tx_buf[idpf_tx_buf_next(tx_buf)]; 2421 + libeth_tx_complete(tx_buf, &cp); 2422 + } 2423 + 2424 + /* Update tail in case netdev_xmit_more was previously true. */ 2425 + idpf_tx_buf_hw_update(txq, params->prev_ntu, false); 2426 + 2427 + if (!refillq) 2428 + return; 2429 + 2430 + /* Restore refillq state to avoid leaking tags. 
*/ 2431 + if (params->prev_refill_gen != idpf_queue_has(RFL_GEN_CHK, refillq)) 2432 + idpf_queue_change(RFL_GEN_CHK, refillq); 2433 + refillq->next_to_clean = params->prev_refill_ntc; 2072 2434 } 2073 2435 2074 2436 /** ··· 2161 2385 struct netdev_queue *nq; 2162 2386 struct sk_buff *skb; 2163 2387 skb_frag_t *frag; 2388 + u32 next_buf_id; 2164 2389 u16 td_cmd = 0; 2165 2390 dma_addr_t dma; 2166 2391 ··· 2179 2402 tx_buf = first; 2180 2403 first->nr_frags = 0; 2181 2404 2182 - params->compl_tag = 2183 - (tx_q->compl_tag_cur_gen << tx_q->compl_tag_gen_s) | i; 2184 - 2185 2405 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 2186 2406 unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; 2187 2407 2188 - if (dma_mapping_error(tx_q->dev, dma)) 2189 - return idpf_tx_dma_map_error(tx_q, skb, first, i); 2408 + if (unlikely(dma_mapping_error(tx_q->dev, dma))) { 2409 + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; 2410 + return idpf_tx_splitq_pkt_err_unmap(tx_q, params, 2411 + first); 2412 + } 2190 2413 2191 2414 first->nr_frags++; 2192 - idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; 2193 2415 tx_buf->type = LIBETH_SQE_FRAG; 2194 2416 2195 2417 /* record length, and DMA address */ ··· 2244 2468 max_data); 2245 2469 2246 2470 if (unlikely(++i == tx_q->desc_count)) { 2247 - tx_buf = tx_q->tx_buf; 2248 2471 tx_desc = &tx_q->flex_tx[0]; 2249 2472 i = 0; 2250 - tx_q->compl_tag_cur_gen = 2251 - IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); 2252 2473 } else { 2253 - tx_buf++; 2254 2474 tx_desc++; 2255 2475 } 2256 - 2257 - /* Since this packet has a buffer that is going to span 2258 - * multiple descriptors, it's going to leave holes in 2259 - * to the TX buffer ring. To ensure these holes do not 2260 - * cause issues in the cleaning routines, we will clear 2261 - * them of any stale data and assign them the same 2262 - * completion tag as the current packet. 
Then when the 2263 - * packet is being cleaned, the cleaning routines will 2264 - * simply pass over these holes and finish cleaning the 2265 - * rest of the packet. 2266 - */ 2267 - tx_buf->type = LIBETH_SQE_EMPTY; 2268 - idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; 2269 2476 2270 2477 /* Adjust the DMA offset and the remaining size of the 2271 2478 * fragment. On the first iteration of this loop, ··· 2274 2515 idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); 2275 2516 2276 2517 if (unlikely(++i == tx_q->desc_count)) { 2277 - tx_buf = tx_q->tx_buf; 2278 2518 tx_desc = &tx_q->flex_tx[0]; 2279 2519 i = 0; 2280 - tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); 2281 2520 } else { 2282 - tx_buf++; 2283 2521 tx_desc++; 2284 2522 } 2523 + 2524 + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { 2525 + if (unlikely(!idpf_tx_get_free_buf_id(tx_q->refillq, 2526 + &next_buf_id))) { 2527 + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; 2528 + return idpf_tx_splitq_pkt_err_unmap(tx_q, params, 2529 + first); 2530 + } 2531 + } else { 2532 + next_buf_id = i; 2533 + } 2534 + idpf_tx_buf_next(tx_buf) = next_buf_id; 2535 + tx_buf = &tx_q->tx_buf[next_buf_id]; 2285 2536 2286 2537 size = skb_frag_size(frag); 2287 2538 data_len -= size; ··· 2307 2538 2308 2539 /* write last descriptor with RS and EOP bits */ 2309 2540 first->rs_idx = i; 2541 + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; 2310 2542 td_cmd |= params->eop_cmd; 2311 2543 idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); 2312 2544 i = idpf_tx_splitq_bump_ntu(tx_q, i); ··· 2516 2746 union idpf_flex_tx_ctx_desc *desc; 2517 2747 int i = txq->next_to_use; 2518 2748 2519 - txq->tx_buf[i].type = LIBETH_SQE_CTX; 2520 - 2521 2749 /* grab the next descriptor */ 2522 2750 desc = &txq->flex_ctx[i]; 2523 2751 txq->next_to_use = idpf_tx_splitq_bump_ntu(txq, i); ··· 2609 2841 #endif /* CONFIG_PTP_1588_CLOCK */ 2610 2842 2611 2843 /** 2844 + * idpf_tx_splitq_need_re - check whether RE bit needs to be set 2845 + * 
@tx_q: pointer to Tx queue 2846 + * 2847 + * Return: true if RE bit needs to be set, false otherwise 2848 + */ 2849 + static bool idpf_tx_splitq_need_re(struct idpf_tx_queue *tx_q) 2850 + { 2851 + int gap = tx_q->next_to_use - tx_q->last_re; 2852 + 2853 + gap += (gap < 0) ? tx_q->desc_count : 0; 2854 + 2855 + return gap >= IDPF_TX_SPLITQ_RE_MIN_GAP; 2856 + } 2857 + 2858 + /** 2612 2859 * idpf_tx_splitq_frame - Sends buffer on Tx ring using flex descriptors 2613 2860 * @skb: send buffer 2614 2861 * @tx_q: queue to send buffer on ··· 2633 2850 static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, 2634 2851 struct idpf_tx_queue *tx_q) 2635 2852 { 2636 - struct idpf_tx_splitq_params tx_params = { }; 2853 + struct idpf_tx_splitq_params tx_params = { 2854 + .prev_ntu = tx_q->next_to_use, 2855 + }; 2637 2856 union idpf_flex_tx_ctx_desc *ctx_desc; 2638 2857 struct idpf_tx_buf *first; 2639 - unsigned int count; 2858 + u32 count, buf_count = 1; 2640 2859 int tso, idx; 2860 + u32 buf_id; 2641 2861 2642 - count = idpf_tx_desc_count_required(tx_q, skb); 2862 + count = idpf_tx_res_count_required(tx_q, skb, &buf_count); 2643 2863 if (unlikely(!count)) 2644 2864 return idpf_tx_drop_skb(tx_q, skb); 2645 2865 ··· 2652 2866 2653 2867 /* Check for splitq specific TX resources */ 2654 2868 count += (IDPF_TX_DESCS_PER_CACHE_LINE + tso); 2655 - if (idpf_tx_maybe_stop_splitq(tx_q, count)) { 2869 + if (idpf_tx_maybe_stop_splitq(tx_q, count, buf_count)) { 2656 2870 idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); 2657 2871 2658 2872 return NETDEV_TX_BUSY; ··· 2684 2898 idpf_tx_set_tstamp_desc(ctx_desc, idx); 2685 2899 } 2686 2900 2687 - /* record the location of the first descriptor for this packet */ 2688 - first = &tx_q->tx_buf[tx_q->next_to_use]; 2901 + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { 2902 + struct idpf_sw_queue *refillq = tx_q->refillq; 2903 + 2904 + /* Save refillq state in case of a packet rollback. 
Otherwise, 2905 + * the tags will be leaked since they will be popped from the 2906 + * refillq but never reposted during cleaning. 2907 + */ 2908 + tx_params.prev_refill_gen = 2909 + idpf_queue_has(RFL_GEN_CHK, refillq); 2910 + tx_params.prev_refill_ntc = refillq->next_to_clean; 2911 + 2912 + if (unlikely(!idpf_tx_get_free_buf_id(tx_q->refillq, 2913 + &buf_id))) { 2914 + if (tx_params.prev_refill_gen != 2915 + idpf_queue_has(RFL_GEN_CHK, refillq)) 2916 + idpf_queue_change(RFL_GEN_CHK, refillq); 2917 + refillq->next_to_clean = tx_params.prev_refill_ntc; 2918 + 2919 + tx_q->next_to_use = tx_params.prev_ntu; 2920 + return idpf_tx_drop_skb(tx_q, skb); 2921 + } 2922 + tx_params.compl_tag = buf_id; 2923 + 2924 + tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; 2925 + tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; 2926 + /* Set the RE bit to periodically "clean" the descriptor ring. 2927 + * MIN_GAP is set to MIN_RING size to ensure it will be set at 2928 + * least once each time around the ring. 
2929 + */ 2930 + if (idpf_tx_splitq_need_re(tx_q)) { 2931 + tx_params.eop_cmd |= IDPF_TXD_FLEX_FLOW_CMD_RE; 2932 + tx_q->txq_grp->num_completions_pending++; 2933 + tx_q->last_re = tx_q->next_to_use; 2934 + } 2935 + 2936 + if (skb->ip_summed == CHECKSUM_PARTIAL) 2937 + tx_params.offload.td_cmd |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN; 2938 + 2939 + } else { 2940 + buf_id = tx_q->next_to_use; 2941 + 2942 + tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2; 2943 + tx_params.eop_cmd = IDPF_TXD_LAST_DESC_CMD; 2944 + 2945 + if (skb->ip_summed == CHECKSUM_PARTIAL) 2946 + tx_params.offload.td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN; 2947 + } 2948 + 2949 + first = &tx_q->tx_buf[buf_id]; 2689 2950 first->skb = skb; 2690 2951 2691 2952 if (tso) { ··· 2742 2909 } else { 2743 2910 first->packets = 1; 2744 2911 first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN); 2745 - } 2746 - 2747 - if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { 2748 - tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; 2749 - tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; 2750 - /* Set the RE bit to catch any packets that may have not been 2751 - * stashed during RS completion cleaning. MIN_GAP is set to 2752 - * MIN_RING size to ensure it will be set at least once each 2753 - * time around the ring. 
2754 - */ 2755 - if (!(tx_q->next_to_use % IDPF_TX_SPLITQ_RE_MIN_GAP)) { 2756 - tx_params.eop_cmd |= IDPF_TXD_FLEX_FLOW_CMD_RE; 2757 - tx_q->txq_grp->num_completions_pending++; 2758 - } 2759 - 2760 - if (skb->ip_summed == CHECKSUM_PARTIAL) 2761 - tx_params.offload.td_cmd |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN; 2762 - 2763 - } else { 2764 - tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2; 2765 - tx_params.eop_cmd = IDPF_TXD_LAST_DESC_CMD; 2766 - 2767 - if (skb->ip_summed == CHECKSUM_PARTIAL) 2768 - tx_params.offload.td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN; 2769 2912 } 2770 2913 2771 2914 idpf_tx_splitq_map(tx_q, &tx_params, first); ··· 3281 3472 skip_data: 3282 3473 rx_buf->netmem = 0; 3283 3474 3284 - idpf_rx_post_buf_refill(refillq, buf_id); 3475 + idpf_post_buf_refill(refillq, buf_id); 3285 3476 IDPF_RX_BUMP_NTC(rxq, ntc); 3286 3477 3287 3478 /* skip if it is non EOP desc */ ··· 3389 3580 bool failure; 3390 3581 3391 3582 if (idpf_queue_has(RFL_GEN_CHK, refillq) != 3392 - !!(refill_desc & IDPF_RX_BI_GEN_M)) 3583 + !!(refill_desc & IDPF_RFL_BI_GEN_M)) 3393 3584 break; 3394 3585 3395 - buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc); 3586 + buf_id = FIELD_GET(IDPF_RFL_BI_BUFID_M, refill_desc); 3396 3587 failure = idpf_rx_update_bufq_desc(bufq, buf_id, buf_desc); 3397 3588 if (failure) 3398 3589 break;
+33 -54
drivers/net/ethernet/intel/idpf/idpf_txrx.h
··· 108 108 */ 109 109 #define IDPF_TX_SPLITQ_RE_MIN_GAP 64 110 110 111 - #define IDPF_RX_BI_GEN_M BIT(16) 112 - #define IDPF_RX_BI_BUFID_M GENMASK(15, 0) 111 + #define IDPF_RFL_BI_GEN_M BIT(16) 112 + #define IDPF_RFL_BI_BUFID_M GENMASK(15, 0) 113 113 114 114 #define IDPF_RXD_EOF_SPLITQ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M 115 115 #define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M ··· 117 117 #define IDPF_DESC_UNUSED(txq) \ 118 118 ((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \ 119 119 (txq)->next_to_clean - (txq)->next_to_use - 1) 120 - 121 - #define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->stash->buf_stack.top) 122 - #define IDPF_TX_BUF_RSV_LOW(txq) (IDPF_TX_BUF_RSV_UNUSED(txq) < \ 123 - (txq)->desc_count >> 2) 124 120 125 121 #define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 1) 126 122 /* Determine the absolute number of completions pending, i.e. the number of ··· 127 131 0 : U32_MAX) + \ 128 132 (txq)->num_completions_pending - (txq)->complq->num_completions) 129 133 130 - #define IDPF_TX_SPLITQ_COMPL_TAG_WIDTH 16 131 - /* Adjust the generation for the completion tag and wrap if necessary */ 132 - #define IDPF_TX_ADJ_COMPL_TAG_GEN(txq) \ 133 - ((++(txq)->compl_tag_cur_gen) >= (txq)->compl_tag_gen_max ? 
\ 134 - 0 : (txq)->compl_tag_cur_gen) 134 + #define IDPF_TXBUF_NULL U32_MAX 135 135 136 136 #define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS) 137 137 ··· 143 151 }; 144 152 145 153 #define idpf_tx_buf libeth_sqe 146 - 147 - /** 148 - * struct idpf_buf_lifo - LIFO for managing OOO completions 149 - * @top: Used to know how many buffers are left 150 - * @size: Total size of LIFO 151 - * @bufs: Backing array 152 - */ 153 - struct idpf_buf_lifo { 154 - u16 top; 155 - u16 size; 156 - struct idpf_tx_stash **bufs; 157 - }; 158 154 159 155 /** 160 156 * struct idpf_tx_offload_params - Offload parameters for a given packet ··· 176 196 * @compl_tag: Associated tag for completion 177 197 * @td_tag: Descriptor tunneling tag 178 198 * @offload: Offload parameters 199 + * @prev_ntu: stored TxQ next_to_use in case of rollback 200 + * @prev_refill_ntc: stored refillq next_to_clean in case of packet rollback 201 + * @prev_refill_gen: stored refillq generation bit in case of packet rollback 179 202 */ 180 203 struct idpf_tx_splitq_params { 181 204 enum idpf_tx_desc_dtype_value dtype; ··· 189 206 }; 190 207 191 208 struct idpf_tx_offload_params offload; 209 + 210 + u16 prev_ntu; 211 + u16 prev_refill_ntc; 212 + bool prev_refill_gen; 192 213 }; 193 214 194 215 enum idpf_tx_ctx_desc_eipt_offload { ··· 455 468 #define IDPF_DIM_DEFAULT_PROFILE_IX 1 456 469 457 470 /** 458 - * struct idpf_txq_stash - Tx buffer stash for Flow-based scheduling mode 459 - * @buf_stack: Stack of empty buffers to store buffer info for out of order 460 - * buffer completions. 
See struct idpf_buf_lifo 461 - * @sched_buf_hash: Hash table to store buffers 462 - */ 463 - struct idpf_txq_stash { 464 - struct idpf_buf_lifo buf_stack; 465 - DECLARE_HASHTABLE(sched_buf_hash, 12); 466 - } ____cacheline_aligned; 467 - 468 - /** 469 471 * struct idpf_rx_queue - software structure representing a receive queue 470 472 * @rx: universal receive descriptor array 471 473 * @single_buf: buffer descriptor array in singleq ··· 586 610 * @netdev: &net_device corresponding to this queue 587 611 * @next_to_use: Next descriptor to use 588 612 * @next_to_clean: Next descriptor to clean 613 + * @last_re: last descriptor index that RE bit was set 614 + * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather 589 615 * @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on 590 616 * the TX completion queue, it can be for any TXQ associated 591 617 * with that completion queue. This means we can clean up to ··· 598 620 * only once at the end of the cleaning routine. 
599 621 * @clean_budget: singleq only, queue cleaning budget 600 622 * @cleaned_pkts: Number of packets cleaned for the above said case 601 - * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather 602 - * @stash: Tx buffer stash for Flow-based scheduling mode 603 - * @compl_tag_bufid_m: Completion tag buffer id mask 604 - * @compl_tag_cur_gen: Used to keep track of current completion tag generation 605 - * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset 623 + * @refillq: Pointer to refill queue 606 624 * @cached_tstamp_caps: Tx timestamp capabilities negotiated with the CP 607 625 * @tstamp_task: Work that handles Tx timestamp read 608 626 * @stats_sync: See struct u64_stats_sync ··· 607 633 * @size: Length of descriptor ring in bytes 608 634 * @dma: Physical address of ring 609 635 * @q_vector: Backreference to associated vector 636 + * @buf_pool_size: Total number of idpf_tx_buf 610 637 */ 611 638 struct idpf_tx_queue { 612 639 __cacheline_group_begin_aligned(read_mostly); ··· 629 654 u16 desc_count; 630 655 631 656 u16 tx_min_pkt_len; 632 - u16 compl_tag_gen_s; 633 657 634 658 struct net_device *netdev; 635 659 __cacheline_group_end_aligned(read_mostly); ··· 636 662 __cacheline_group_begin_aligned(read_write); 637 663 u16 next_to_use; 638 664 u16 next_to_clean; 665 + u16 last_re; 666 + u16 tx_max_bufs; 639 667 640 668 union { 641 669 u32 cleaned_bytes; ··· 645 669 }; 646 670 u16 cleaned_pkts; 647 671 648 - u16 tx_max_bufs; 649 - struct idpf_txq_stash *stash; 650 - 651 - u16 compl_tag_bufid_m; 652 - u16 compl_tag_cur_gen; 653 - u16 compl_tag_gen_max; 672 + struct idpf_sw_queue *refillq; 654 673 655 674 struct idpf_ptp_vport_tx_tstamp_caps *cached_tstamp_caps; 656 675 struct work_struct *tstamp_task; ··· 660 689 dma_addr_t dma; 661 690 662 691 struct idpf_q_vector *q_vector; 692 + u32 buf_pool_size; 663 693 __cacheline_group_end_aligned(cold); 664 694 }; 665 695 libeth_cacheline_set_assert(struct idpf_tx_queue, 64, 666 - 
112 + sizeof(struct u64_stats_sync), 667 - 24); 696 + 104 + sizeof(struct u64_stats_sync), 697 + 32); 668 698 669 699 /** 670 700 * struct idpf_buf_queue - software structure representing a buffer queue ··· 875 903 * @vport: Vport back pointer 876 904 * @num_txq: Number of TX queues associated 877 905 * @txqs: Array of TX queue pointers 878 - * @stashes: array of OOO stashes for the queues 879 906 * @complq: Associated completion queue pointer, split queue only 880 907 * @num_completions_pending: Total number of completions pending for the 881 908 * completion queue, acculumated for all TX queues ··· 889 918 890 919 u16 num_txq; 891 920 struct idpf_tx_queue *txqs[IDPF_LARGE_MAX_Q]; 892 - struct idpf_txq_stash *stashes; 893 921 894 922 struct idpf_compl_queue *complq; 895 923 ··· 981 1011 reg->dyn_ctl); 982 1012 } 983 1013 1014 + /** 1015 + * idpf_tx_splitq_get_free_bufs - get number of free buf_ids in refillq 1016 + * @refillq: pointer to refillq containing buf_ids 1017 + */ 1018 + static inline u32 idpf_tx_splitq_get_free_bufs(struct idpf_sw_queue *refillq) 1019 + { 1020 + return (refillq->next_to_use > refillq->next_to_clean ? 
1021 + 0 : refillq->desc_count) + 1022 + refillq->next_to_use - refillq->next_to_clean - 1; 1023 + } 1024 + 984 1025 int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget); 985 1026 void idpf_vport_init_num_qs(struct idpf_vport *vport, 986 1027 struct virtchnl2_create_vport *vport_msg); ··· 1019 1038 bool xmit_more); 1020 1039 unsigned int idpf_size_to_txd_count(unsigned int size); 1021 1040 netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb); 1022 - void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, 1023 - struct idpf_tx_buf *first, u16 ring_idx); 1024 - unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, 1025 - struct sk_buff *skb); 1041 + unsigned int idpf_tx_res_count_required(struct idpf_tx_queue *txq, 1042 + struct sk_buff *skb, u32 *buf_count); 1026 1043 void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); 1027 1044 netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, 1028 1045 struct idpf_tx_queue *tx_q);