Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'net-fec-improve-xdp-copy-mode-and-add-af_xdp-zero-copy-support'

Wei Fang says:

====================
net: fec: improve XDP copy mode and add AF_XDP zero-copy support

This patch set optimizes the XDP copy mode logic as follows.

1. Separate the processing of RX XDP frames from fec_enet_rx_queue(),
and add a separate function fec_enet_rx_queue_xdp() for handling XDP
frames.

2. For TX XDP packets, use the batch sending method to avoid frequent
MMIO writes.

3. Use the switch statement to check the tx_buf type instead of the
if...else... statement, making the cleanup logic of the TX BD ring
clearer and more efficient.

We compared the performance of XDP copy mode before and after applying
this patch set, and the results show that the performance has improved.

Before applying this patch set.
root@imx93evk:~# ./xdp-bench tx eth0
Summary 396,868 rx/s 0 err,drop/s
Summary 396,024 rx/s 0 err,drop/s

root@imx93evk:~# ./xdp-bench drop eth0
Summary 684,781 rx/s 0 err/s
Summary 675,746 rx/s 0 err/s

root@imx93evk:~# ./xdp-bench pass eth0
Summary 208,552 rx/s 0 err,drop/s
Summary 208,654 rx/s 0 err,drop/s

root@imx93evk:~# ./xdp-bench redirect eth0 eth0
eth0->eth0 311,210 rx/s 0 err,drop/s 311,208 xmit/s
eth0->eth0 310,808 rx/s 0 err,drop/s 310,809 xmit/s

After applying this patch set.
root@imx93evk:~# ./xdp-bench tx eth0
Summary 425,778 rx/s 0 err,drop/s
Summary 426,042 rx/s 0 err,drop/s

root@imx93evk:~# ./xdp-bench drop eth0
Summary 698,351 rx/s 0 err/s
Summary 701,882 rx/s 0 err/s

root@imx93evk:~# ./xdp-bench pass eth0
Summary 210,348 rx/s 0 err,drop/s
Summary 210,016 rx/s 0 err,drop/s

root@imx93evk:~# ./xdp-bench redirect eth0 eth0
eth0->eth0 354,407 rx/s 0 err,drop/s 354,401 xmit/s
eth0->eth0 350,381 rx/s 0 err,drop/s 350,389 xmit/s

This patch set also adds AF_XDP zero-copy support, and we tested the
performance on the i.MX93 platform with the xdpsock tool. The following
is the performance comparison of copy mode and zero-copy mode. It can be
seen that the performance of zero-copy mode is better than that of copy
mode.

1. MAC swap L2 forwarding
1.1 Zero-copy mode
root@imx93evk:~# ./xdpsock -i eth0 -l -z
sock0@eth0:0 l2fwd xdp-drv
pps pkts 1.00
rx 414715 415455
tx 414715 415455

1.2 Copy mode
root@imx93evk:~# ./xdpsock -i eth0 -l -c
sock0@eth0:0 l2fwd xdp-drv
pps pkts 1.00
rx 356396 356609
tx 356396 356609

2. TX only
2.1 Zero-copy mode
root@imx93evk:~# ./xdpsock -i eth0 -t -s 64 -z
sock0@eth0:0 txonly xdp-drv
pps pkts 1.00
rx 0 0
tx 1119573 1126720

2.2 Copy mode
root@imx93evk:~# ./xdpsock -i eth0 -t -s 64 -c
sock0@eth0:0 txonly xdp-drv
pps pkts 1.00
rx 0 0
tx 406864 407616
====================

Link: https://patch.msgid.link/20260205085742.2685134-1-wei.fang@nxp.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+1247 -392
+13 -1
drivers/net/ethernet/freescale/fec.h
··· 340 340 #define FEC_ENET_TX_FRPPG (PAGE_SIZE / FEC_ENET_TX_FRSIZE) 341 341 #define TX_RING_SIZE 1024 /* Must be power of two */ 342 342 #define TX_RING_MOD_MASK 511 /* for this to work */ 343 + #define FEC_XSK_TX_BUDGET_MAX 256 343 344 344 345 #define BD_ENET_RX_INT 0x00800000 345 346 #define BD_ENET_RX_PTP ((ushort)0x0400) ··· 529 528 FEC_TXBUF_T_SKB, 530 529 FEC_TXBUF_T_XDP_NDO, 531 530 FEC_TXBUF_T_XDP_TX, 531 + FEC_TXBUF_T_XSK_XMIT, 532 + FEC_TXBUF_T_XSK_TX, 532 533 }; 533 534 534 535 struct fec_tx_buffer { ··· 542 539 struct bufdesc_prop bd; 543 540 unsigned char *tx_bounce[TX_RING_SIZE]; 544 541 struct fec_tx_buffer tx_buf[TX_RING_SIZE]; 542 + struct xsk_buff_pool *xsk_pool; 545 543 546 544 unsigned short tx_stop_threshold; 547 545 unsigned short tx_wake_threshold; ··· 552 548 dma_addr_t tso_hdrs_dma; 553 549 }; 554 550 551 + union fec_rx_buffer { 552 + void *buf_p; 553 + struct page *page; 554 + struct xdp_buff *xdp; 555 + }; 556 + 555 557 struct fec_enet_priv_rx_q { 556 558 struct bufdesc_prop bd; 557 - struct page *rx_buf[RX_RING_SIZE]; 559 + union fec_rx_buffer rx_buf[RX_RING_SIZE]; 560 + struct xsk_buff_pool *xsk_pool; 558 561 559 562 /* page_pool */ 560 563 struct page_pool *page_pool; ··· 654 643 struct pm_qos_request pm_qos_req; 655 644 656 645 unsigned int tx_align; 646 + unsigned int rx_shift; 657 647 658 648 /* hw interrupt coalesce */ 659 649 unsigned int rx_pkts_itr;
+1234 -391
drivers/net/ethernet/freescale/fec_main.c
··· 71 71 #include <net/page_pool/helpers.h> 72 72 #include <net/selftests.h> 73 73 #include <net/tso.h> 74 + #include <net/xdp_sock_drv.h> 74 75 #include <soc/imx/cpuidle.h> 75 76 76 77 #include "fec.h" ··· 80 79 static void fec_enet_itr_coal_set(struct net_device *ndev); 81 80 static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep, 82 81 int cpu, struct xdp_buff *xdp, 83 - u32 dma_sync_len); 82 + u32 dma_sync_len, int queue); 84 83 85 84 #define DRIVER_NAME "fec" 86 85 ··· 468 467 469 468 static int 470 469 fec_enet_create_page_pool(struct fec_enet_private *fep, 471 - struct fec_enet_priv_rx_q *rxq, int size) 470 + struct fec_enet_priv_rx_q *rxq) 472 471 { 473 472 struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog); 474 473 struct page_pool_params pp_params = { 475 474 .order = fep->pagepool_order, 476 475 .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, 477 - .pool_size = size, 476 + .pool_size = rxq->bd.ring_size, 478 477 .nid = dev_to_node(&fep->pdev->dev), 479 478 .dev = &fep->pdev->dev, 480 479 .dma_dir = xdp_prog ? 
DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, ··· 490 489 return err; 491 490 } 492 491 493 - err = xdp_rxq_info_reg(&rxq->xdp_rxq, fep->netdev, rxq->id, 0); 494 - if (err < 0) 495 - goto err_free_pp; 496 - 497 - err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL, 498 - rxq->page_pool); 499 - if (err) 500 - goto err_unregister_rxq; 501 - 502 492 return 0; 493 + } 503 494 504 - err_unregister_rxq: 505 - xdp_rxq_info_unreg(&rxq->xdp_rxq); 506 - err_free_pp: 507 - page_pool_destroy(rxq->page_pool); 508 - rxq->page_pool = NULL; 509 - return err; 495 + static void fec_txq_trigger_xmit(struct fec_enet_private *fep, 496 + struct fec_enet_priv_tx_q *txq) 497 + { 498 + if (!(fep->quirks & FEC_QUIRK_ERR007885) || 499 + !readl(txq->bd.reg_desc_active) || 500 + !readl(txq->bd.reg_desc_active) || 501 + !readl(txq->bd.reg_desc_active) || 502 + !readl(txq->bd.reg_desc_active)) 503 + writel(0, txq->bd.reg_desc_active); 510 504 } 511 505 512 506 static struct bufdesc * ··· 713 717 txq->bd.cur = bdp; 714 718 715 719 /* Trigger transmission start */ 716 - if (!(fep->quirks & FEC_QUIRK_ERR007885) || 717 - !readl(txq->bd.reg_desc_active) || 718 - !readl(txq->bd.reg_desc_active) || 719 - !readl(txq->bd.reg_desc_active) || 720 - !readl(txq->bd.reg_desc_active)) 721 - writel(0, txq->bd.reg_desc_active); 720 + fec_txq_trigger_xmit(fep, txq); 722 721 723 722 return 0; 724 723 } ··· 904 913 txq->bd.cur = bdp; 905 914 906 915 /* Trigger transmission start */ 907 - if (!(fep->quirks & FEC_QUIRK_ERR007885) || 908 - !readl(txq->bd.reg_desc_active) || 909 - !readl(txq->bd.reg_desc_active) || 910 - !readl(txq->bd.reg_desc_active) || 911 - !readl(txq->bd.reg_desc_active)) 912 - writel(0, txq->bd.reg_desc_active); 916 + fec_txq_trigger_xmit(fep, txq); 913 917 914 918 return 0; 915 919 ··· 991 1005 bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY); 992 1006 else 993 1007 bdp->cbd_sc = cpu_to_fec16(0); 1008 + 1009 + if (fep->bufdesc_ex) { 1010 + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; 
1011 + 1012 + ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT); 1013 + } 1014 + 994 1015 bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); 995 1016 } 996 1017 ··· 1015 1022 txq->bd.cur = bdp; 1016 1023 1017 1024 for (i = 0; i < txq->bd.ring_size; i++) { 1025 + struct page *page; 1026 + 1018 1027 /* Initialize the BD for every fragment in the page. */ 1019 1028 bdp->cbd_sc = cpu_to_fec16(0); 1020 - if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) { 1029 + 1030 + switch (txq->tx_buf[i].type) { 1031 + case FEC_TXBUF_T_SKB: 1021 1032 if (bdp->cbd_bufaddr && 1022 1033 !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) 1023 1034 dma_unmap_single(&fep->pdev->dev, 1024 1035 fec32_to_cpu(bdp->cbd_bufaddr), 1025 1036 fec16_to_cpu(bdp->cbd_datlen), 1026 1037 DMA_TO_DEVICE); 1027 - if (txq->tx_buf[i].buf_p) 1028 - dev_kfree_skb_any(txq->tx_buf[i].buf_p); 1029 - } else if (txq->tx_buf[i].type == FEC_TXBUF_T_XDP_NDO) { 1030 - if (bdp->cbd_bufaddr) 1031 - dma_unmap_single(&fep->pdev->dev, 1032 - fec32_to_cpu(bdp->cbd_bufaddr), 1033 - fec16_to_cpu(bdp->cbd_datlen), 1034 - DMA_TO_DEVICE); 1035 - 1036 - if (txq->tx_buf[i].buf_p) 1037 - xdp_return_frame(txq->tx_buf[i].buf_p); 1038 - } else { 1039 - struct page *page = txq->tx_buf[i].buf_p; 1040 - 1041 - if (page) 1042 - page_pool_put_page(pp_page_to_nmdesc(page)->pp, 1043 - page, 0, 1044 - false); 1038 + dev_kfree_skb_any(txq->tx_buf[i].buf_p); 1039 + break; 1040 + case FEC_TXBUF_T_XDP_NDO: 1041 + dma_unmap_single(&fep->pdev->dev, 1042 + fec32_to_cpu(bdp->cbd_bufaddr), 1043 + fec16_to_cpu(bdp->cbd_datlen), 1044 + DMA_TO_DEVICE); 1045 + xdp_return_frame(txq->tx_buf[i].buf_p); 1046 + break; 1047 + case FEC_TXBUF_T_XDP_TX: 1048 + page = txq->tx_buf[i].buf_p; 1049 + page_pool_put_page(pp_page_to_nmdesc(page)->pp, 1050 + page, 0, false); 1051 + break; 1052 + case FEC_TXBUF_T_XSK_TX: 1053 + xsk_buff_free(txq->tx_buf[i].buf_p); 1054 + break; 1055 + default: 1056 + break; 1045 1057 } 1046 1058 1047 1059 txq->tx_buf[i].buf_p = NULL; ··· 1479 1481 
hwtstamps->hwtstamp = ns_to_ktime(ns); 1480 1482 } 1481 1483 1482 - static void 1483 - fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget) 1484 + static bool fec_enet_xsk_xmit(struct fec_enet_private *fep, 1485 + struct xsk_buff_pool *pool, 1486 + u32 queue) 1484 1487 { 1485 - struct fec_enet_private *fep; 1486 - struct xdp_frame *xdpf; 1487 - struct bufdesc *bdp; 1488 - unsigned short status; 1489 - struct sk_buff *skb; 1490 - struct fec_enet_priv_tx_q *txq; 1488 + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; 1489 + struct xdp_desc *xsk_desc = pool->tx_descs; 1490 + int cpu = smp_processor_id(); 1491 + int free_bds, budget, batch; 1491 1492 struct netdev_queue *nq; 1492 - int index = 0; 1493 - int entries_free; 1493 + struct bufdesc *bdp; 1494 + dma_addr_t dma; 1495 + u32 estatus; 1496 + u16 status; 1497 + int i, j; 1498 + 1499 + nq = netdev_get_tx_queue(fep->netdev, queue); 1500 + __netif_tx_lock(nq, cpu); 1501 + 1502 + txq_trans_cond_update(nq); 1503 + free_bds = fec_enet_get_free_txdesc_num(txq); 1504 + if (!free_bds) 1505 + goto tx_unlock; 1506 + 1507 + budget = min(free_bds, FEC_XSK_TX_BUDGET_MAX); 1508 + batch = xsk_tx_peek_release_desc_batch(pool, budget); 1509 + if (!batch) 1510 + goto tx_unlock; 1511 + 1512 + bdp = txq->bd.cur; 1513 + for (i = 0; i < batch; i++) { 1514 + dma = xsk_buff_raw_get_dma(pool, xsk_desc[i].addr); 1515 + xsk_buff_raw_dma_sync_for_device(pool, dma, xsk_desc[i].len); 1516 + 1517 + j = fec_enet_get_bd_index(bdp, &txq->bd); 1518 + txq->tx_buf[j].type = FEC_TXBUF_T_XSK_XMIT; 1519 + txq->tx_buf[j].buf_p = NULL; 1520 + 1521 + status = fec16_to_cpu(bdp->cbd_sc); 1522 + status &= ~BD_ENET_TX_STATS; 1523 + status |= BD_ENET_TX_INTR | BD_ENET_TX_LAST; 1524 + bdp->cbd_datlen = cpu_to_fec16(xsk_desc[i].len); 1525 + bdp->cbd_bufaddr = cpu_to_fec32(dma); 1526 + 1527 + if (fep->bufdesc_ex) { 1528 + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; 1529 + 1530 + estatus = BD_ENET_TX_INT; 1531 + if (fep->quirks & 
FEC_QUIRK_HAS_AVB) 1532 + estatus |= FEC_TX_BD_FTYPE(txq->bd.qid); 1533 + 1534 + ebdp->cbd_bdu = 0; 1535 + ebdp->cbd_esc = cpu_to_fec32(estatus); 1536 + } 1537 + 1538 + /* Make sure the updates to rest of the descriptor are performed 1539 + * before transferring ownership. 1540 + */ 1541 + dma_wmb(); 1542 + 1543 + /* Send it on its way. Tell FEC it's ready, interrupt when done, 1544 + * it's the last BD of the frame, and to put the CRC on the end. 1545 + */ 1546 + status |= BD_ENET_TX_READY | BD_ENET_TX_TC; 1547 + bdp->cbd_sc = cpu_to_fec16(status); 1548 + dma_wmb(); 1549 + 1550 + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); 1551 + txq->bd.cur = bdp; 1552 + } 1553 + 1554 + /* Trigger transmission start */ 1555 + fec_txq_trigger_xmit(fep, txq); 1556 + 1557 + __netif_tx_unlock(nq); 1558 + 1559 + return batch < budget; 1560 + 1561 + tx_unlock: 1562 + __netif_tx_unlock(nq); 1563 + 1564 + return true; 1565 + } 1566 + 1567 + static int fec_enet_tx_queue(struct fec_enet_private *fep, 1568 + u16 queue, int budget) 1569 + { 1570 + struct netdev_queue *nq = netdev_get_tx_queue(fep->netdev, queue); 1571 + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; 1572 + struct net_device *ndev = fep->netdev; 1573 + struct bufdesc *bdp = txq->dirty_tx; 1574 + int index, frame_len, entries_free; 1575 + struct fec_tx_buffer *tx_buf; 1576 + unsigned short status; 1577 + struct sk_buff *skb; 1494 1578 struct page *page; 1495 - int frame_len; 1496 - 1497 - fep = netdev_priv(ndev); 1498 - 1499 - txq = fep->tx_queue[queue_id]; 1500 - /* get next bdp of dirty_tx */ 1501 - nq = netdev_get_tx_queue(ndev, queue_id); 1502 - bdp = txq->dirty_tx; 1579 + int xsk_cnt = 0; 1503 1580 1504 1581 /* get next bdp of dirty_tx */ 1505 1582 bdp = fec_enet_get_nextdesc(bdp, &txq->bd); ··· 1587 1514 break; 1588 1515 1589 1516 index = fec_enet_get_bd_index(bdp, &txq->bd); 1517 + tx_buf = &txq->tx_buf[index]; 1518 + frame_len = fec16_to_cpu(bdp->cbd_datlen); 1590 1519 1591 - if (txq->tx_buf[index].type == 
FEC_TXBUF_T_SKB) { 1592 - skb = txq->tx_buf[index].buf_p; 1520 + switch (tx_buf->type) { 1521 + case FEC_TXBUF_T_SKB: 1593 1522 if (bdp->cbd_bufaddr && 1594 1523 !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) 1595 1524 dma_unmap_single(&fep->pdev->dev, 1596 1525 fec32_to_cpu(bdp->cbd_bufaddr), 1597 - fec16_to_cpu(bdp->cbd_datlen), 1598 - DMA_TO_DEVICE); 1526 + frame_len, DMA_TO_DEVICE); 1527 + 1599 1528 bdp->cbd_bufaddr = cpu_to_fec32(0); 1529 + skb = tx_buf->buf_p; 1600 1530 if (!skb) 1601 1531 goto tx_buf_done; 1602 - } else { 1532 + 1533 + frame_len = skb->len; 1534 + 1535 + /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who 1536 + * are to time stamp the packet, so we still need to check time 1537 + * stamping enabled flag. 1538 + */ 1539 + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && 1540 + fep->hwts_tx_en) && fep->bufdesc_ex) { 1541 + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; 1542 + struct skb_shared_hwtstamps shhwtstamps; 1543 + 1544 + fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps); 1545 + skb_tstamp_tx(skb, &shhwtstamps); 1546 + } 1547 + 1548 + /* Free the sk buffer associated with this last transmit */ 1549 + napi_consume_skb(skb, budget); 1550 + break; 1551 + case FEC_TXBUF_T_XDP_NDO: 1603 1552 /* Tx processing cannot call any XDP (or page pool) APIs if 1604 1553 * the "budget" is 0. Because NAPI is called with budget of 1605 1554 * 0 (such as netpoll) indicates we may be in an IRQ context, 1606 1555 * however, we can't use the page pool from IRQ context. 
1607 1556 */ 1608 1557 if (unlikely(!budget)) 1609 - break; 1558 + goto out; 1610 1559 1611 - if (txq->tx_buf[index].type == FEC_TXBUF_T_XDP_NDO) { 1612 - xdpf = txq->tx_buf[index].buf_p; 1613 - if (bdp->cbd_bufaddr) 1614 - dma_unmap_single(&fep->pdev->dev, 1615 - fec32_to_cpu(bdp->cbd_bufaddr), 1616 - fec16_to_cpu(bdp->cbd_datlen), 1617 - DMA_TO_DEVICE); 1618 - } else { 1619 - page = txq->tx_buf[index].buf_p; 1620 - } 1560 + dma_unmap_single(&fep->pdev->dev, 1561 + fec32_to_cpu(bdp->cbd_bufaddr), 1562 + frame_len, DMA_TO_DEVICE); 1563 + bdp->cbd_bufaddr = cpu_to_fec32(0); 1564 + xdp_return_frame_rx_napi(tx_buf->buf_p); 1565 + break; 1566 + case FEC_TXBUF_T_XDP_TX: 1567 + if (unlikely(!budget)) 1568 + goto out; 1621 1569 1622 1570 bdp->cbd_bufaddr = cpu_to_fec32(0); 1623 - if (unlikely(!txq->tx_buf[index].buf_p)) { 1624 - txq->tx_buf[index].type = FEC_TXBUF_T_SKB; 1625 - goto tx_buf_done; 1626 - } 1627 - 1628 - frame_len = fec16_to_cpu(bdp->cbd_datlen); 1571 + page = tx_buf->buf_p; 1572 + /* The dma_sync_size = 0 as XDP_TX has already synced 1573 + * DMA for_device 1574 + */ 1575 + page_pool_put_page(pp_page_to_nmdesc(page)->pp, page, 1576 + 0, true); 1577 + break; 1578 + case FEC_TXBUF_T_XSK_XMIT: 1579 + bdp->cbd_bufaddr = cpu_to_fec32(0); 1580 + xsk_cnt++; 1581 + break; 1582 + case FEC_TXBUF_T_XSK_TX: 1583 + bdp->cbd_bufaddr = cpu_to_fec32(0); 1584 + xsk_buff_free(tx_buf->buf_p); 1585 + break; 1586 + default: 1587 + break; 1629 1588 } 1630 1589 1631 1590 /* Check for errors. 
*/ ··· 1677 1572 ndev->stats.tx_carrier_errors++; 1678 1573 } else { 1679 1574 ndev->stats.tx_packets++; 1680 - 1681 - if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) 1682 - ndev->stats.tx_bytes += skb->len; 1683 - else 1684 - ndev->stats.tx_bytes += frame_len; 1575 + ndev->stats.tx_bytes += frame_len; 1685 1576 } 1686 1577 1687 1578 /* Deferred means some collisions occurred during transmit, ··· 1686 1585 if (status & BD_ENET_TX_DEF) 1687 1586 ndev->stats.collisions++; 1688 1587 1689 - if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) { 1690 - /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who 1691 - * are to time stamp the packet, so we still need to check time 1692 - * stamping enabled flag. 1693 - */ 1694 - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && 1695 - fep->hwts_tx_en) && fep->bufdesc_ex) { 1696 - struct skb_shared_hwtstamps shhwtstamps; 1697 - struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; 1698 - 1699 - fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps); 1700 - skb_tstamp_tx(skb, &shhwtstamps); 1701 - } 1702 - 1703 - /* Free the sk buffer associated with this last transmit */ 1704 - napi_consume_skb(skb, budget); 1705 - } else if (txq->tx_buf[index].type == FEC_TXBUF_T_XDP_NDO) { 1706 - xdp_return_frame_rx_napi(xdpf); 1707 - } else { /* recycle pages of XDP_TX frames */ 1708 - /* The dma_sync_size = 0 as XDP_TX has already synced DMA for_device */ 1709 - page_pool_put_page(pp_page_to_nmdesc(page)->pp, page, 1710 - 0, true); 1711 - } 1712 - 1713 - txq->tx_buf[index].buf_p = NULL; 1588 + tx_buf->buf_p = NULL; 1714 1589 /* restore default tx buffer type: FEC_TXBUF_T_SKB */ 1715 - txq->tx_buf[index].type = FEC_TXBUF_T_SKB; 1590 + tx_buf->type = FEC_TXBUF_T_SKB; 1716 1591 1717 1592 tx_buf_done: 1718 1593 /* Make sure the update to bdp and tx_buf are performed ··· 1709 1632 } 1710 1633 } 1711 1634 1635 + out: 1636 + 1712 1637 /* ERR006358: Keep the transmitter going */ 1713 1638 if (bdp != txq->bd.cur && 1714 1639 
readl(txq->bd.reg_desc_active) == 0) 1715 1640 writel(0, txq->bd.reg_desc_active); 1641 + 1642 + if (txq->xsk_pool) { 1643 + struct xsk_buff_pool *pool = txq->xsk_pool; 1644 + 1645 + if (xsk_cnt) 1646 + xsk_tx_completed(pool, xsk_cnt); 1647 + 1648 + if (xsk_uses_need_wakeup(pool)) 1649 + xsk_set_tx_need_wakeup(pool); 1650 + 1651 + /* If the condition is true, it indicates that there are still 1652 + * packets to be transmitted, so return "budget" to make the 1653 + * NAPI continue polling. 1654 + */ 1655 + if (!fec_enet_xsk_xmit(fep, pool, queue)) 1656 + return budget; 1657 + } 1658 + 1659 + return 0; 1716 1660 } 1717 1661 1718 - static void fec_enet_tx(struct net_device *ndev, int budget) 1662 + static int fec_enet_tx(struct net_device *ndev, int budget) 1719 1663 { 1720 1664 struct fec_enet_private *fep = netdev_priv(ndev); 1721 - int i; 1665 + int i, count = 0; 1722 1666 1723 1667 /* Make sure that AVB queues are processed first. */ 1724 1668 for (i = fep->num_tx_queues - 1; i >= 0; i--) 1725 - fec_enet_tx_queue(ndev, i, budget); 1669 + count += fec_enet_tx_queue(fep, i, budget); 1670 + 1671 + return count; 1726 1672 } 1727 1673 1728 1674 static int fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq, ··· 1758 1658 if (unlikely(!new_page)) 1759 1659 return -ENOMEM; 1760 1660 1761 - rxq->rx_buf[index] = new_page; 1661 + rxq->rx_buf[index].page = new_page; 1762 1662 phys_addr = page_pool_get_dma_addr(new_page) + FEC_ENET_XDP_HEADROOM; 1763 1663 bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); 1764 1664 1765 1665 return 0; 1766 1666 } 1767 1667 1768 - static u32 1769 - fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog, 1770 - struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq, int cpu) 1668 + static int fec_enet_update_cbd_zc(struct fec_enet_priv_rx_q *rxq, 1669 + struct bufdesc *bdp, int index) 1771 1670 { 1772 - unsigned int sync, len = xdp->data_end - xdp->data; 1773 - u32 ret = FEC_ENET_XDP_PASS; 1774 - struct page *page; 1775 - int err; 1776 - 
u32 act; 1671 + struct xdp_buff *new_xdp; 1672 + dma_addr_t phys_addr; 1777 1673 1778 - act = bpf_prog_run_xdp(prog, xdp); 1674 + new_xdp = xsk_buff_alloc(rxq->xsk_pool); 1675 + if (unlikely(!new_xdp)) 1676 + return -ENOMEM; 1779 1677 1780 - /* Due xdp_adjust_tail and xdp_adjust_head: DMA sync for_device cover 1781 - * max len CPU touch 1782 - */ 1783 - sync = xdp->data_end - xdp->data; 1784 - sync = max(sync, len); 1678 + rxq->rx_buf[index].xdp = new_xdp; 1679 + phys_addr = xsk_buff_xdp_get_dma(new_xdp); 1680 + bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); 1785 1681 1786 - switch (act) { 1787 - case XDP_PASS: 1788 - rxq->stats[RX_XDP_PASS]++; 1789 - ret = FEC_ENET_XDP_PASS; 1790 - break; 1791 - 1792 - case XDP_REDIRECT: 1793 - rxq->stats[RX_XDP_REDIRECT]++; 1794 - err = xdp_do_redirect(fep->netdev, xdp, prog); 1795 - if (unlikely(err)) 1796 - goto xdp_err; 1797 - 1798 - ret = FEC_ENET_XDP_REDIR; 1799 - break; 1800 - 1801 - case XDP_TX: 1802 - rxq->stats[RX_XDP_TX]++; 1803 - err = fec_enet_xdp_tx_xmit(fep, cpu, xdp, sync); 1804 - if (unlikely(err)) { 1805 - rxq->stats[RX_XDP_TX_ERRORS]++; 1806 - goto xdp_err; 1807 - } 1808 - 1809 - ret = FEC_ENET_XDP_TX; 1810 - break; 1811 - 1812 - default: 1813 - bpf_warn_invalid_xdp_action(fep->netdev, prog, act); 1814 - fallthrough; 1815 - 1816 - case XDP_ABORTED: 1817 - fallthrough; /* handle aborts by dropping packet */ 1818 - 1819 - case XDP_DROP: 1820 - rxq->stats[RX_XDP_DROP]++; 1821 - xdp_err: 1822 - ret = FEC_ENET_XDP_CONSUMED; 1823 - page = virt_to_head_page(xdp->data); 1824 - page_pool_put_page(rxq->page_pool, page, sync, true); 1825 - if (act != XDP_DROP) 1826 - trace_xdp_exception(fep->netdev, prog, act); 1827 - break; 1828 - } 1829 - 1830 - return ret; 1682 + return 0; 1831 1683 } 1832 1684 1833 1685 static void fec_enet_rx_vlan(const struct net_device *ndev, struct sk_buff *skb) ··· 1798 1746 } 1799 1747 } 1800 1748 1749 + static int fec_rx_error_check(struct net_device *ndev, u16 status) 1750 + { 1751 + if (status & 
(BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | 1752 + BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_LAST | 1753 + BD_ENET_RX_CL)) { 1754 + ndev->stats.rx_errors++; 1755 + 1756 + if (status & BD_ENET_RX_OV) { 1757 + /* FIFO overrun */ 1758 + ndev->stats.rx_fifo_errors++; 1759 + return -EIO; 1760 + } 1761 + 1762 + if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | 1763 + BD_ENET_RX_LAST)) { 1764 + /* Frame too long or too short. */ 1765 + ndev->stats.rx_length_errors++; 1766 + if ((status & BD_ENET_RX_LAST) && net_ratelimit()) 1767 + netdev_err(ndev, "rcv is not +last\n"); 1768 + } 1769 + 1770 + /* CRC Error */ 1771 + if (status & BD_ENET_RX_CR) 1772 + ndev->stats.rx_crc_errors++; 1773 + 1774 + /* Report late collisions as a frame error. */ 1775 + if (status & (BD_ENET_RX_NO | BD_ENET_RX_CL)) 1776 + ndev->stats.rx_frame_errors++; 1777 + 1778 + return -EIO; 1779 + } 1780 + 1781 + return 0; 1782 + } 1783 + 1784 + static struct sk_buff *fec_build_skb(struct fec_enet_private *fep, 1785 + struct fec_enet_priv_rx_q *rxq, 1786 + struct bufdesc *bdp, 1787 + struct page *page, u32 len) 1788 + { 1789 + struct net_device *ndev = fep->netdev; 1790 + struct bufdesc_ex *ebdp; 1791 + struct sk_buff *skb; 1792 + 1793 + skb = build_skb(page_address(page), 1794 + PAGE_SIZE << fep->pagepool_order); 1795 + if (unlikely(!skb)) { 1796 + page_pool_recycle_direct(rxq->page_pool, page); 1797 + ndev->stats.rx_dropped++; 1798 + if (net_ratelimit()) 1799 + netdev_err(ndev, "build_skb failed\n"); 1800 + 1801 + return NULL; 1802 + } 1803 + 1804 + skb_reserve(skb, FEC_ENET_XDP_HEADROOM + fep->rx_shift); 1805 + skb_put(skb, len); 1806 + skb_mark_for_recycle(skb); 1807 + 1808 + /* Get offloads from the enhanced buffer descriptor */ 1809 + if (fep->bufdesc_ex) { 1810 + ebdp = (struct bufdesc_ex *)bdp; 1811 + 1812 + /* If this is a VLAN packet remove the VLAN Tag */ 1813 + if (ebdp->cbd_esc & cpu_to_fec32(BD_ENET_RX_VLAN)) 1814 + fec_enet_rx_vlan(ndev, skb); 1815 + 1816 + /* Get receive timestamp from the 
skb */ 1817 + if (fep->hwts_rx_en) 1818 + fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), 1819 + skb_hwtstamps(skb)); 1820 + 1821 + if (fep->csum_flags & FLAG_RX_CSUM_ENABLED) { 1822 + if (!(ebdp->cbd_esc & 1823 + cpu_to_fec32(FLAG_RX_CSUM_ERROR))) 1824 + /* don't check it */ 1825 + skb->ip_summed = CHECKSUM_UNNECESSARY; 1826 + else 1827 + skb_checksum_none_assert(skb); 1828 + } 1829 + } 1830 + 1831 + skb->protocol = eth_type_trans(skb, ndev); 1832 + skb_record_rx_queue(skb, rxq->bd.qid); 1833 + 1834 + return skb; 1835 + } 1836 + 1801 1837 /* During a receive, the bd_rx.cur points to the current incoming buffer. 1802 1838 * When we update through the ring, if the next incoming buffer has 1803 1839 * not been given to the system, we just set the empty indicator, 1804 1840 * effectively tossing the packet. 1805 1841 */ 1806 - static int 1807 - fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget) 1842 + static int fec_enet_rx_queue(struct fec_enet_private *fep, 1843 + u16 queue, int budget) 1808 1844 { 1809 - struct fec_enet_private *fep = netdev_priv(ndev); 1810 - struct fec_enet_priv_rx_q *rxq; 1811 - struct bufdesc *bdp; 1812 - unsigned short status; 1813 - struct sk_buff *skb; 1814 - ushort pkt_len; 1815 - int pkt_received = 0; 1816 - struct bufdesc_ex *ebdp = NULL; 1817 - int index = 0; 1818 - bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME; 1819 - struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog); 1820 - u32 ret, xdp_result = FEC_ENET_XDP_PASS; 1821 - u32 data_start = FEC_ENET_XDP_HEADROOM; 1822 - int cpu = smp_processor_id(); 1823 - struct xdp_buff xdp; 1845 + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue]; 1846 + bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME; 1847 + struct net_device *ndev = fep->netdev; 1848 + struct bufdesc *bdp = rxq->bd.cur; 1849 + u32 sub_len = 4 + fep->rx_shift; 1850 + int pkt_received = 0; 1851 + u16 status, pkt_len; 1852 + struct sk_buff *skb; 1824 1853 struct page *page; 1825 - __fec32 cbd_bufaddr; 
1826 - u32 sub_len = 4; 1827 - 1828 - /*If it has the FEC_QUIRK_HAS_RACC quirk property, the bit of 1829 - * FEC_RACC_SHIFT16 is set by default in the probe function. 1830 - */ 1831 - if (fep->quirks & FEC_QUIRK_HAS_RACC) { 1832 - data_start += 2; 1833 - sub_len += 2; 1834 - } 1854 + dma_addr_t dma; 1855 + int index; 1835 1856 1836 1857 #if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA) 1837 1858 /* ··· 1913 1788 */ 1914 1789 flush_cache_all(); 1915 1790 #endif 1916 - rxq = fep->rx_queue[queue_id]; 1917 1791 1918 1792 /* First, grab all of the stats for the incoming packet. 1919 1793 * These get messed up if we get called due to a busy condition. 1920 1794 */ 1921 - bdp = rxq->bd.cur; 1922 - xdp_init_buff(&xdp, PAGE_SIZE << fep->pagepool_order, &rxq->xdp_rxq); 1923 - 1924 1795 while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) { 1925 1796 1926 1797 if (pkt_received >= budget) 1927 1798 break; 1928 1799 pkt_received++; 1929 1800 1930 - writel(FEC_ENET_RXF_GET(queue_id), fep->hwp + FEC_IEVENT); 1801 + writel(FEC_ENET_RXF_GET(queue), fep->hwp + FEC_IEVENT); 1931 1802 1932 1803 /* Check for errors. */ 1933 1804 status ^= BD_ENET_RX_LAST; 1934 - if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | 1935 - BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_LAST | 1936 - BD_ENET_RX_CL)) { 1937 - ndev->stats.rx_errors++; 1938 - if (status & BD_ENET_RX_OV) { 1939 - /* FIFO overrun */ 1940 - ndev->stats.rx_fifo_errors++; 1941 - goto rx_processing_done; 1942 - } 1943 - if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH 1944 - | BD_ENET_RX_LAST)) { 1945 - /* Frame too long or too short. */ 1946 - ndev->stats.rx_length_errors++; 1947 - if (status & BD_ENET_RX_LAST) 1948 - netdev_err(ndev, "rcv is not +last\n"); 1949 - } 1950 - if (status & BD_ENET_RX_CR) /* CRC Error */ 1951 - ndev->stats.rx_crc_errors++; 1952 - /* Report late collisions as a frame error. 
*/ 1953 - if (status & (BD_ENET_RX_NO | BD_ENET_RX_CL)) 1954 - ndev->stats.rx_frame_errors++; 1805 + if (unlikely(fec_rx_error_check(ndev, status))) 1955 1806 goto rx_processing_done; 1956 - } 1957 1807 1958 1808 /* Process the incoming frame. */ 1959 1809 ndev->stats.rx_packets++; 1960 1810 pkt_len = fec16_to_cpu(bdp->cbd_datlen); 1961 - ndev->stats.rx_bytes += pkt_len; 1962 - if (fep->quirks & FEC_QUIRK_HAS_RACC) 1963 - ndev->stats.rx_bytes -= 2; 1811 + ndev->stats.rx_bytes += pkt_len - fep->rx_shift; 1964 1812 1965 1813 index = fec_enet_get_bd_index(bdp, &rxq->bd); 1966 - page = rxq->rx_buf[index]; 1967 - cbd_bufaddr = bdp->cbd_bufaddr; 1814 + page = rxq->rx_buf[index].page; 1815 + dma = fec32_to_cpu(bdp->cbd_bufaddr); 1968 1816 if (fec_enet_update_cbd(rxq, bdp, index)) { 1969 1817 ndev->stats.rx_dropped++; 1970 1818 goto rx_processing_done; 1971 1819 } 1972 1820 1973 - dma_sync_single_for_cpu(&fep->pdev->dev, 1974 - fec32_to_cpu(cbd_bufaddr), 1975 - pkt_len, 1821 + dma_sync_single_for_cpu(&fep->pdev->dev, dma, pkt_len, 1976 1822 DMA_FROM_DEVICE); 1977 1823 prefetch(page_address(page)); 1978 - 1979 - if (xdp_prog) { 1980 - xdp_buff_clear_frags_flag(&xdp); 1981 - /* subtract 16bit shift and FCS */ 1982 - xdp_prepare_buff(&xdp, page_address(page), 1983 - data_start, pkt_len - sub_len, false); 1984 - ret = fec_enet_run_xdp(fep, xdp_prog, &xdp, rxq, cpu); 1985 - xdp_result |= ret; 1986 - if (ret != FEC_ENET_XDP_PASS) 1987 - goto rx_processing_done; 1988 - } 1989 - 1990 - /* The packet length includes FCS, but we don't want to 1991 - * include that when passing upstream as it messes up 1992 - * bridging applications. 
1993 - */ 1994 - skb = build_skb(page_address(page), 1995 - PAGE_SIZE << fep->pagepool_order); 1996 - if (unlikely(!skb)) { 1997 - page_pool_recycle_direct(rxq->page_pool, page); 1998 - ndev->stats.rx_dropped++; 1999 - 2000 - netdev_err_once(ndev, "build_skb failed!\n"); 2001 - goto rx_processing_done; 2002 - } 2003 - 2004 - skb_reserve(skb, data_start); 2005 - skb_put(skb, pkt_len - sub_len); 2006 - skb_mark_for_recycle(skb); 2007 1824 2008 1825 if (unlikely(need_swap)) { 2009 1826 u8 *data; ··· 1954 1887 swap_buffer(data, pkt_len); 1955 1888 } 1956 1889 1957 - /* Extract the enhanced buffer descriptor */ 1958 - ebdp = NULL; 1959 - if (fep->bufdesc_ex) 1960 - ebdp = (struct bufdesc_ex *)bdp; 1890 + /* The packet length includes FCS, but we don't want to 1891 + * include that when passing upstream as it messes up 1892 + * bridging applications. 1893 + */ 1894 + skb = fec_build_skb(fep, rxq, bdp, page, pkt_len - sub_len); 1895 + if (!skb) 1896 + goto rx_processing_done; 1961 1897 1962 - /* If this is a VLAN packet remove the VLAN Tag */ 1963 - if (fep->bufdesc_ex && 1964 - (ebdp->cbd_esc & cpu_to_fec32(BD_ENET_RX_VLAN))) 1965 - fec_enet_rx_vlan(ndev, skb); 1966 - 1967 - skb->protocol = eth_type_trans(skb, ndev); 1968 - 1969 - /* Get receive timestamp from the skb */ 1970 - if (fep->hwts_rx_en && fep->bufdesc_ex) 1971 - fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), 1972 - skb_hwtstamps(skb)); 1973 - 1974 - if (fep->bufdesc_ex && 1975 - (fep->csum_flags & FLAG_RX_CSUM_ENABLED)) { 1976 - if (!(ebdp->cbd_esc & cpu_to_fec32(FLAG_RX_CSUM_ERROR))) { 1977 - /* don't check it */ 1978 - skb->ip_summed = CHECKSUM_UNNECESSARY; 1979 - } else { 1980 - skb_checksum_none_assert(skb); 1981 - } 1982 - } 1983 - 1984 - skb_record_rx_queue(skb, queue_id); 1985 1898 napi_gro_receive(&fep->napi, skb); 1986 1899 1987 1900 rx_processing_done: ··· 1995 1948 } 1996 1949 rxq->bd.cur = bdp; 1997 1950 1998 - if (xdp_result & FEC_ENET_XDP_REDIR) 1951 + return pkt_received; 1952 + } 1953 + 1954 + 
static void fec_xdp_drop(struct fec_enet_priv_rx_q *rxq, 1955 + struct xdp_buff *xdp, u32 sync) 1956 + { 1957 + struct page *page = virt_to_head_page(xdp->data); 1958 + 1959 + page_pool_put_page(rxq->page_pool, page, sync, true); 1960 + } 1961 + 1962 + static int 1963 + fec_enet_xdp_get_tx_queue(struct fec_enet_private *fep, int index) 1964 + { 1965 + if (unlikely(index < 0)) 1966 + return 0; 1967 + 1968 + return (index % fep->num_tx_queues); 1969 + } 1970 + 1971 + static int fec_enet_rx_queue_xdp(struct fec_enet_private *fep, int queue, 1972 + int budget, struct bpf_prog *prog) 1973 + { 1974 + u32 data_start = FEC_ENET_XDP_HEADROOM + fep->rx_shift; 1975 + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue]; 1976 + struct net_device *ndev = fep->netdev; 1977 + struct bufdesc *bdp = rxq->bd.cur; 1978 + u32 sub_len = 4 + fep->rx_shift; 1979 + int cpu = smp_processor_id(); 1980 + int pkt_received = 0; 1981 + struct sk_buff *skb; 1982 + u16 status, pkt_len; 1983 + struct xdp_buff xdp; 1984 + int tx_qid = queue; 1985 + struct page *page; 1986 + u32 xdp_res = 0; 1987 + dma_addr_t dma; 1988 + int index, err; 1989 + u32 act, sync; 1990 + 1991 + #if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA) 1992 + /* 1993 + * Hacky flush of all caches instead of using the DMA API for the TSO 1994 + * headers. 1995 + */ 1996 + flush_cache_all(); 1997 + #endif 1998 + 1999 + if (unlikely(tx_qid >= fep->num_tx_queues)) 2000 + tx_qid = fec_enet_xdp_get_tx_queue(fep, cpu); 2001 + 2002 + xdp_init_buff(&xdp, PAGE_SIZE << fep->pagepool_order, &rxq->xdp_rxq); 2003 + 2004 + while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) { 2005 + if (pkt_received >= budget) 2006 + break; 2007 + pkt_received++; 2008 + 2009 + writel(FEC_ENET_RXF_GET(queue), fep->hwp + FEC_IEVENT); 2010 + 2011 + /* Check for errors. 
*/ 2012 + status ^= BD_ENET_RX_LAST; 2013 + if (unlikely(fec_rx_error_check(ndev, status))) 2014 + goto rx_processing_done; 2015 + 2016 + /* Process the incoming frame. */ 2017 + ndev->stats.rx_packets++; 2018 + pkt_len = fec16_to_cpu(bdp->cbd_datlen); 2019 + ndev->stats.rx_bytes += pkt_len - fep->rx_shift; 2020 + 2021 + index = fec_enet_get_bd_index(bdp, &rxq->bd); 2022 + page = rxq->rx_buf[index].page; 2023 + dma = fec32_to_cpu(bdp->cbd_bufaddr); 2024 + 2025 + if (fec_enet_update_cbd(rxq, bdp, index)) { 2026 + ndev->stats.rx_dropped++; 2027 + goto rx_processing_done; 2028 + } 2029 + 2030 + dma_sync_single_for_cpu(&fep->pdev->dev, dma, pkt_len, 2031 + DMA_FROM_DEVICE); 2032 + prefetch(page_address(page)); 2033 + 2034 + xdp_buff_clear_frags_flag(&xdp); 2035 + /* subtract 16bit shift and FCS */ 2036 + pkt_len -= sub_len; 2037 + xdp_prepare_buff(&xdp, page_address(page), data_start, 2038 + pkt_len, false); 2039 + 2040 + act = bpf_prog_run_xdp(prog, &xdp); 2041 + /* Due xdp_adjust_tail and xdp_adjust_head: DMA sync 2042 + * for_device cover max len CPU touch. 2043 + */ 2044 + sync = xdp.data_end - xdp.data; 2045 + sync = max(sync, pkt_len); 2046 + 2047 + switch (act) { 2048 + case XDP_PASS: 2049 + rxq->stats[RX_XDP_PASS]++; 2050 + /* The packet length includes FCS, but we don't want to 2051 + * include that when passing upstream as it messes up 2052 + * bridging applications. 
2053 + */ 2054 + skb = fec_build_skb(fep, rxq, bdp, page, pkt_len); 2055 + if (!skb) 2056 + trace_xdp_exception(ndev, prog, XDP_PASS); 2057 + else 2058 + napi_gro_receive(&fep->napi, skb); 2059 + 2060 + break; 2061 + case XDP_REDIRECT: 2062 + rxq->stats[RX_XDP_REDIRECT]++; 2063 + err = xdp_do_redirect(ndev, &xdp, prog); 2064 + if (unlikely(err)) { 2065 + fec_xdp_drop(rxq, &xdp, sync); 2066 + trace_xdp_exception(ndev, prog, XDP_REDIRECT); 2067 + } else { 2068 + xdp_res |= FEC_ENET_XDP_REDIR; 2069 + } 2070 + break; 2071 + case XDP_TX: 2072 + rxq->stats[RX_XDP_TX]++; 2073 + err = fec_enet_xdp_tx_xmit(fep, cpu, &xdp, sync, tx_qid); 2074 + if (unlikely(err)) { 2075 + rxq->stats[RX_XDP_TX_ERRORS]++; 2076 + fec_xdp_drop(rxq, &xdp, sync); 2077 + trace_xdp_exception(ndev, prog, XDP_TX); 2078 + } else { 2079 + xdp_res |= FEC_ENET_XDP_TX; 2080 + } 2081 + break; 2082 + default: 2083 + bpf_warn_invalid_xdp_action(ndev, prog, act); 2084 + fallthrough; 2085 + case XDP_ABORTED: 2086 + trace_xdp_exception(ndev, prog, act); 2087 + /* handle aborts by dropping packet */ 2088 + fallthrough; 2089 + case XDP_DROP: 2090 + rxq->stats[RX_XDP_DROP]++; 2091 + fec_xdp_drop(rxq, &xdp, sync); 2092 + break; 2093 + } 2094 + 2095 + rx_processing_done: 2096 + /* Clear the status flags for this buffer */ 2097 + status &= ~BD_ENET_RX_STATS; 2098 + /* Mark the buffer empty */ 2099 + status |= BD_ENET_RX_EMPTY; 2100 + 2101 + if (fep->bufdesc_ex) { 2102 + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; 2103 + 2104 + ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT); 2105 + ebdp->cbd_prot = 0; 2106 + ebdp->cbd_bdu = 0; 2107 + } 2108 + 2109 + /* Make sure the updates to rest of the descriptor are 2110 + * performed before transferring ownership. 
2111 + */ 2112 + dma_wmb(); 2113 + bdp->cbd_sc = cpu_to_fec16(status); 2114 + 2115 + /* Update BD pointer to next entry */ 2116 + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); 2117 + 2118 + /* Doing this here will keep the FEC running while we process 2119 + * incoming frames. On a heavily loaded network, we should be 2120 + * able to keep up at the expense of system resources. 2121 + */ 2122 + writel(0, rxq->bd.reg_desc_active); 2123 + } 2124 + 2125 + rxq->bd.cur = bdp; 2126 + 2127 + if (xdp_res & FEC_ENET_XDP_REDIR) 1999 2128 xdp_do_flush(); 2129 + 2130 + if (xdp_res & FEC_ENET_XDP_TX) 2131 + /* Trigger transmission start */ 2132 + fec_txq_trigger_xmit(fep, fep->tx_queue[tx_qid]); 2133 + 2134 + return pkt_received; 2135 + } 2136 + 2137 + static struct sk_buff *fec_build_skb_zc(struct xdp_buff *xsk, 2138 + struct napi_struct *napi) 2139 + { 2140 + size_t len = xdp_get_buff_len(xsk); 2141 + struct sk_buff *skb; 2142 + 2143 + skb = napi_alloc_skb(napi, len); 2144 + if (unlikely(!skb)) { 2145 + xsk_buff_free(xsk); 2146 + return NULL; 2147 + } 2148 + 2149 + skb_put_data(skb, xsk->data, len); 2150 + xsk_buff_free(xsk); 2151 + 2152 + return skb; 2153 + } 2154 + 2155 + static int fec_enet_xsk_tx_xmit(struct fec_enet_private *fep, 2156 + struct xdp_buff *xsk, int cpu, 2157 + int queue) 2158 + { 2159 + struct netdev_queue *nq = netdev_get_tx_queue(fep->netdev, queue); 2160 + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; 2161 + u32 offset = xsk->data - xsk->data_hard_start; 2162 + u32 headroom = txq->xsk_pool->headroom; 2163 + u32 len = xsk->data_end - xsk->data; 2164 + u32 index, status, estatus; 2165 + struct bufdesc *bdp; 2166 + dma_addr_t dma; 2167 + 2168 + __netif_tx_lock(nq, cpu); 2169 + 2170 + /* Avoid tx timeout as XDP shares the queue with kernel stack */ 2171 + txq_trans_cond_update(nq); 2172 + 2173 + if (!fec_enet_get_free_txdesc_num(txq)) { 2174 + __netif_tx_unlock(nq); 2175 + 2176 + return -EBUSY; 2177 + } 2178 + 2179 + /* Fill in a Tx ring entry */ 2180 
+ bdp = txq->bd.cur; 2181 + status = fec16_to_cpu(bdp->cbd_sc); 2182 + status &= ~BD_ENET_TX_STATS; 2183 + 2184 + index = fec_enet_get_bd_index(bdp, &txq->bd); 2185 + dma = xsk_buff_xdp_get_frame_dma(xsk) + headroom + offset; 2186 + 2187 + xsk_buff_raw_dma_sync_for_device(txq->xsk_pool, dma, len); 2188 + 2189 + txq->tx_buf[index].buf_p = xsk; 2190 + txq->tx_buf[index].type = FEC_TXBUF_T_XSK_TX; 2191 + 2192 + status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST); 2193 + if (fep->bufdesc_ex) 2194 + estatus = BD_ENET_TX_INT; 2195 + 2196 + bdp->cbd_bufaddr = cpu_to_fec32(dma); 2197 + bdp->cbd_datlen = cpu_to_fec16(len); 2198 + 2199 + if (fep->bufdesc_ex) { 2200 + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; 2201 + 2202 + if (fep->quirks & FEC_QUIRK_HAS_AVB) 2203 + estatus |= FEC_TX_BD_FTYPE(txq->bd.qid); 2204 + 2205 + ebdp->cbd_bdu = 0; 2206 + ebdp->cbd_esc = cpu_to_fec32(estatus); 2207 + } 2208 + 2209 + dma_wmb(); 2210 + status |= BD_ENET_TX_READY | BD_ENET_TX_TC; 2211 + bdp->cbd_sc = cpu_to_fec16(status); 2212 + dma_wmb(); 2213 + 2214 + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); 2215 + txq->bd.cur = bdp; 2216 + 2217 + __netif_tx_unlock(nq); 2218 + 2219 + return 0; 2220 + } 2221 + 2222 + static int fec_enet_rx_queue_xsk(struct fec_enet_private *fep, int queue, 2223 + int budget, struct bpf_prog *prog) 2224 + { 2225 + u32 data_start = FEC_ENET_XDP_HEADROOM + fep->rx_shift; 2226 + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue]; 2227 + struct net_device *ndev = fep->netdev; 2228 + struct bufdesc *bdp = rxq->bd.cur; 2229 + u32 sub_len = 4 + fep->rx_shift; 2230 + int cpu = smp_processor_id(); 2231 + bool wakeup_xsk = false; 2232 + struct xdp_buff *xsk; 2233 + int pkt_received = 0; 2234 + struct sk_buff *skb; 2235 + u16 status, pkt_len; 2236 + u32 xdp_res = 0; 2237 + int index, err; 2238 + u32 act; 2239 + 2240 + #if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA) 2241 + /* 2242 + * Hacky flush of all caches instead of using the DMA API for the TSO 
2243 + * headers. 2244 + */ 2245 + flush_cache_all(); 2246 + #endif 2247 + 2248 + while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) { 2249 + if (unlikely(pkt_received >= budget)) 2250 + break; 2251 + 2252 + writel(FEC_ENET_RXF_GET(queue), fep->hwp + FEC_IEVENT); 2253 + 2254 + index = fec_enet_get_bd_index(bdp, &rxq->bd); 2255 + xsk = rxq->rx_buf[index].xdp; 2256 + if (unlikely(!xsk)) { 2257 + if (fec_enet_update_cbd_zc(rxq, bdp, index)) 2258 + break; 2259 + 2260 + if (fep->bufdesc_ex) { 2261 + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; 2262 + 2263 + ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT); 2264 + ebdp->cbd_prot = 0; 2265 + ebdp->cbd_bdu = 0; 2266 + } 2267 + 2268 + dma_wmb(); 2269 + status &= ~BD_ENET_RX_STATS; 2270 + status |= BD_ENET_RX_EMPTY; 2271 + bdp->cbd_sc = cpu_to_fec16(status); 2272 + break; 2273 + } 2274 + 2275 + pkt_received++; 2276 + /* Check for errors. */ 2277 + status ^= BD_ENET_RX_LAST; 2278 + if (unlikely(fec_rx_error_check(ndev, status))) 2279 + goto rx_processing_done; 2280 + 2281 + /* Process the incoming frame. */ 2282 + ndev->stats.rx_packets++; 2283 + pkt_len = fec16_to_cpu(bdp->cbd_datlen); 2284 + ndev->stats.rx_bytes += pkt_len - fep->rx_shift; 2285 + 2286 + if (fec_enet_update_cbd_zc(rxq, bdp, index)) { 2287 + ndev->stats.rx_dropped++; 2288 + goto rx_processing_done; 2289 + } 2290 + 2291 + pkt_len -= sub_len; 2292 + xsk->data = xsk->data_hard_start + data_start; 2293 + /* Subtract FCS and 16bit shift */ 2294 + xsk->data_end = xsk->data + pkt_len; 2295 + xsk->data_meta = xsk->data; 2296 + xsk_buff_dma_sync_for_cpu(xsk); 2297 + 2298 + /* If the XSK pool is enabled before the bpf program is 2299 + * installed, or the bpf program is uninstalled before 2300 + * the XSK pool is disabled. prog will be NULL and we 2301 + * need to set a default XDP_PASS action. 
2302 + */ 2303 + if (unlikely(!prog)) 2304 + act = XDP_PASS; 2305 + else 2306 + act = bpf_prog_run_xdp(prog, xsk); 2307 + 2308 + switch (act) { 2309 + case XDP_PASS: 2310 + rxq->stats[RX_XDP_PASS]++; 2311 + skb = fec_build_skb_zc(xsk, &fep->napi); 2312 + if (unlikely(!skb)) { 2313 + ndev->stats.rx_dropped++; 2314 + trace_xdp_exception(ndev, prog, XDP_PASS); 2315 + } else { 2316 + napi_gro_receive(&fep->napi, skb); 2317 + } 2318 + 2319 + break; 2320 + case XDP_TX: 2321 + rxq->stats[RX_XDP_TX]++; 2322 + err = fec_enet_xsk_tx_xmit(fep, xsk, cpu, queue); 2323 + if (unlikely(err)) { 2324 + rxq->stats[RX_XDP_TX_ERRORS]++; 2325 + xsk_buff_free(xsk); 2326 + trace_xdp_exception(ndev, prog, XDP_TX); 2327 + } else { 2328 + xdp_res |= FEC_ENET_XDP_TX; 2329 + } 2330 + break; 2331 + case XDP_REDIRECT: 2332 + rxq->stats[RX_XDP_REDIRECT]++; 2333 + err = xdp_do_redirect(ndev, xsk, prog); 2334 + if (unlikely(err)) { 2335 + if (err == -ENOBUFS) 2336 + wakeup_xsk = true; 2337 + 2338 + rxq->stats[RX_XDP_DROP]++; 2339 + xsk_buff_free(xsk); 2340 + trace_xdp_exception(ndev, prog, XDP_REDIRECT); 2341 + } else { 2342 + xdp_res |= FEC_ENET_XDP_REDIR; 2343 + } 2344 + break; 2345 + default: 2346 + bpf_warn_invalid_xdp_action(ndev, prog, act); 2347 + fallthrough; 2348 + case XDP_ABORTED: 2349 + trace_xdp_exception(ndev, prog, act); 2350 + fallthrough; 2351 + case XDP_DROP: 2352 + rxq->stats[RX_XDP_DROP]++; 2353 + xsk_buff_free(xsk); 2354 + break; 2355 + } 2356 + 2357 + rx_processing_done: 2358 + /* Clear the status flags for this buffer */ 2359 + status &= ~BD_ENET_RX_STATS; 2360 + /* Mark the buffer empty */ 2361 + status |= BD_ENET_RX_EMPTY; 2362 + 2363 + if (fep->bufdesc_ex) { 2364 + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; 2365 + 2366 + ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT); 2367 + ebdp->cbd_prot = 0; 2368 + ebdp->cbd_bdu = 0; 2369 + } 2370 + 2371 + /* Make sure the updates to rest of the descriptor are 2372 + * performed before transferring ownership. 
2373 + */ 2374 + dma_wmb(); 2375 + bdp->cbd_sc = cpu_to_fec16(status); 2376 + 2377 + /* Update BD pointer to next entry */ 2378 + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); 2379 + 2380 + /* Doing this here will keep the FEC running while we process 2381 + * incoming frames. On a heavily loaded network, we should be 2382 + * able to keep up at the expense of system resources. 2383 + */ 2384 + writel(0, rxq->bd.reg_desc_active); 2385 + } 2386 + 2387 + rxq->bd.cur = bdp; 2388 + 2389 + if (xdp_res & FEC_ENET_XDP_REDIR) 2390 + xdp_do_flush(); 2391 + 2392 + if (xdp_res & FEC_ENET_XDP_TX) 2393 + fec_txq_trigger_xmit(fep, fep->tx_queue[queue]); 2394 + 2395 + if (rxq->xsk_pool && xsk_uses_need_wakeup(rxq->xsk_pool)) { 2396 + if (wakeup_xsk) 2397 + xsk_set_rx_need_wakeup(rxq->xsk_pool); 2398 + else 2399 + xsk_clear_rx_need_wakeup(rxq->xsk_pool); 2400 + } 2000 2401 2001 2402 return pkt_received; 2002 2403 } ··· 2452 1957 static int fec_enet_rx(struct net_device *ndev, int budget) 2453 1958 { 2454 1959 struct fec_enet_private *fep = netdev_priv(ndev); 1960 + struct bpf_prog *prog = READ_ONCE(fep->xdp_prog); 2455 1961 int i, done = 0; 2456 1962 2457 1963 /* Make sure that AVB queues are processed first. 
*/ 2458 - for (i = fep->num_rx_queues - 1; i >= 0; i--) 2459 - done += fec_enet_rx_queue(ndev, i, budget - done); 1964 + for (i = fep->num_rx_queues - 1; i >= 0; i--) { 1965 + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[i]; 1966 + int batch = budget - done; 1967 + 1968 + if (rxq->xsk_pool) 1969 + done += fec_enet_rx_queue_xsk(fep, i, batch, prog); 1970 + else if (prog) 1971 + done += fec_enet_rx_queue_xdp(fep, i, batch, prog); 1972 + else 1973 + done += fec_enet_rx_queue(fep, i, batch); 1974 + } 2460 1975 2461 1976 return done; 2462 1977 } ··· 2509 2004 { 2510 2005 struct net_device *ndev = napi->dev; 2511 2006 struct fec_enet_private *fep = netdev_priv(ndev); 2512 - int done = 0; 2007 + int rx_done = 0, tx_done = 0; 2008 + int max_done; 2513 2009 2514 2010 do { 2515 - done += fec_enet_rx(ndev, budget - done); 2516 - fec_enet_tx(ndev, budget); 2517 - } while ((done < budget) && fec_enet_collect_events(fep)); 2011 + rx_done += fec_enet_rx(ndev, budget - rx_done); 2012 + tx_done += fec_enet_tx(ndev, budget); 2013 + max_done = max(rx_done, tx_done); 2014 + } while ((max_done < budget) && fec_enet_collect_events(fep)); 2518 2015 2519 - if (done < budget) { 2520 - napi_complete_done(napi, done); 2016 + if (max_done < budget) { 2017 + napi_complete_done(napi, max_done); 2521 2018 writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); 2019 + return max_done; 2522 2020 } 2523 2021 2524 - return done; 2022 + return budget; 2525 2023 } 2526 2024 2527 2025 /* ------------------------------------------------------------------------- */ ··· 3811 3303 .self_test = net_selftest, 3812 3304 }; 3813 3305 3306 + static int fec_xdp_rxq_info_reg(struct fec_enet_private *fep, 3307 + struct fec_enet_priv_rx_q *rxq) 3308 + { 3309 + struct net_device *ndev = fep->netdev; 3310 + void *allocator; 3311 + int type, err; 3312 + 3313 + err = xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq->id, 0); 3314 + if (err) { 3315 + netdev_err(ndev, "Failed to register xdp rxq info\n"); 3316 + return err; 3317 + } 
3318 + 3319 + allocator = rxq->xsk_pool ? NULL : rxq->page_pool; 3320 + type = rxq->xsk_pool ? MEM_TYPE_XSK_BUFF_POOL : MEM_TYPE_PAGE_POOL; 3321 + err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, type, allocator); 3322 + if (err) { 3323 + netdev_err(ndev, "Failed to register XDP mem model\n"); 3324 + xdp_rxq_info_unreg(&rxq->xdp_rxq); 3325 + 3326 + return err; 3327 + } 3328 + 3329 + if (rxq->xsk_pool) 3330 + xsk_pool_set_rxq_info(rxq->xsk_pool, &rxq->xdp_rxq); 3331 + 3332 + return 0; 3333 + } 3334 + 3335 + static void fec_xdp_rxq_info_unreg(struct fec_enet_priv_rx_q *rxq) 3336 + { 3337 + if (xdp_rxq_info_is_reg(&rxq->xdp_rxq)) { 3338 + xdp_rxq_info_unreg_mem_model(&rxq->xdp_rxq); 3339 + xdp_rxq_info_unreg(&rxq->xdp_rxq); 3340 + } 3341 + } 3342 + 3343 + static void fec_free_rxq_buffers(struct fec_enet_priv_rx_q *rxq) 3344 + { 3345 + bool xsk = !!rxq->xsk_pool; 3346 + int i; 3347 + 3348 + for (i = 0; i < rxq->bd.ring_size; i++) { 3349 + union fec_rx_buffer *buf = &rxq->rx_buf[i]; 3350 + 3351 + if (!buf->buf_p) 3352 + continue; 3353 + 3354 + if (xsk) 3355 + xsk_buff_free(buf->xdp); 3356 + else 3357 + page_pool_put_full_page(rxq->page_pool, 3358 + buf->page, false); 3359 + 3360 + rxq->rx_buf[i].buf_p = NULL; 3361 + } 3362 + 3363 + if (!xsk) { 3364 + page_pool_destroy(rxq->page_pool); 3365 + rxq->page_pool = NULL; 3366 + } 3367 + } 3368 + 3814 3369 static void fec_enet_free_buffers(struct net_device *ndev) 3815 3370 { 3816 3371 struct fec_enet_private *fep = netdev_priv(ndev); 3817 3372 unsigned int i; 3818 3373 struct fec_enet_priv_tx_q *txq; 3819 3374 struct fec_enet_priv_rx_q *rxq; 3375 + struct page *page; 3820 3376 unsigned int q; 3821 3377 3822 3378 for (q = 0; q < fep->num_rx_queues; q++) { 3823 3379 rxq = fep->rx_queue[q]; 3824 - for (i = 0; i < rxq->bd.ring_size; i++) 3825 - page_pool_put_full_page(rxq->page_pool, rxq->rx_buf[i], 3826 - false); 3380 + 3381 + fec_xdp_rxq_info_unreg(rxq); 3382 + fec_free_rxq_buffers(rxq); 3827 3383 3828 3384 for (i = 0; i < 
XDP_STATS_TOTAL; i++) 3829 3385 rxq->stats[i] = 0; 3830 - 3831 - if (xdp_rxq_info_is_reg(&rxq->xdp_rxq)) 3832 - xdp_rxq_info_unreg(&rxq->xdp_rxq); 3833 - page_pool_destroy(rxq->page_pool); 3834 - rxq->page_pool = NULL; 3835 3386 } 3836 3387 3837 3388 for (q = 0; q < fep->num_tx_queues; q++) { ··· 3899 3332 kfree(txq->tx_bounce[i]); 3900 3333 txq->tx_bounce[i] = NULL; 3901 3334 3902 - if (!txq->tx_buf[i].buf_p) { 3903 - txq->tx_buf[i].type = FEC_TXBUF_T_SKB; 3904 - continue; 3905 - } 3906 - 3907 - if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) { 3335 + switch (txq->tx_buf[i].type) { 3336 + case FEC_TXBUF_T_SKB: 3908 3337 dev_kfree_skb(txq->tx_buf[i].buf_p); 3909 - } else if (txq->tx_buf[i].type == FEC_TXBUF_T_XDP_NDO) { 3338 + break; 3339 + case FEC_TXBUF_T_XDP_NDO: 3910 3340 xdp_return_frame(txq->tx_buf[i].buf_p); 3911 - } else { 3912 - struct page *page = txq->tx_buf[i].buf_p; 3913 - 3341 + break; 3342 + case FEC_TXBUF_T_XDP_TX: 3343 + page = txq->tx_buf[i].buf_p; 3914 3344 page_pool_put_page(pp_page_to_nmdesc(page)->pp, 3915 3345 page, 0, false); 3346 + break; 3347 + case FEC_TXBUF_T_XSK_TX: 3348 + xsk_buff_free(txq->tx_buf[i].buf_p); 3349 + break; 3350 + default: 3351 + break; 3916 3352 } 3917 3353 3918 3354 txq->tx_buf[i].buf_p = NULL; ··· 3992 3422 return ret; 3993 3423 } 3994 3424 3995 - static int 3996 - fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue) 3425 + static int fec_alloc_rxq_buffers_pp(struct fec_enet_private *fep, 3426 + struct fec_enet_priv_rx_q *rxq) 3997 3427 { 3998 - struct fec_enet_private *fep = netdev_priv(ndev); 3999 - struct fec_enet_priv_rx_q *rxq; 3428 + struct bufdesc *bdp = rxq->bd.base; 4000 3429 dma_addr_t phys_addr; 4001 - struct bufdesc *bdp; 4002 3430 struct page *page; 4003 3431 int i, err; 4004 3432 4005 - rxq = fep->rx_queue[queue]; 4006 - bdp = rxq->bd.base; 4007 - 4008 - err = fec_enet_create_page_pool(fep, rxq, rxq->bd.ring_size); 3433 + err = fec_enet_create_page_pool(fep, rxq); 4009 3434 if (err < 0) { 
4010 - netdev_err(ndev, "%s failed queue %d (%d)\n", __func__, queue, err); 3435 + netdev_err(fep->netdev, "%s failed queue %d (%d)\n", 3436 + __func__, rxq->bd.qid, err); 4011 3437 return err; 4012 3438 } 4013 3439 ··· 4022 3456 4023 3457 for (i = 0; i < rxq->bd.ring_size; i++) { 4024 3458 page = page_pool_dev_alloc_pages(rxq->page_pool); 4025 - if (!page) 4026 - goto err_alloc; 3459 + if (!page) { 3460 + err = -ENOMEM; 3461 + goto free_rx_buffers; 3462 + } 4027 3463 4028 3464 phys_addr = page_pool_get_dma_addr(page) + FEC_ENET_XDP_HEADROOM; 4029 3465 bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); 4030 - 4031 - rxq->rx_buf[i] = page; 4032 - bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY); 4033 - 4034 - if (fep->bufdesc_ex) { 4035 - struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; 4036 - ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT); 4037 - } 4038 - 3466 + rxq->rx_buf[i].page = page; 4039 3467 bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); 4040 3468 } 4041 3469 4042 - /* Set the last buffer to wrap. 
*/ 4043 - bdp = fec_enet_get_prevdesc(bdp, &rxq->bd); 4044 - bdp->cbd_sc |= cpu_to_fec16(BD_ENET_RX_WRAP); 4045 3470 return 0; 4046 3471 4047 - err_alloc: 3472 + free_rx_buffers: 3473 + fec_free_rxq_buffers(rxq); 3474 + 3475 + return err; 3476 + } 3477 + 3478 + static int fec_alloc_rxq_buffers_zc(struct fec_enet_private *fep, 3479 + struct fec_enet_priv_rx_q *rxq) 3480 + { 3481 + union fec_rx_buffer *buf = &rxq->rx_buf[0]; 3482 + struct bufdesc *bdp = rxq->bd.base; 3483 + dma_addr_t phys_addr; 3484 + int i; 3485 + 3486 + for (i = 0; i < rxq->bd.ring_size; i++) { 3487 + buf[i].xdp = xsk_buff_alloc(rxq->xsk_pool); 3488 + if (!buf[i].xdp) 3489 + break; 3490 + 3491 + phys_addr = xsk_buff_xdp_get_dma(buf[i].xdp); 3492 + bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); 3493 + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); 3494 + } 3495 + 3496 + for (; i < rxq->bd.ring_size; i++) { 3497 + buf[i].xdp = NULL; 3498 + bdp->cbd_bufaddr = cpu_to_fec32(0); 3499 + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); 3500 + } 3501 + 3502 + return 0; 3503 + } 3504 + 3505 + static int 3506 + fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue) 3507 + { 3508 + struct fec_enet_private *fep = netdev_priv(ndev); 3509 + struct fec_enet_priv_rx_q *rxq; 3510 + int err; 3511 + 3512 + rxq = fep->rx_queue[queue]; 3513 + if (rxq->xsk_pool) { 3514 + /* RX XDP ZC buffer pool may not be populated, e.g. 3515 + * xdpsock TX-only. 
3516 + */ 3517 + fec_alloc_rxq_buffers_zc(fep, rxq); 3518 + } else { 3519 + err = fec_alloc_rxq_buffers_pp(fep, rxq); 3520 + if (err) 3521 + goto free_buffers; 3522 + } 3523 + 3524 + err = fec_xdp_rxq_info_reg(fep, rxq); 3525 + if (err) 3526 + goto free_buffers; 3527 + 3528 + return 0; 3529 + 3530 + free_buffers: 4048 3531 fec_enet_free_buffers(ndev); 4049 - return -ENOMEM; 3532 + 3533 + return err; 4050 3534 } 4051 3535 4052 3536 static int ··· 4410 3794 return fec_enet_vlan_pri_to_queue[vlan_tag >> 13]; 4411 3795 } 4412 3796 3797 + static void fec_free_rxq(struct fec_enet_priv_rx_q *rxq) 3798 + { 3799 + fec_xdp_rxq_info_unreg(rxq); 3800 + fec_free_rxq_buffers(rxq); 3801 + kfree(rxq); 3802 + } 3803 + 3804 + static struct fec_enet_priv_rx_q * 3805 + fec_alloc_new_rxq_xsk(struct fec_enet_private *fep, int queue, 3806 + struct xsk_buff_pool *pool) 3807 + { 3808 + struct fec_enet_priv_rx_q *old_rxq = fep->rx_queue[queue]; 3809 + struct fec_enet_priv_rx_q *rxq; 3810 + union fec_rx_buffer *buf; 3811 + int i; 3812 + 3813 + rxq = kzalloc(sizeof(*rxq), GFP_KERNEL); 3814 + if (!rxq) 3815 + return NULL; 3816 + 3817 + /* Copy the BD ring to the new rxq */ 3818 + rxq->bd = old_rxq->bd; 3819 + rxq->id = queue; 3820 + rxq->xsk_pool = pool; 3821 + buf = &rxq->rx_buf[0]; 3822 + 3823 + for (i = 0; i < rxq->bd.ring_size; i++) { 3824 + buf[i].xdp = xsk_buff_alloc(pool); 3825 + /* RX XDP ZC buffer pool may not be populated, e.g. 3826 + * xdpsock TX-only. 
3827 + */ 3828 + if (!buf[i].xdp) 3829 + break; 3830 + } 3831 + 3832 + if (fec_xdp_rxq_info_reg(fep, rxq)) 3833 + goto free_buffers; 3834 + 3835 + return rxq; 3836 + 3837 + free_buffers: 3838 + while (--i >= 0) 3839 + xsk_buff_free(buf[i].xdp); 3840 + 3841 + kfree(rxq); 3842 + 3843 + return NULL; 3844 + } 3845 + 3846 + static struct fec_enet_priv_rx_q * 3847 + fec_alloc_new_rxq_pp(struct fec_enet_private *fep, int queue) 3848 + { 3849 + struct fec_enet_priv_rx_q *old_rxq = fep->rx_queue[queue]; 3850 + struct fec_enet_priv_rx_q *rxq; 3851 + union fec_rx_buffer *buf; 3852 + int i = 0; 3853 + 3854 + rxq = kzalloc(sizeof(*rxq), GFP_KERNEL); 3855 + if (!rxq) 3856 + return NULL; 3857 + 3858 + rxq->bd = old_rxq->bd; 3859 + rxq->id = queue; 3860 + 3861 + if (fec_enet_create_page_pool(fep, rxq)) 3862 + goto free_rxq; 3863 + 3864 + buf = &rxq->rx_buf[0]; 3865 + for (; i < rxq->bd.ring_size; i++) { 3866 + buf[i].page = page_pool_dev_alloc_pages(rxq->page_pool); 3867 + if (!buf[i].page) 3868 + goto free_buffers; 3869 + } 3870 + 3871 + if (fec_xdp_rxq_info_reg(fep, rxq)) 3872 + goto free_buffers; 3873 + 3874 + return rxq; 3875 + 3876 + free_buffers: 3877 + while (--i >= 0) 3878 + page_pool_put_full_page(rxq->page_pool, 3879 + buf[i].page, false); 3880 + 3881 + page_pool_destroy(rxq->page_pool); 3882 + free_rxq: 3883 + kfree(rxq); 3884 + 3885 + return NULL; 3886 + } 3887 + 3888 + static void fec_init_rxq_bd_buffers(struct fec_enet_priv_rx_q *rxq, bool xsk) 3889 + { 3890 + union fec_rx_buffer *buf = &rxq->rx_buf[0]; 3891 + struct bufdesc *bdp = rxq->bd.base; 3892 + dma_addr_t dma; 3893 + 3894 + for (int i = 0; i < rxq->bd.ring_size; i++) { 3895 + if (xsk) 3896 + dma = buf[i].xdp ? 
3897 + xsk_buff_xdp_get_dma(buf[i].xdp) : 0; 3898 + else 3899 + dma = page_pool_get_dma_addr(buf[i].page) + 3900 + FEC_ENET_XDP_HEADROOM; 3901 + 3902 + bdp->cbd_bufaddr = cpu_to_fec32(dma); 3903 + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); 3904 + } 3905 + } 3906 + 3907 + static int fec_xsk_restart_napi(struct fec_enet_private *fep, 3908 + struct xsk_buff_pool *pool, 3909 + u16 queue) 3910 + { 3911 + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; 3912 + struct net_device *ndev = fep->netdev; 3913 + struct fec_enet_priv_rx_q *rxq; 3914 + int err; 3915 + 3916 + napi_disable(&fep->napi); 3917 + netif_tx_disable(ndev); 3918 + synchronize_rcu(); 3919 + 3920 + rxq = pool ? fec_alloc_new_rxq_xsk(fep, queue, pool) : 3921 + fec_alloc_new_rxq_pp(fep, queue); 3922 + if (!rxq) { 3923 + err = -ENOMEM; 3924 + goto err_alloc_new_rxq; 3925 + } 3926 + 3927 + /* Replace the old rxq with the new rxq */ 3928 + fec_free_rxq(fep->rx_queue[queue]); 3929 + fep->rx_queue[queue] = rxq; 3930 + fec_init_rxq_bd_buffers(rxq, !!pool); 3931 + txq->xsk_pool = pool; 3932 + 3933 + fec_restart(ndev); 3934 + napi_enable(&fep->napi); 3935 + netif_tx_start_all_queues(ndev); 3936 + 3937 + return 0; 3938 + 3939 + err_alloc_new_rxq: 3940 + napi_enable(&fep->napi); 3941 + netif_tx_start_all_queues(ndev); 3942 + 3943 + return err; 3944 + } 3945 + 3946 + static int fec_enable_xsk_pool(struct fec_enet_private *fep, 3947 + struct xsk_buff_pool *pool, 3948 + u16 queue) 3949 + { 3950 + int err; 3951 + 3952 + err = xsk_pool_dma_map(pool, &fep->pdev->dev, 0); 3953 + if (err) { 3954 + netdev_err(fep->netdev, "Failed to map xsk pool\n"); 3955 + return err; 3956 + } 3957 + 3958 + if (!netif_running(fep->netdev)) { 3959 + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue]; 3960 + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; 3961 + 3962 + rxq->xsk_pool = pool; 3963 + txq->xsk_pool = pool; 3964 + 3965 + return 0; 3966 + } 3967 + 3968 + err = fec_xsk_restart_napi(fep, pool, queue); 3969 + if (err) { 
3970 + xsk_pool_dma_unmap(pool, 0); 3971 + return err; 3972 + } 3973 + 3974 + return 0; 3975 + } 3976 + 3977 + static int fec_disable_xsk_pool(struct fec_enet_private *fep, 3978 + u16 queue) 3979 + { 3980 + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; 3981 + struct xsk_buff_pool *old_pool = txq->xsk_pool; 3982 + int err; 3983 + 3984 + if (!netif_running(fep->netdev)) { 3985 + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue]; 3986 + 3987 + xsk_pool_dma_unmap(old_pool, 0); 3988 + rxq->xsk_pool = NULL; 3989 + txq->xsk_pool = NULL; 3990 + 3991 + return 0; 3992 + } 3993 + 3994 + err = fec_xsk_restart_napi(fep, NULL, queue); 3995 + if (err) 3996 + return err; 3997 + 3998 + xsk_pool_dma_unmap(old_pool, 0); 3999 + 4000 + return 0; 4001 + } 4002 + 4003 + static int fec_setup_xsk_pool(struct fec_enet_private *fep, 4004 + struct xsk_buff_pool *pool, 4005 + u16 queue) 4006 + { 4007 + if (queue >= fep->num_rx_queues || queue >= fep->num_tx_queues) 4008 + return -ERANGE; 4009 + 4010 + return pool ? fec_enable_xsk_pool(fep, pool, queue) : 4011 + fec_disable_xsk_pool(fep, queue); 4012 + } 4013 + 4413 4014 static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf) 4414 4015 { 4415 4016 struct fec_enet_private *fep = netdev_priv(dev); 4416 4017 bool is_run = netif_running(dev); 4417 4018 struct bpf_prog *old_prog; 4418 4019 4020 + /* No need to support the SoCs that require to do the frame swap 4021 + * because the performance wouldn't be better than the skb mode. 4022 + */ 4023 + if (fep->quirks & FEC_QUIRK_SWAP_FRAME) 4024 + return -EOPNOTSUPP; 4025 + 4419 4026 switch (bpf->command) { 4420 4027 case XDP_SETUP_PROG: 4421 - /* No need to support the SoCs that require to 4422 - * do the frame swap because the performance wouldn't be 4423 - * better than the skb mode. 
4424 - */ 4425 - if (fep->quirks & FEC_QUIRK_SWAP_FRAME) 4426 - return -EOPNOTSUPP; 4427 - 4428 4028 if (!bpf->prog) 4429 4029 xdp_features_clear_redirect_target(dev); 4430 4030 ··· 4664 3832 xdp_features_set_redirect_target(dev, false); 4665 3833 4666 3834 return 0; 4667 - 4668 3835 case XDP_SETUP_XSK_POOL: 4669 - return -EOPNOTSUPP; 4670 - 3836 + return fec_setup_xsk_pool(fep, bpf->xsk.pool, 3837 + bpf->xsk.queue_id); 4671 3838 default: 4672 3839 return -EOPNOTSUPP; 4673 3840 } 4674 - } 4675 - 4676 - static int 4677 - fec_enet_xdp_get_tx_queue(struct fec_enet_private *fep, int index) 4678 - { 4679 - if (unlikely(index < 0)) 4680 - return 0; 4681 - 4682 - return (index % fep->num_tx_queues); 4683 3841 } 4684 3842 4685 3843 static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, ··· 4757 3935 4758 3936 txq->bd.cur = bdp; 4759 3937 4760 - /* Trigger transmission start */ 4761 - if (!(fep->quirks & FEC_QUIRK_ERR007885) || 4762 - !readl(txq->bd.reg_desc_active) || 4763 - !readl(txq->bd.reg_desc_active) || 4764 - !readl(txq->bd.reg_desc_active) || 4765 - !readl(txq->bd.reg_desc_active)) 4766 - writel(0, txq->bd.reg_desc_active); 4767 - 4768 3938 return 0; 4769 3939 } 4770 3940 4771 3941 static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep, 4772 3942 int cpu, struct xdp_buff *xdp, 4773 - u32 dma_sync_len) 3943 + u32 dma_sync_len, int queue) 4774 3944 { 4775 - struct fec_enet_priv_tx_q *txq; 4776 - struct netdev_queue *nq; 4777 - int queue, ret; 4778 - 4779 - queue = fec_enet_xdp_get_tx_queue(fep, cpu); 4780 - txq = fep->tx_queue[queue]; 4781 - nq = netdev_get_tx_queue(fep->netdev, queue); 3945 + struct netdev_queue *nq = netdev_get_tx_queue(fep->netdev, queue); 3946 + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; 3947 + int ret; 4782 3948 4783 3949 __netif_tx_lock(nq, cpu); 4784 3950 ··· 4806 3996 sent_frames++; 4807 3997 } 4808 3998 3999 + if (sent_frames) 4000 + fec_txq_trigger_xmit(fep, txq); 4001 + 4809 4002 __netif_tx_unlock(nq); 4810 4003 
4811 4004 return sent_frames; 4005 + } 4006 + 4007 + static int fec_enet_xsk_wakeup(struct net_device *ndev, u32 queue, u32 flags) 4008 + { 4009 + struct fec_enet_private *fep = netdev_priv(ndev); 4010 + struct fec_enet_priv_rx_q *rxq; 4011 + 4012 + if (!netif_running(ndev) || !netif_carrier_ok(ndev)) 4013 + return -ENETDOWN; 4014 + 4015 + if (queue >= fep->num_rx_queues || queue >= fep->num_tx_queues) 4016 + return -ERANGE; 4017 + 4018 + rxq = fep->rx_queue[queue]; 4019 + if (!rxq->xsk_pool) 4020 + return -EINVAL; 4021 + 4022 + if (!napi_if_scheduled_mark_missed(&fep->napi)) { 4023 + if (likely(napi_schedule_prep(&fep->napi))) 4024 + __napi_schedule(&fep->napi); 4025 + } 4026 + 4027 + return 0; 4812 4028 } 4813 4029 4814 4030 static int fec_hwtstamp_get(struct net_device *ndev, ··· 4899 4063 .ndo_set_features = fec_set_features, 4900 4064 .ndo_bpf = fec_enet_bpf, 4901 4065 .ndo_xdp_xmit = fec_enet_xdp_xmit, 4066 + .ndo_xsk_wakeup = fec_enet_xsk_wakeup, 4902 4067 .ndo_hwtstamp_get = fec_hwtstamp_get, 4903 4068 .ndo_hwtstamp_set = fec_hwtstamp_set, 4904 4069 }; ··· 5027 4190 5028 4191 if (!(fep->quirks & FEC_QUIRK_SWAP_FRAME)) 5029 4192 ndev->xdp_features = NETDEV_XDP_ACT_BASIC | 5030 - NETDEV_XDP_ACT_REDIRECT; 4193 + NETDEV_XDP_ACT_REDIRECT | 4194 + NETDEV_XDP_ACT_XSK_ZEROCOPY; 5031 4195 5032 4196 fec_restart(ndev); 5033 4197 ··· 5430 4592 fep->max_buf_size = PKT_MAXBUF_SIZE; 5431 4593 5432 4594 ndev->max_mtu = fep->max_buf_size - VLAN_ETH_HLEN - ETH_FCS_LEN; 4595 + 4596 + if (fep->quirks & FEC_QUIRK_HAS_RACC) 4597 + fep->rx_shift = 2; 4598 + else 4599 + fep->rx_shift = 0; 5433 4600 5434 4601 ret = register_netdev(ndev); 5435 4602 if (ret)