Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

net: fec: add initial XDP support

This patch adds initial XDP support to the Freescale FEC driver. It supports
the XDP_PASS, XDP_DROP and XDP_REDIRECT actions. Upcoming patches will add
support for XDP_TX and Zero Copy features.

As the patch is rather large, the code that collects the statistics
has been split out and will be submitted as a dedicated follow-up
patch.

I tested with the xdpsock sample application.
-- Native mode here means running the command "xdpsock -i eth0"
-- SKB mode means running the command "xdpsock -S -i eth0"

The following are the test results for each XDP mode:

root@imx8qxpc0mek:~/bpf# ./xdpsock -i eth0
sock0@eth0:0 rxdrop xdp-drv
pps pkts 1.00
rx 371347 2717794
tx 0 0

root@imx8qxpc0mek:~/bpf# ./xdpsock -S -i eth0
sock0@eth0:0 rxdrop xdp-skb
pps pkts 1.00
rx 202229 404528
tx 0 0

root@imx8qxpc0mek:~/bpf# ./xdp2 eth0
proto 0: 496708 pkt/s
proto 0: 505469 pkt/s
proto 0: 505283 pkt/s
proto 0: 505443 pkt/s
proto 0: 505465 pkt/s

root@imx8qxpc0mek:~/bpf# ./xdp2 -S eth0
proto 0: 0 pkt/s
proto 17: 118778 pkt/s
proto 17: 118989 pkt/s
proto 0: 1 pkt/s
proto 17: 118987 pkt/s
proto 0: 0 pkt/s
proto 17: 118943 pkt/s
proto 17: 118976 pkt/s
proto 0: 1 pkt/s
proto 17: 119006 pkt/s
proto 0: 0 pkt/s
proto 17: 119071 pkt/s
proto 17: 119092 pkt/s

Signed-off-by: Shenwei Wang <shenwei.wang@nxp.com>
Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/r/20221031185350.2045675-1-shenwei.wang@nxp.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Shenwei Wang and committed by
Paolo Abeni
6d6b39f1 598d2982

+226 -2
+3 -1
drivers/net/ethernet/freescale/fec.h
··· 348 348 */ 349 349 350 350 #define FEC_ENET_XDP_HEADROOM (XDP_PACKET_HEADROOM) 351 - 352 351 #define FEC_ENET_RX_PAGES 256 353 352 #define FEC_ENET_RX_FRSIZE (PAGE_SIZE - FEC_ENET_XDP_HEADROOM \ 354 353 - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) ··· 661 662 u64 perout_stime; 662 663 663 664 struct imx_sc_ipc *ipc_handle; 665 + 666 + /* XDP BPF Program */ 667 + struct bpf_prog *xdp_prog; 664 668 665 669 u64 ethtool_stats[]; 666 670 };
+223 -1
drivers/net/ethernet/freescale/fec_main.c
··· 89 89 #define FEC_ENET_OPD_V 0xFFF0 90 90 #define FEC_MDIO_PM_TIMEOUT 100 /* ms */ 91 91 92 + /* Verdict codes returned by fec_enet_run_xdp(); the RX loop ORs them into xdp_result. */ #define FEC_ENET_XDP_PASS 0 93 + #define FEC_ENET_XDP_CONSUMED BIT(0) 94 + #define FEC_ENET_XDP_TX BIT(1) 95 + #define FEC_ENET_XDP_REDIR BIT(2) 96 + 92 97 struct fec_devinfo { 93 98 u32 quirks; 94 99 }; ··· 423 418 fec_enet_create_page_pool(struct fec_enet_private *fep, 424 419 struct fec_enet_priv_rx_q *rxq, int size) 425 420 { 421 + struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog); 426 422 struct page_pool_params pp_params = { 427 423 .order = 0, 428 424 .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, 429 425 .pool_size = size, 430 426 .nid = dev_to_node(&fep->pdev->dev), 431 427 .dev = &fep->pdev->dev, 432 - .dma_dir = DMA_FROM_DEVICE, 428 + .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, /* bidirectional mapping is selected whenever an XDP program is attached */ 433 429 .offset = FEC_ENET_XDP_HEADROOM, 434 430 .max_len = FEC_ENET_RX_FRSIZE, 435 431 }; ··· 1505 1499 bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); 1506 1500 } 1507 1501 1502 + /* Run the attached XDP program on one received buffer and translate its verdict into a FEC_ENET_XDP_* code. XDP_PASS returns FEC_ENET_XDP_PASS; a successful XDP_REDIRECT returns FEC_ENET_XDP_REDIR; every other outcome (failed redirect, XDP_TX, XDP_ABORTED, XDP_DROP, unknown action) hands the page back to rxq->page_pool and returns FEC_ENET_XDP_CONSUMED. The index argument is not referenced in this body. */ static u32 1503 + fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog, 1504 + struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq, int index) 1505 + { 1506 + unsigned int sync, len = xdp->data_end - xdp->data; 1507 + u32 ret = FEC_ENET_XDP_PASS; 1508 + struct page *page; 1509 + int err; 1510 + u32 act; 1511 + 1512 + act = bpf_prog_run_xdp(prog, xdp); 1513 + 1514 + /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */ 1515 + sync = xdp->data_end - xdp->data_hard_start - FEC_ENET_XDP_HEADROOM; 1516 + sync = max(sync, len); /* len was sampled before the program ran, so the larger of the pre-run and post-run lengths is synced */ 1517 + 1518 + switch (act) { 1519 + case XDP_PASS: 1520 + ret = FEC_ENET_XDP_PASS; 1521 + break; 1522 + 1523 + case XDP_REDIRECT: 1524 + err = xdp_do_redirect(fep->netdev, xdp, prog); 1525 + if (!err) { 1526 + ret = FEC_ENET_XDP_REDIR; 1527 + } else { 1528 + ret = FEC_ENET_XDP_CONSUMED; 1529 + page = virt_to_head_page(xdp->data); 1530 + page_pool_put_page(rxq->page_pool, page, sync, true); 1531 + } 1532 + break; 1533 + 1534 + default: 1535 
+ bpf_warn_invalid_xdp_action(fep->netdev, prog, act); /* NOTE(review): default falls through into the XDP_TX case below, so an unknown action reaches bpf_warn_invalid_xdp_action() twice */ 1536 + fallthrough; 1537 + 1538 + case XDP_TX: /* XDP_TX is not transmitted here: it is reported as invalid and then dropped */ 1539 + bpf_warn_invalid_xdp_action(fep->netdev, prog, act); 1540 + fallthrough; 1541 + 1542 + case XDP_ABORTED: 1543 + fallthrough; /* handle aborts by dropping packet */ 1544 + 1545 + case XDP_DROP: 1546 + ret = FEC_ENET_XDP_CONSUMED; 1547 + page = virt_to_head_page(xdp->data); 1548 + page_pool_put_page(rxq->page_pool, page, sync, true); 1549 + break; 1550 + } 1551 + 1552 + return ret; 1553 + } 1554 + 1508 1555 /* During a receive, the bd_rx.cur points to the current incoming buffer. 1509 1556 * When we update through the ring, if the next incoming buffer has 1510 1557 * not been given to the system, we just set the empty indicator, ··· 1579 1520 u16 vlan_tag; 1580 1521 int index = 0; 1581 1522 bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME; 1523 + struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog); 1524 + u32 ret, xdp_result = FEC_ENET_XDP_PASS; 1525 + struct xdp_buff xdp; 1582 1526 struct page *page; 1583 1527 1584 1528 #ifdef CONFIG_M532x ··· 1593 1531 * These get messed up if we get called due to a busy condition. 
1594 1532 */ 1595 1533 bdp = rxq->bd.cur; 1534 + xdp_init_buff(&xdp, PAGE_SIZE, &rxq->xdp_rxq); 1596 1535 1597 1536 while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) { 1598 1537 ··· 1642 1579 DMA_FROM_DEVICE); 1643 1580 prefetch(page_address(page)); 1644 1581 fec_enet_update_cbd(rxq, bdp, index); 1582 + 1583 + /* With a program attached, wrap the received page in the xdp_buff and run it; any verdict other than FEC_ENET_XDP_PASS jumps to rx_processing_done */ if (xdp_prog) { 1584 + xdp_buff_clear_frags_flag(&xdp); 1585 + xdp_prepare_buff(&xdp, page_address(page), 1586 + FEC_ENET_XDP_HEADROOM, pkt_len, false); 1587 + 1588 + ret = fec_enet_run_xdp(fep, xdp_prog, &xdp, rxq, index); 1589 + xdp_result |= ret; 1590 + if (ret != FEC_ENET_XDP_PASS) 1591 + goto rx_processing_done; 1592 + } 1645 1593 1646 1594 /* The packet length includes FCS, but we don't want to 1647 1595 * include that when passing upstream as it messes up ··· 1749 1675 writel(0, rxq->bd.reg_desc_active); 1750 1676 } 1751 1677 rxq->bd.cur = bdp; 1678 + 1679 + if (xdp_result & FEC_ENET_XDP_REDIR) 1680 + xdp_do_flush_map(); 1681 + 1752 1682 return pkt_received; 1753 1683 } 1754 1684 ··· 3596 3518 return fec_enet_vlan_pri_to_queue[vlan_tag >> 13]; 3597 3519 } 3598 3520 3521 + static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf) 3522 + { 3523 + struct fec_enet_private *fep = netdev_priv(dev); 3524 + bool is_run = netif_running(dev); 3525 + struct bpf_prog *old_prog; 3526 + 3527 + switch (bpf->command) { 3528 + case XDP_SETUP_PROG: 3529 + if (is_run) { 3530 + napi_disable(&fep->napi); 3531 + netif_tx_disable(dev); 3532 + } 3533 + 3534 + old_prog = xchg(&fep->xdp_prog, bpf->prog); 3535 + fec_restart(dev); 3536 + 3537 + if (is_run) { 3538 + napi_enable(&fep->napi); 3539 + netif_tx_start_all_queues(dev); 3540 + } 3541 + 3542 + if (old_prog) 3543 + bpf_prog_put(old_prog); 3544 + 3545 + return 0; 3546 + 3547 + case XDP_SETUP_XSK_POOL: 3548 + return -EOPNOTSUPP; 3549 + 3550 + default: 3551 + return -EOPNOTSUPP; 3552 + } 3553 + } 3554 + 3555 + static int 3556 + fec_enet_xdp_get_tx_queue(struct fec_enet_private *fep, int cpu) 3557 
+ { 3558 + int index = cpu; 3559 + 3560 + if (unlikely(index < 0)) 3561 + index = 0; 3562 + 3563 + while (index >= fep->num_tx_queues) 3564 + index -= fep->num_tx_queues; 3565 + 3566 + return index; 3567 + } 3568 + 3569 + static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, 3570 + struct fec_enet_priv_tx_q *txq, 3571 + struct xdp_frame *frame) 3572 + { 3573 + unsigned int index, status, estatus; 3574 + struct bufdesc *bdp, *last_bdp; 3575 + dma_addr_t dma_addr; 3576 + int entries_free; 3577 + 3578 + entries_free = fec_enet_get_free_txdesc_num(txq); 3579 + if (entries_free < MAX_SKB_FRAGS + 1) { 3580 + netdev_err(fep->netdev, "NOT enough BD for SG!\n"); 3581 + return NETDEV_TX_OK; 3582 + } 3583 + 3584 + /* Fill in a Tx ring entry */ /* NOTE(review): this function returns three different codes: NETDEV_TX_OK when the ring is too full (checked just before this point), FEC_ENET_XDP_CONSUMED when DMA mapping fails, and 0 on success. The only caller visible in this diff, fec_enet_xdp_xmit(), ignores the result. */ 3585 + bdp = txq->bd.cur; 3586 + last_bdp = bdp; 3587 + status = fec16_to_cpu(bdp->cbd_sc); 3588 + status &= ~BD_ENET_TX_STATS; 3589 + 3590 + index = fec_enet_get_bd_index(bdp, &txq->bd); 3591 + 3592 + dma_addr = dma_map_single(&fep->pdev->dev, frame->data, 3593 + frame->len, DMA_TO_DEVICE); 3594 + if (dma_mapping_error(&fep->pdev->dev, dma_addr)) 3595 + return FEC_ENET_XDP_CONSUMED; 3596 + 3597 + status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST); 3598 + if (fep->bufdesc_ex) 3599 + estatus = BD_ENET_TX_INT; /* estatus is only assigned and only read when fep->bufdesc_ex is set */ 3600 + 3601 + bdp->cbd_bufaddr = cpu_to_fec32(dma_addr); 3602 + bdp->cbd_datlen = cpu_to_fec16(frame->len); 3603 + 3604 + if (fep->bufdesc_ex) { 3605 + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; 3606 + 3607 + if (fep->quirks & FEC_QUIRK_HAS_AVB) 3608 + estatus |= FEC_TX_BD_FTYPE(txq->bd.qid); 3609 + 3610 + ebdp->cbd_bdu = 0; 3611 + ebdp->cbd_esc = cpu_to_fec32(estatus); 3612 + } 3613 + 3614 + index = fec_enet_get_bd_index(last_bdp, &txq->bd); 3615 + txq->tx_skbuff[index] = NULL; /* NOTE(review): only frame->data and frame->len are used; the xdp_frame pointer itself is not stored, so where the frame is released after TX completion is not visible in this diff - confirm */ 3616 + 3617 + /* Send it on its way. Tell FEC it's ready, interrupt when done, 3618 + * it's the last BD of the frame, and to put the CRC on the end. 
3619 + */ 3620 + status |= (BD_ENET_TX_READY | BD_ENET_TX_TC); 3621 + bdp->cbd_sc = cpu_to_fec16(status); 3622 + 3623 + /* If this was the last BD in the ring, start at the beginning again. */ 3624 + bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd); 3625 + 3626 + txq->bd.cur = bdp; 3627 + 3628 + return 0; 3629 + } 3630 + 3631 + /* ndo_xdp_xmit handler: pick a TX queue from the current CPU id, take the TX lock of that queue, place every frame on the ring with fec_enet_txq_xmit_frame(), then issue a write barrier and write reg_desc_active to start transmission. The flags argument is not referenced in this body. */ static int fec_enet_xdp_xmit(struct net_device *dev, 3632 + int num_frames, 3633 + struct xdp_frame **frames, 3634 + u32 flags) 3635 + { 3636 + struct fec_enet_private *fep = netdev_priv(dev); 3637 + struct fec_enet_priv_tx_q *txq; 3638 + int cpu = smp_processor_id(); 3639 + struct netdev_queue *nq; 3640 + unsigned int queue; 3641 + int i; 3642 + 3643 + /* fec_enet_xdp_get_tx_queue() folds the cpu id into [0, num_tx_queues): negative ids map to 0, larger ids are reduced by repeated subtraction */ queue = fec_enet_xdp_get_tx_queue(fep, cpu); 3644 + txq = fep->tx_queue[queue]; 3645 + nq = netdev_get_tx_queue(fep->netdev, queue); 3646 + 3647 + __netif_tx_lock(nq, cpu); 3648 + 3649 + /* NOTE(review): the per-frame return value is discarded, so a frame rejected for lack of descriptors or a DMA mapping error is not distinguished from a queued one */ for (i = 0; i < num_frames; i++) 3650 + fec_enet_txq_xmit_frame(fep, txq, frames[i]); 3651 + 3652 + /* Make sure the update to bdp and tx_skbuff are performed. */ 3653 + wmb(); 3654 + 3655 + /* Trigger transmission start */ 3656 + writel(0, txq->bd.reg_desc_active); 3657 + 3658 + __netif_tx_unlock(nq); 3659 + 3660 + return num_frames; /* always reports every frame as sent, regardless of what fec_enet_txq_xmit_frame() returned */ 3661 + } 3662 + 3599 3663 static const struct net_device_ops fec_netdev_ops = { 3600 3664 .ndo_open = fec_enet_open, 3601 3665 .ndo_stop = fec_enet_close, ··· 3752 3532 .ndo_poll_controller = fec_poll_controller, 3753 3533 #endif 3754 3534 .ndo_set_features = fec_set_features, 3535 + .ndo_bpf = fec_enet_bpf, /* fec_enet_bpf() handles XDP_SETUP_PROG by swapping fep->xdp_prog with xchg(), calling fec_restart() and releasing the old program; NAPI and the TX queues are stopped around the swap only when the interface is running. XDP_SETUP_XSK_POOL and every other command return -EOPNOTSUPP. */ 3536 + .ndo_xdp_xmit = fec_enet_xdp_xmit, 3755 3537 }; 3756 3538 3757 3539 static const unsigned short offset_des_active_rxq[] = {