Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'xdp-ice-mbuf'

Maciej Fijalkowski says:

====================
Although this work started as an effort to add multi-buffer XDP support
to ice driver, as usual it turned out that some other side stuff needed
to be addressed, so let me give you an overview.

First patch adjusts legacy-rx in a way that it will be possible to refer
to skb_shared_info being at the end of the buffer when gathering up
frame fragments within xdp_buff.

Then, patches 2-9 prepare ice driver in a way that actual multi-buffer
patches will be easier to swallow.

10 and 11 are the meat. What is worth mentioning is that this set
actually *fixes* things as patch 11 removes the logic based on
next_dd/rs and we previously stepped away from this for ice_xmit_zc().
Currently, AF_XDP ZC XDP_TX workload is off as there are two cleaning
sides that can be triggered and the two of them work on different internal
logic. This set unifies that and allows us to improve the performance by
2x with a trick on the last (13) patch.

12th is a simple cleanup of no longer used fields from Tx ring.

I might be wrong but I have not seen anyone reporting performance impact
among patches that add XDP multi-buffer support to a particular driver.
Numbers below were gathered via xdp_rxq_info and xdp_redirect_map on
1500 MTU:

XDP_DROP +1%
XDP_PASS -1,2%
XDP_TX -0,5%
XDP_REDIRECT -3,3%

Cherry on top, which is not directly related to mbuf support (last
patch):

XDP_TX ZC +126%

The target we agreed on was to not degrade performance for any action by
anything that would be over 5%, so our goal was met. Basically this set
keeps the performance where it was. Redirect is slower due to more
frequent tail bumps.
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com>

+645 -432
+12 -9
drivers/net/ethernet/intel/ice/ice_base.c
··· 355 355 { 356 356 if (ice_ring_uses_build_skb(rx_ring)) 357 357 return ICE_SKB_PAD; 358 - else if (ice_is_xdp_ena_vsi(rx_ring->vsi)) 359 - return XDP_PACKET_HEADROOM; 360 - 361 358 return 0; 362 359 } 363 360 ··· 492 495 int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) 493 496 { 494 497 struct device *dev = ice_pf_to_dev(ring->vsi->back); 495 - u16 num_bufs = ICE_DESC_UNUSED(ring); 498 + u32 num_bufs = ICE_RX_DESC_UNUSED(ring); 496 499 int err; 497 500 498 501 ring->rx_buf_len = ring->vsi->rx_buf_len; ··· 500 503 if (ring->vsi->type == ICE_VSI_PF) { 501 504 if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) 502 505 /* coverity[check_return] */ 503 - xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, 504 - ring->q_index, ring->q_vector->napi.napi_id); 506 + __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, 507 + ring->q_index, 508 + ring->q_vector->napi.napi_id, 509 + ring->vsi->rx_buf_len); 505 510 506 511 ring->xsk_pool = ice_xsk_pool(ring); 507 512 if (ring->xsk_pool) { ··· 523 524 } else { 524 525 if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) 525 526 /* coverity[check_return] */ 526 - xdp_rxq_info_reg(&ring->xdp_rxq, 527 - ring->netdev, 528 - ring->q_index, ring->q_vector->napi.napi_id); 527 + __xdp_rxq_info_reg(&ring->xdp_rxq, 528 + ring->netdev, 529 + ring->q_index, 530 + ring->q_vector->napi.napi_id, 531 + ring->vsi->rx_buf_len); 529 532 530 533 err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 531 534 MEM_TYPE_PAGE_SHARED, ··· 537 536 } 538 537 } 539 538 539 + xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq); 540 + ring->xdp.data = NULL; 540 541 err = ice_setup_rx_ctx(ring); 541 542 if (err) { 542 543 dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
+1 -3
drivers/net/ethernet/intel/ice/ice_ethtool.c
··· 3046 3046 /* clone ring and setup updated count */ 3047 3047 xdp_rings[i] = *vsi->xdp_rings[i]; 3048 3048 xdp_rings[i].count = new_tx_cnt; 3049 - xdp_rings[i].next_dd = ICE_RING_QUARTER(&xdp_rings[i]) - 1; 3050 - xdp_rings[i].next_rs = ICE_RING_QUARTER(&xdp_rings[i]) - 1; 3051 3049 xdp_rings[i].desc = NULL; 3052 3050 xdp_rings[i].tx_buf = NULL; 3053 3051 err = ice_setup_tx_ring(&xdp_rings[i]); ··· 3090 3092 3091 3093 /* allocate Rx buffers */ 3092 3094 err = ice_alloc_rx_bufs(&rx_rings[i], 3093 - ICE_DESC_UNUSED(&rx_rings[i])); 3095 + ICE_RX_DESC_UNUSED(&rx_rings[i])); 3094 3096 rx_unwind: 3095 3097 if (err) { 3096 3098 while (i) {
+2 -6
drivers/net/ethernet/intel/ice/ice_lib.c
··· 1992 1992 void ice_vsi_cfg_frame_size(struct ice_vsi *vsi) 1993 1993 { 1994 1994 if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) { 1995 - vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX; 1996 - vsi->rx_buf_len = ICE_RXBUF_2048; 1995 + vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX; 1996 + vsi->rx_buf_len = ICE_RXBUF_1664; 1997 1997 #if (PAGE_SIZE < 8192) 1998 1998 } else if (!ICE_2K_TOO_SMALL_WITH_PADDING && 1999 1999 (vsi->netdev->mtu <= ETH_DATA_LEN)) { ··· 2002 2002 #endif 2003 2003 } else { 2004 2004 vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX; 2005 - #if (PAGE_SIZE < 8192) 2006 2005 vsi->rx_buf_len = ICE_RXBUF_3072; 2007 - #else 2008 - vsi->rx_buf_len = ICE_RXBUF_2048; 2009 - #endif 2010 2006 } 2011 2007 } 2012 2008
+28 -19
drivers/net/ethernet/intel/ice/ice_main.c
··· 2570 2570 xdp_ring->netdev = NULL; 2571 2571 xdp_ring->dev = dev; 2572 2572 xdp_ring->count = vsi->num_tx_desc; 2573 - xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1; 2574 - xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1; 2575 2573 WRITE_ONCE(vsi->xdp_rings[i], xdp_ring); 2576 2574 if (ice_setup_tx_ring(xdp_ring)) 2577 2575 goto free_xdp_rings; ··· 2861 2863 } 2862 2864 2863 2865 /** 2866 + * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP 2867 + * @vsi: Pointer to VSI structure 2868 + */ 2869 + static int ice_max_xdp_frame_size(struct ice_vsi *vsi) 2870 + { 2871 + if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) 2872 + return ICE_RXBUF_1664; 2873 + else 2874 + return ICE_RXBUF_3072; 2875 + } 2876 + 2877 + /** 2864 2878 * ice_xdp_setup_prog - Add or remove XDP eBPF program 2865 2879 * @vsi: VSI to setup XDP for 2866 2880 * @prog: XDP program ··· 2882 2872 ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, 2883 2873 struct netlink_ext_ack *extack) 2884 2874 { 2885 - int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD; 2875 + unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD; 2886 2876 bool if_running = netif_running(vsi->netdev); 2887 2877 int ret = 0, xdp_ring_err = 0; 2888 2878 2889 - if (frame_size > vsi->rx_buf_len) { 2890 - NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP"); 2891 - return -EOPNOTSUPP; 2879 + if (prog && !prog->aux->xdp_has_frags) { 2880 + if (frame_size > ice_max_xdp_frame_size(vsi)) { 2881 + NL_SET_ERR_MSG_MOD(extack, 2882 + "MTU is too large for linear frames and XDP prog does not support frags"); 2883 + return -EOPNOTSUPP; 2884 + } 2892 2885 } 2893 2886 2894 2887 /* need to stop netdev while setting up the program for Rx rings */ ··· 7344 7331 } 7345 7332 7346 7333 /** 7347 - * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP 7348 - * @vsi: Pointer to VSI structure 7349 - */ 7350 - static int ice_max_xdp_frame_size(struct ice_vsi *vsi) 7351 
- { 7352 - if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) 7353 - return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM; 7354 - else 7355 - return ICE_RXBUF_3072; 7356 - } 7357 - 7358 - /** 7359 7334 * ice_change_mtu - NDO callback to change the MTU 7360 7335 * @netdev: network interface device structure 7361 7336 * @new_mtu: new value for maximum frame size ··· 7355 7354 struct ice_netdev_priv *np = netdev_priv(netdev); 7356 7355 struct ice_vsi *vsi = np->vsi; 7357 7356 struct ice_pf *pf = vsi->back; 7357 + struct bpf_prog *prog; 7358 7358 u8 count = 0; 7359 7359 int err = 0; 7360 7360 ··· 7364 7362 return 0; 7365 7363 } 7366 7364 7367 - if (ice_is_xdp_ena_vsi(vsi)) { 7365 + prog = vsi->xdp_prog; 7366 + if (prog && !prog->aux->xdp_has_frags) { 7368 7367 int frame_size = ice_max_xdp_frame_size(vsi); 7369 7368 7370 7369 if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) { 7371 7370 netdev_err(netdev, "max MTU for XDP usage is %d\n", 7372 7371 frame_size - ICE_ETH_PKT_HDR_PAD); 7372 + return -EINVAL; 7373 + } 7374 + } else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) { 7375 + if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) { 7376 + netdev_err(netdev, "Too big MTU for legacy-rx; Max is %d\n", 7377 + ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD); 7373 7378 return -EINVAL; 7374 7379 } 7375 7380 }
+227 -197
drivers/net/ethernet/intel/ice/ice_txrx.c
··· 113 113 ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf) 114 114 { 115 115 if (tx_buf->skb) { 116 - if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) 116 + if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) { 117 117 devm_kfree(ring->dev, tx_buf->raw_buf); 118 - else if (ice_ring_is_xdp(ring)) 119 - page_frag_free(tx_buf->raw_buf); 120 - else 118 + } else if (ice_ring_is_xdp(ring)) { 119 + if (ring->xsk_pool) 120 + xsk_buff_free(tx_buf->xdp); 121 + else 122 + page_frag_free(tx_buf->raw_buf); 123 + } else { 121 124 dev_kfree_skb_any(tx_buf->skb); 125 + } 122 126 if (dma_unmap_len(tx_buf, len)) 123 127 dma_unmap_single(ring->dev, 124 128 dma_unmap_addr(tx_buf, dma), ··· 178 174 179 175 tx_ring->next_to_use = 0; 180 176 tx_ring->next_to_clean = 0; 181 - tx_ring->next_dd = ICE_RING_QUARTER(tx_ring) - 1; 182 - tx_ring->next_rs = ICE_RING_QUARTER(tx_ring) - 1; 183 177 184 178 if (!tx_ring->netdev) 185 179 return; ··· 384 382 */ 385 383 void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) 386 384 { 385 + struct xdp_buff *xdp = &rx_ring->xdp; 387 386 struct device *dev = rx_ring->dev; 388 387 u32 size; 389 388 u16 i; ··· 393 390 if (!rx_ring->rx_buf) 394 391 return; 395 392 396 - if (rx_ring->skb) { 397 - dev_kfree_skb(rx_ring->skb); 398 - rx_ring->skb = NULL; 399 - } 400 - 401 393 if (rx_ring->xsk_pool) { 402 394 ice_xsk_clean_rx_ring(rx_ring); 403 395 goto rx_skip_free; 396 + } 397 + 398 + if (xdp->data) { 399 + xdp_return_buff(xdp); 400 + xdp->data = NULL; 404 401 } 405 402 406 403 /* Free all the Rx ring sk_buffs */ ··· 440 437 441 438 rx_ring->next_to_alloc = 0; 442 439 rx_ring->next_to_clean = 0; 440 + rx_ring->first_desc = 0; 443 441 rx_ring->next_to_use = 0; 444 442 } 445 443 ··· 510 506 511 507 rx_ring->next_to_use = 0; 512 508 rx_ring->next_to_clean = 0; 509 + rx_ring->first_desc = 0; 513 510 514 511 if (ice_is_xdp_ena_vsi(rx_ring->vsi)) 515 512 WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog); ··· 528 523 return -ENOMEM; 529 524 } 
530 525 526 + /** 527 + * ice_rx_frame_truesize 528 + * @rx_ring: ptr to Rx ring 529 + * @size: size 530 + * 531 + * calculate the truesize with taking into the account PAGE_SIZE of 532 + * underlying arch 533 + */ 531 534 static unsigned int 532 - ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, unsigned int __maybe_unused size) 535 + ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size) 533 536 { 534 537 unsigned int truesize; 535 538 ··· 558 545 * @xdp: xdp_buff used as input to the XDP program 559 546 * @xdp_prog: XDP program to run 560 547 * @xdp_ring: ring to be used for XDP_TX action 548 + * @rx_buf: Rx buffer to store the XDP action 561 549 * 562 550 * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} 563 551 */ 564 - static int 552 + static void 565 553 ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, 566 - struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring) 554 + struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, 555 + struct ice_rx_buf *rx_buf) 567 556 { 568 - int err; 557 + unsigned int ret = ICE_XDP_PASS; 569 558 u32 act; 559 + 560 + if (!xdp_prog) 561 + goto exit; 570 562 571 563 act = bpf_prog_run_xdp(xdp_prog, xdp); 572 564 switch (act) { 573 565 case XDP_PASS: 574 - return ICE_XDP_PASS; 566 + break; 575 567 case XDP_TX: 576 568 if (static_branch_unlikely(&ice_xdp_locking_key)) 577 569 spin_lock(&xdp_ring->tx_lock); 578 - err = ice_xmit_xdp_ring(xdp->data, xdp->data_end - xdp->data, xdp_ring); 570 + ret = __ice_xmit_xdp_ring(xdp, xdp_ring); 579 571 if (static_branch_unlikely(&ice_xdp_locking_key)) 580 572 spin_unlock(&xdp_ring->tx_lock); 581 - if (err == ICE_XDP_CONSUMED) 573 + if (ret == ICE_XDP_CONSUMED) 582 574 goto out_failure; 583 - return err; 575 + break; 584 576 case XDP_REDIRECT: 585 - err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); 586 - if (err) 577 + if (xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog)) 587 578 goto out_failure; 588 - return ICE_XDP_REDIR; 579 + ret = ICE_XDP_REDIR; 
580 + break; 589 581 default: 590 582 bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); 591 583 fallthrough; ··· 599 581 trace_xdp_exception(rx_ring->netdev, xdp_prog, act); 600 582 fallthrough; 601 583 case XDP_DROP: 602 - return ICE_XDP_CONSUMED; 584 + ret = ICE_XDP_CONSUMED; 603 585 } 586 + exit: 587 + rx_buf->act = ret; 588 + if (unlikely(xdp_buff_has_frags(xdp))) 589 + ice_set_rx_bufs_act(xdp, rx_ring, ret); 604 590 } 605 591 606 592 /** ··· 627 605 unsigned int queue_index = smp_processor_id(); 628 606 struct ice_vsi *vsi = np->vsi; 629 607 struct ice_tx_ring *xdp_ring; 608 + struct ice_tx_buf *tx_buf; 630 609 int nxmit = 0, i; 631 610 632 611 if (test_bit(ICE_VSI_DOWN, vsi->state)) ··· 650 627 xdp_ring = vsi->xdp_rings[queue_index]; 651 628 } 652 629 630 + tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use]; 653 631 for (i = 0; i < n; i++) { 654 632 struct xdp_frame *xdpf = frames[i]; 655 633 int err; 656 634 657 - err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring); 635 + err = ice_xmit_xdp_ring(xdpf, xdp_ring); 658 636 if (err != ICE_XDP_TX) 659 637 break; 660 638 nxmit++; 661 639 } 662 640 641 + tx_buf->rs_idx = ice_set_rs_bit(xdp_ring); 663 642 if (unlikely(flags & XDP_XMIT_FLUSH)) 664 643 ice_xdp_ring_update_tail(xdp_ring); 665 644 ··· 731 706 * buffers. Then bump tail at most one time. Grouping like this lets us avoid 732 707 * multiple tail writes per call. 
733 708 */ 734 - bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, u16 cleaned_count) 709 + bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) 735 710 { 736 711 union ice_32b_rx_flex_desc *rx_desc; 737 712 u16 ntu = rx_ring->next_to_use; ··· 808 783 /** 809 784 * ice_can_reuse_rx_page - Determine if page can be reused for another Rx 810 785 * @rx_buf: buffer containing the page 811 - * @rx_buf_pgcnt: rx_buf page refcount pre xdp_do_redirect() call 812 786 * 813 787 * If page is reusable, we have a green light for calling ice_reuse_rx_page, 814 788 * which will assign the current buffer to the buffer that next_to_alloc is ··· 815 791 * page freed 816 792 */ 817 793 static bool 818 - ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt) 794 + ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) 819 795 { 820 796 unsigned int pagecnt_bias = rx_buf->pagecnt_bias; 821 797 struct page *page = rx_buf->page; ··· 826 802 827 803 #if (PAGE_SIZE < 8192) 828 804 /* if we are only owner of page we can reuse it */ 829 - if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1)) 805 + if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1)) 830 806 return false; 831 807 #else 832 808 #define ICE_LAST_OFFSET \ ··· 848 824 } 849 825 850 826 /** 851 - * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag 827 + * ice_add_xdp_frag - Add contents of Rx buffer to xdp buf as a frag 852 828 * @rx_ring: Rx descriptor ring to transact packets on 829 + * @xdp: xdp buff to place the data into 853 830 * @rx_buf: buffer containing page to add 854 - * @skb: sk_buff to place the data into 855 831 * @size: packet length from rx_desc 856 832 * 857 - * This function will add the data contained in rx_buf->page to the skb. 858 - * It will just attach the page as a frag to the skb. 859 - * The function will then update the page offset. 833 + * This function will add the data contained in rx_buf->page to the xdp buf. 834 + * It will just attach the page as a frag. 
860 835 */ 861 - static void 862 - ice_add_rx_frag(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, 863 - struct sk_buff *skb, unsigned int size) 836 + static int 837 + ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, 838 + struct ice_rx_buf *rx_buf, const unsigned int size) 864 839 { 865 - #if (PAGE_SIZE >= 8192) 866 - unsigned int truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset); 867 - #else 868 - unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; 869 - #endif 840 + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 870 841 871 842 if (!size) 872 - return; 873 - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page, 874 - rx_buf->page_offset, size, truesize); 843 + return 0; 875 844 876 - /* page is being used so we must update the page offset */ 877 - ice_rx_buf_adjust_pg_offset(rx_buf, truesize); 845 + if (!xdp_buff_has_frags(xdp)) { 846 + sinfo->nr_frags = 0; 847 + sinfo->xdp_frags_size = 0; 848 + xdp_buff_set_frags_flag(xdp); 849 + } 850 + 851 + if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { 852 + if (unlikely(xdp_buff_has_frags(xdp))) 853 + ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED); 854 + return -ENOMEM; 855 + } 856 + 857 + __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, 858 + rx_buf->page_offset, size); 859 + sinfo->xdp_frags_size += size; 860 + 861 + if (page_is_pfmemalloc(rx_buf->page)) 862 + xdp_buff_set_frag_pfmemalloc(xdp); 863 + 864 + return 0; 878 865 } 879 866 880 867 /** ··· 921 886 * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use 922 887 * @rx_ring: Rx descriptor ring to transact packets on 923 888 * @size: size of buffer to add to skb 924 - * @rx_buf_pgcnt: rx_buf page refcount 925 889 * 926 890 * This function will pull an Rx buffer from the ring and synchronize it 927 891 * for use by the CPU. 
928 892 */ 929 893 static struct ice_rx_buf * 930 894 ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, 931 - int *rx_buf_pgcnt) 895 + const unsigned int ntc) 932 896 { 933 897 struct ice_rx_buf *rx_buf; 934 898 935 - rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; 936 - *rx_buf_pgcnt = 899 + rx_buf = &rx_ring->rx_buf[ntc]; 900 + rx_buf->pgcnt = 937 901 #if (PAGE_SIZE < 8192) 938 902 page_count(rx_buf->page); 939 903 #else ··· 956 922 /** 957 923 * ice_build_skb - Build skb around an existing buffer 958 924 * @rx_ring: Rx descriptor ring to transact packets on 959 - * @rx_buf: Rx buffer to pull data from 960 925 * @xdp: xdp_buff pointing to the data 961 926 * 962 - * This function builds an skb around an existing Rx buffer, taking care 963 - * to set up the skb correctly and avoid any memcpy overhead. 927 + * This function builds an skb around an existing XDP buffer, taking care 928 + * to set up the skb correctly and avoid any memcpy overhead. Driver has 929 + * already combined frags (if any) to skb_shared_info. 964 930 */ 965 931 static struct sk_buff * 966 - ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, 967 - struct xdp_buff *xdp) 932 + ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) 968 933 { 969 934 u8 metasize = xdp->data - xdp->data_meta; 970 - #if (PAGE_SIZE < 8192) 971 - unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; 972 - #else 973 - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + 974 - SKB_DATA_ALIGN(xdp->data_end - 975 - xdp->data_hard_start); 976 - #endif 935 + struct skb_shared_info *sinfo = NULL; 936 + unsigned int nr_frags; 977 937 struct sk_buff *skb; 938 + 939 + if (unlikely(xdp_buff_has_frags(xdp))) { 940 + sinfo = xdp_get_shared_info_from_buff(xdp); 941 + nr_frags = sinfo->nr_frags; 942 + } 978 943 979 944 /* Prefetch first cache line of first page. 
If xdp->data_meta 980 945 * is unused, this points exactly as xdp->data, otherwise we ··· 982 949 */ 983 950 net_prefetch(xdp->data_meta); 984 951 /* build an skb around the page buffer */ 985 - skb = napi_build_skb(xdp->data_hard_start, truesize); 952 + skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz); 986 953 if (unlikely(!skb)) 987 954 return NULL; 988 955 ··· 997 964 if (metasize) 998 965 skb_metadata_set(skb, metasize); 999 966 1000 - /* buffer is used by skb, update page_offset */ 1001 - ice_rx_buf_adjust_pg_offset(rx_buf, truesize); 967 + if (unlikely(xdp_buff_has_frags(xdp))) 968 + xdp_update_skb_shared_info(skb, nr_frags, 969 + sinfo->xdp_frags_size, 970 + nr_frags * xdp->frame_sz, 971 + xdp_buff_is_frag_pfmemalloc(xdp)); 1002 972 1003 973 return skb; 1004 974 } ··· 1017 981 * skb correctly. 1018 982 */ 1019 983 static struct sk_buff * 1020 - ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, 1021 - struct xdp_buff *xdp) 984 + ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) 1022 985 { 1023 - unsigned int metasize = xdp->data - xdp->data_meta; 1024 986 unsigned int size = xdp->data_end - xdp->data; 987 + struct skb_shared_info *sinfo = NULL; 988 + struct ice_rx_buf *rx_buf; 989 + unsigned int nr_frags = 0; 1025 990 unsigned int headlen; 1026 991 struct sk_buff *skb; 1027 992 1028 993 /* prefetch first cache line of first page */ 1029 - net_prefetch(xdp->data_meta); 994 + net_prefetch(xdp->data); 995 + 996 + if (unlikely(xdp_buff_has_frags(xdp))) { 997 + sinfo = xdp_get_shared_info_from_buff(xdp); 998 + nr_frags = sinfo->nr_frags; 999 + } 1030 1000 1031 1001 /* allocate a skb to store the frags */ 1032 - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, 1033 - ICE_RX_HDR_SIZE + metasize, 1002 + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE, 1034 1003 GFP_ATOMIC | __GFP_NOWARN); 1035 1004 if (unlikely(!skb)) 1036 1005 return NULL; 1037 1006 1007 + rx_buf = 
&rx_ring->rx_buf[rx_ring->first_desc]; 1038 1008 skb_record_rx_queue(skb, rx_ring->q_index); 1039 1009 /* Determine available headroom for copy */ 1040 1010 headlen = size; ··· 1048 1006 headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE); 1049 1007 1050 1008 /* align pull length to size of long to optimize memcpy performance */ 1051 - memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta, 1052 - ALIGN(headlen + metasize, sizeof(long))); 1053 - 1054 - if (metasize) { 1055 - skb_metadata_set(skb, metasize); 1056 - __skb_pull(skb, metasize); 1057 - } 1009 + memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, 1010 + sizeof(long))); 1058 1011 1059 1012 /* if we exhaust the linear part then add what is left as a frag */ 1060 1013 size -= headlen; 1061 1014 if (size) { 1062 - #if (PAGE_SIZE >= 8192) 1063 - unsigned int truesize = SKB_DATA_ALIGN(size); 1064 - #else 1065 - unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; 1066 - #endif 1067 - skb_add_rx_frag(skb, 0, rx_buf->page, 1068 - rx_buf->page_offset + headlen, size, truesize); 1069 - /* buffer is used by skb, update page_offset */ 1070 - ice_rx_buf_adjust_pg_offset(rx_buf, truesize); 1071 - } else { 1072 - /* buffer is unused, reset bias back to rx_buf; data was copied 1073 - * onto skb's linear part so there's no need for adjusting 1074 - * page offset and we can reuse this buffer as-is 1015 + /* besides adding here a partial frag, we are going to add 1016 + * frags from xdp_buff, make sure there is enough space for 1017 + * them 1075 1018 */ 1076 - rx_buf->pagecnt_bias++; 1019 + if (unlikely(nr_frags >= MAX_SKB_FRAGS - 1)) { 1020 + dev_kfree_skb(skb); 1021 + return NULL; 1022 + } 1023 + skb_add_rx_frag(skb, 0, rx_buf->page, 1024 + rx_buf->page_offset + headlen, size, 1025 + xdp->frame_sz); 1026 + } else { 1027 + /* buffer is unused, change the act that should be taken later 1028 + * on; data was copied onto skb's linear part so there's no 1029 + * need for adjusting page offset and we can 
reuse this buffer 1030 + * as-is 1031 + */ 1032 + rx_buf->act = ICE_SKB_CONSUMED; 1033 + } 1034 + 1035 + if (unlikely(xdp_buff_has_frags(xdp))) { 1036 + struct skb_shared_info *skinfo = skb_shinfo(skb); 1037 + 1038 + memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0], 1039 + sizeof(skb_frag_t) * nr_frags); 1040 + 1041 + xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags, 1042 + sinfo->xdp_frags_size, 1043 + nr_frags * xdp->frame_sz, 1044 + xdp_buff_is_frag_pfmemalloc(xdp)); 1077 1045 } 1078 1046 1079 1047 return skb; ··· 1093 1041 * ice_put_rx_buf - Clean up used buffer and either recycle or free 1094 1042 * @rx_ring: Rx descriptor ring to transact packets on 1095 1043 * @rx_buf: Rx buffer to pull data from 1096 - * @rx_buf_pgcnt: Rx buffer page count pre xdp_do_redirect() 1097 1044 * 1098 - * This function will update next_to_clean and then clean up the contents 1099 - * of the rx_buf. It will either recycle the buffer or unmap it and free 1100 - * the associated resources. 1045 + * This function will clean up the contents of the rx_buf. It will either 1046 + * recycle the buffer or unmap it and free the associated resources. 1101 1047 */ 1102 1048 static void 1103 - ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, 1104 - int rx_buf_pgcnt) 1049 + ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) 1105 1050 { 1106 - u16 ntc = rx_ring->next_to_clean + 1; 1107 - 1108 - /* fetch, update, and store next to clean */ 1109 - ntc = (ntc < rx_ring->count) ? 
ntc : 0; 1110 - rx_ring->next_to_clean = ntc; 1111 - 1112 1051 if (!rx_buf) 1113 1052 return; 1114 1053 1115 - if (ice_can_reuse_rx_page(rx_buf, rx_buf_pgcnt)) { 1054 + if (ice_can_reuse_rx_page(rx_buf)) { 1116 1055 /* hand second half of page back to the ring */ 1117 1056 ice_reuse_rx_page(rx_ring, rx_buf); 1118 1057 } else { ··· 1116 1073 1117 1074 /* clear contents of buffer_info */ 1118 1075 rx_buf->page = NULL; 1119 - } 1120 - 1121 - /** 1122 - * ice_is_non_eop - process handling of non-EOP buffers 1123 - * @rx_ring: Rx ring being processed 1124 - * @rx_desc: Rx descriptor for current buffer 1125 - * 1126 - * If the buffer is an EOP buffer, this function exits returning false, 1127 - * otherwise return true indicating that this is in fact a non-EOP buffer. 1128 - */ 1129 - static bool 1130 - ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) 1131 - { 1132 - /* if we are the last buffer then there is nothing else to do */ 1133 - #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) 1134 - if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF))) 1135 - return false; 1136 - 1137 - rx_ring->ring_stats->rx_stats.non_eop_descs++; 1138 - 1139 - return true; 1140 1076 } 1141 1077 1142 1078 /** ··· 1132 1110 */ 1133 1111 int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) 1134 1112 { 1135 - unsigned int total_rx_bytes = 0, total_rx_pkts = 0, frame_sz = 0; 1136 - u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); 1113 + unsigned int total_rx_bytes = 0, total_rx_pkts = 0; 1137 1114 unsigned int offset = rx_ring->rx_offset; 1115 + struct xdp_buff *xdp = &rx_ring->xdp; 1138 1116 struct ice_tx_ring *xdp_ring = NULL; 1139 - unsigned int xdp_res, xdp_xmit = 0; 1140 - struct sk_buff *skb = rx_ring->skb; 1141 1117 struct bpf_prog *xdp_prog = NULL; 1142 - struct xdp_buff xdp; 1118 + u32 ntc = rx_ring->next_to_clean; 1119 + u32 cnt = rx_ring->count; 1120 + u32 cached_ntc = ntc; 1121 + u32 xdp_xmit = 0; 1122 + u32 cached_ntu; 1143 
1123 bool failure; 1124 + u32 first; 1144 1125 1145 1126 /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ 1146 1127 #if (PAGE_SIZE < 8192) 1147 - frame_sz = ice_rx_frame_truesize(rx_ring, 0); 1128 + xdp->frame_sz = ice_rx_frame_truesize(rx_ring, 0); 1148 1129 #endif 1149 - xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); 1150 1130 1151 1131 xdp_prog = READ_ONCE(rx_ring->xdp_prog); 1152 - if (xdp_prog) 1132 + if (xdp_prog) { 1153 1133 xdp_ring = rx_ring->xdp_ring; 1134 + cached_ntu = xdp_ring->next_to_use; 1135 + } 1154 1136 1155 1137 /* start the loop to process Rx packets bounded by 'budget' */ 1156 1138 while (likely(total_rx_pkts < (unsigned int)budget)) { 1157 1139 union ice_32b_rx_flex_desc *rx_desc; 1158 1140 struct ice_rx_buf *rx_buf; 1159 - unsigned char *hard_start; 1141 + struct sk_buff *skb; 1160 1142 unsigned int size; 1161 1143 u16 stat_err_bits; 1162 - int rx_buf_pgcnt; 1163 1144 u16 vlan_tag = 0; 1164 1145 u16 rx_ptype; 1165 1146 1166 1147 /* get the Rx desc from Rx ring based on 'next_to_clean' */ 1167 - rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean); 1148 + rx_desc = ICE_RX_DESC(rx_ring, ntc); 1168 1149 1169 1150 /* status_error_len will always be zero for unused descriptors 1170 1151 * because it's cleared in cleanup, and overlaps with hdr_addr ··· 1191 1166 if (rx_desc->wb.rxdid == FDIR_DESC_RXDID && 1192 1167 ctrl_vsi->vf) 1193 1168 ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc); 1194 - ice_put_rx_buf(rx_ring, NULL, 0); 1195 - cleaned_count++; 1169 + if (++ntc == cnt) 1170 + ntc = 0; 1196 1171 continue; 1197 1172 } 1198 1173 ··· 1200 1175 ICE_RX_FLX_DESC_PKT_LEN_M; 1201 1176 1202 1177 /* retrieve a buffer from the ring */ 1203 - rx_buf = ice_get_rx_buf(rx_ring, size, &rx_buf_pgcnt); 1178 + rx_buf = ice_get_rx_buf(rx_ring, size, ntc); 1204 1179 1205 - if (!size) { 1206 - xdp.data = NULL; 1207 - xdp.data_end = NULL; 1208 - xdp.data_hard_start = NULL; 1209 - xdp.data_meta = NULL; 1210 - goto construct_skb; 1211 - } 1180 + if 
(!xdp->data) { 1181 + void *hard_start; 1212 1182 1213 - hard_start = page_address(rx_buf->page) + rx_buf->page_offset - 1214 - offset; 1215 - xdp_prepare_buff(&xdp, hard_start, offset, size, true); 1183 + hard_start = page_address(rx_buf->page) + rx_buf->page_offset - 1184 + offset; 1185 + xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); 1216 1186 #if (PAGE_SIZE > 4096) 1217 - /* At larger PAGE_SIZE, frame_sz depend on len size */ 1218 - xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size); 1187 + /* At larger PAGE_SIZE, frame_sz depend on len size */ 1188 + xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size); 1219 1189 #endif 1220 - 1221 - if (!xdp_prog) 1222 - goto construct_skb; 1223 - 1224 - xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog, xdp_ring); 1225 - if (!xdp_res) 1226 - goto construct_skb; 1227 - if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) { 1228 - xdp_xmit |= xdp_res; 1229 - ice_rx_buf_adjust_pg_offset(rx_buf, xdp.frame_sz); 1230 - } else { 1231 - rx_buf->pagecnt_bias++; 1232 - } 1233 - total_rx_bytes += size; 1234 - total_rx_pkts++; 1235 - 1236 - cleaned_count++; 1237 - ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt); 1238 - continue; 1239 - construct_skb: 1240 - if (skb) { 1241 - ice_add_rx_frag(rx_ring, rx_buf, skb, size); 1242 - } else if (likely(xdp.data)) { 1243 - if (ice_ring_uses_build_skb(rx_ring)) 1244 - skb = ice_build_skb(rx_ring, rx_buf, &xdp); 1245 - else 1246 - skb = ice_construct_skb(rx_ring, rx_buf, &xdp); 1247 - } 1248 - /* exit if we failed to retrieve a buffer */ 1249 - if (!skb) { 1250 - rx_ring->ring_stats->rx_stats.alloc_buf_failed++; 1251 - if (rx_buf) 1252 - rx_buf->pagecnt_bias++; 1190 + xdp_buff_clear_frags_flag(xdp); 1191 + } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { 1253 1192 break; 1254 1193 } 1255 - 1256 - ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt); 1257 - cleaned_count++; 1194 + if (++ntc == cnt) 1195 + ntc = 0; 1258 1196 1259 1197 /* skip if it is NOP desc */ 1260 1198 if 
(ice_is_non_eop(rx_ring, rx_desc)) 1261 1199 continue; 1200 + 1201 + ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf); 1202 + if (rx_buf->act == ICE_XDP_PASS) 1203 + goto construct_skb; 1204 + total_rx_bytes += xdp_get_buff_len(xdp); 1205 + total_rx_pkts++; 1206 + 1207 + xdp->data = NULL; 1208 + rx_ring->first_desc = ntc; 1209 + continue; 1210 + construct_skb: 1211 + if (likely(ice_ring_uses_build_skb(rx_ring))) 1212 + skb = ice_build_skb(rx_ring, xdp); 1213 + else 1214 + skb = ice_construct_skb(rx_ring, xdp); 1215 + /* exit if we failed to retrieve a buffer */ 1216 + if (!skb) { 1217 + rx_ring->ring_stats->rx_stats.alloc_page_failed++; 1218 + rx_buf->act = ICE_XDP_CONSUMED; 1219 + if (unlikely(xdp_buff_has_frags(xdp))) 1220 + ice_set_rx_bufs_act(xdp, rx_ring, 1221 + ICE_XDP_CONSUMED); 1222 + xdp->data = NULL; 1223 + rx_ring->first_desc = ntc; 1224 + break; 1225 + } 1226 + xdp->data = NULL; 1227 + rx_ring->first_desc = ntc; 1262 1228 1263 1229 stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); 1264 1230 if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, ··· 1261 1245 vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc); 1262 1246 1263 1247 /* pad the skb if needed, to make a valid ethernet frame */ 1264 - if (eth_skb_pad(skb)) { 1265 - skb = NULL; 1248 + if (eth_skb_pad(skb)) 1266 1249 continue; 1267 - } 1268 1250 1269 1251 /* probably a little skewed due to removing CRC */ 1270 1252 total_rx_bytes += skb->len; ··· 1276 1262 ice_trace(clean_rx_irq_indicate, rx_ring, rx_desc, skb); 1277 1263 /* send completed skb up the stack */ 1278 1264 ice_receive_skb(rx_ring, skb, vlan_tag); 1279 - skb = NULL; 1280 1265 1281 1266 /* update budget accounting */ 1282 1267 total_rx_pkts++; 1283 1268 } 1284 1269 1285 - /* return up to cleaned_count buffers to hardware */ 1286 - failure = ice_alloc_rx_bufs(rx_ring, cleaned_count); 1270 + first = rx_ring->first_desc; 1271 + while (cached_ntc != first) { 1272 + struct ice_rx_buf *buf = &rx_ring->rx_buf[cached_ntc]; 1287 1273 
1288 - if (xdp_prog) 1289 - ice_finalize_xdp_rx(xdp_ring, xdp_xmit); 1290 - rx_ring->skb = skb; 1274 + if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) { 1275 + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); 1276 + xdp_xmit |= buf->act; 1277 + } else if (buf->act & ICE_XDP_CONSUMED) { 1278 + buf->pagecnt_bias++; 1279 + } else if (buf->act == ICE_XDP_PASS) { 1280 + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); 1281 + } 1282 + 1283 + ice_put_rx_buf(rx_ring, buf); 1284 + if (++cached_ntc >= cnt) 1285 + cached_ntc = 0; 1286 + } 1287 + rx_ring->next_to_clean = ntc; 1288 + /* return up to cleaned_count buffers to hardware */ 1289 + failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring)); 1290 + 1291 + if (xdp_xmit) 1292 + ice_finalize_xdp_rx(xdp_ring, xdp_xmit, cached_ntu); 1291 1293 1292 1294 if (rx_ring->ring_stats) 1293 1295 ice_update_rx_ring_stats(rx_ring, total_rx_pkts,
+36 -20
drivers/net/ethernet/intel/ice/ice_txrx.h
··· 9 9 #define ICE_DFLT_IRQ_WORK 256 10 10 #define ICE_RXBUF_3072 3072 11 11 #define ICE_RXBUF_2048 2048 12 + #define ICE_RXBUF_1664 1664 12 13 #define ICE_RXBUF_1536 1536 13 14 #define ICE_MAX_CHAINED_RX_BUFS 5 14 15 #define ICE_MAX_BUF_TXD 8 15 16 #define ICE_MIN_TX_LEN 17 17 + #define ICE_MAX_FRAME_LEGACY_RX 8320 16 18 17 19 /* The size limit for a transmit buffer in a descriptor is (16K - 1). 18 20 * In order to align with the read requests we will align the value to ··· 112 110 (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ 113 111 (R)->next_to_clean - (R)->next_to_use - 1) 114 112 113 + #define ICE_RX_DESC_UNUSED(R) \ 114 + ((((R)->first_desc > (R)->next_to_use) ? 0 : (R)->count) + \ 115 + (R)->first_desc - (R)->next_to_use - 1) 116 + 115 117 #define ICE_RING_QUARTER(R) ((R)->count >> 2) 116 118 117 119 #define ICE_TX_FLAGS_TSO BIT(0) ··· 140 134 #define ICE_XDP_TX BIT(1) 141 135 #define ICE_XDP_REDIR BIT(2) 142 136 #define ICE_XDP_EXIT BIT(3) 137 + #define ICE_SKB_CONSUMED ICE_XDP_CONSUMED 143 138 144 139 #define ICE_RX_DMA_ATTR \ 145 140 (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) ··· 150 143 #define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS) 151 144 152 145 struct ice_tx_buf { 153 - struct ice_tx_desc *next_to_watch; 146 + union { 147 + struct ice_tx_desc *next_to_watch; 148 + u32 rs_idx; 149 + }; 154 150 union { 155 151 struct sk_buff *skb; 156 152 void *raw_buf; /* used for XDP */ 153 + struct xdp_buff *xdp; /* used for XDP_TX ZC */ 157 154 }; 158 155 unsigned int bytecount; 159 - unsigned short gso_segs; 156 + union { 157 + unsigned int gso_segs; 158 + unsigned int nr_frags; /* used for mbuf XDP */ 159 + }; 160 160 u32 tx_flags; 161 161 DEFINE_DMA_UNMAP_LEN(len); 162 162 DEFINE_DMA_UNMAP_ADDR(dma); ··· 184 170 dma_addr_t dma; 185 171 struct page *page; 186 172 unsigned int page_offset; 187 - u16 pagecnt_bias; 173 + unsigned int pgcnt; 174 + unsigned int act; 175 + unsigned int pagecnt_bias; 188 176 }; 
189 177 190 178 struct ice_q_stats { ··· 289 273 struct ice_vsi *vsi; /* Backreference to associated VSI */ 290 274 struct ice_q_vector *q_vector; /* Backreference to associated vector */ 291 275 u8 __iomem *tail; 292 - union { 293 - struct ice_rx_buf *rx_buf; 294 - struct xdp_buff **xdp_buf; 295 - }; 296 - /* CL2 - 2nd cacheline starts here */ 297 - struct xdp_rxq_info xdp_rxq; 298 - /* CL3 - 3rd cacheline starts here */ 299 276 u16 q_index; /* Queue number of ring */ 300 277 301 278 u16 count; /* Number of descriptors */ 302 279 u16 reg_idx; /* HW register index of the ring */ 280 + u16 next_to_alloc; 281 + /* CL2 - 2nd cacheline starts here */ 282 + union { 283 + struct ice_rx_buf *rx_buf; 284 + struct xdp_buff **xdp_buf; 285 + }; 286 + struct xdp_buff xdp; 287 + /* CL3 - 3rd cacheline starts here */ 288 + struct bpf_prog *xdp_prog; 289 + u16 rx_offset; 303 290 304 291 /* used in interrupt processing */ 305 292 u16 next_to_use; 306 293 u16 next_to_clean; 307 - u16 next_to_alloc; 308 - u16 rx_offset; 309 - u16 rx_buf_len; 294 + u16 first_desc; 310 295 311 296 /* stats structs */ 312 297 struct ice_ring_stats *ring_stats; 313 298 314 299 struct rcu_head rcu; /* to avoid race on free */ 315 - /* CL4 - 3rd cacheline starts here */ 300 + /* CL4 - 4th cacheline starts here */ 316 301 struct ice_channel *ch; 317 - struct bpf_prog *xdp_prog; 318 302 struct ice_tx_ring *xdp_ring; 319 303 struct xsk_buff_pool *xsk_pool; 320 - struct sk_buff *skb; 321 304 dma_addr_t dma; /* physical address of ring */ 322 305 u64 cached_phctime; 306 + u16 rx_buf_len; 323 307 u8 dcb_tc; /* Traffic class of ring */ 324 308 u8 ptp_rx; 325 309 #define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) 326 310 #define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) 327 311 u8 flags; 312 + /* CL5 - 5th cacheline starts here */ 313 + struct xdp_rxq_info xdp_rxq; 328 314 } ____cacheline_internodealigned_in_smp; 329 315 330 316 struct ice_tx_ring { ··· 344 326 struct xsk_buff_pool *xsk_pool; 345 327 u16 next_to_use; 346 328 u16 
next_to_clean; 347 - u16 next_rs; 348 - u16 next_dd; 349 329 u16 q_handle; /* Queue handle per TC */ 350 330 u16 reg_idx; /* HW register index of the ring */ 351 331 u16 count; /* Number of descriptors */ 352 332 u16 q_index; /* Queue number of ring */ 333 + u16 xdp_tx_active; 353 334 /* stats structs */ 354 335 struct ice_ring_stats *ring_stats; 355 336 /* CL3 - 3rd cacheline starts here */ ··· 359 342 spinlock_t tx_lock; 360 343 u32 txq_teid; /* Added Tx queue TEID */ 361 344 /* CL4 - 4th cacheline starts here */ 362 - u16 xdp_tx_active; 363 345 #define ICE_TX_FLAGS_RING_XDP BIT(0) 364 346 #define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1) 365 347 #define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2) ··· 447 431 448 432 union ice_32b_rx_flex_desc; 449 433 450 - bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, u16 cleaned_count); 434 + bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, unsigned int cleaned_count); 451 435 netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev); 452 436 u16 453 437 ice_select_queue(struct net_device *dev, struct sk_buff *skb,
+161 -89
drivers/net/ethernet/intel/ice/ice_txrx_lib.c
··· 221 221 } 222 222 223 223 /** 224 + * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer 225 + * @xdp_ring: XDP Tx ring 226 + * @tx_buf: Tx buffer to clean 227 + */ 228 + static void 229 + ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) 230 + { 231 + dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), 232 + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); 233 + dma_unmap_len_set(tx_buf, len, 0); 234 + xdp_ring->xdp_tx_active--; 235 + page_frag_free(tx_buf->raw_buf); 236 + tx_buf->raw_buf = NULL; 237 + } 238 + 239 + /** 224 240 * ice_clean_xdp_irq - Reclaim resources after transmit completes on XDP ring 225 241 * @xdp_ring: XDP ring to clean 226 242 */ 227 - static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) 243 + static u32 ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) 228 244 { 229 - unsigned int total_bytes = 0, total_pkts = 0; 230 - u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); 231 - u16 ntc = xdp_ring->next_to_clean; 232 - struct ice_tx_desc *next_dd_desc; 233 - u16 next_dd = xdp_ring->next_dd; 234 - struct ice_tx_buf *tx_buf; 235 - int i; 245 + int total_bytes = 0, total_pkts = 0; 246 + u32 ntc = xdp_ring->next_to_clean; 247 + struct ice_tx_desc *tx_desc; 248 + u32 cnt = xdp_ring->count; 249 + u32 ready_frames = 0; 250 + u32 frags; 251 + u32 idx; 252 + u32 ret; 236 253 237 - next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd); 238 - if (!(next_dd_desc->cmd_type_offset_bsz & 239 - cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) 240 - return; 241 - 242 - for (i = 0; i < tx_thresh; i++) { 243 - tx_buf = &xdp_ring->tx_buf[ntc]; 244 - 245 - total_bytes += tx_buf->bytecount; 246 - /* normally tx_buf->gso_segs was taken but at this point 247 - * it's always 1 for us 248 - */ 249 - total_pkts++; 250 - 251 - page_frag_free(tx_buf->raw_buf); 252 - dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), 253 - dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); 254 - dma_unmap_len_set(tx_buf, len, 0); 255 - tx_buf->raw_buf = NULL; 256 - 
257 - ntc++; 258 - if (ntc >= xdp_ring->count) 259 - ntc = 0; 254 + idx = xdp_ring->tx_buf[ntc].rs_idx; 255 + tx_desc = ICE_TX_DESC(xdp_ring, idx); 256 + if (tx_desc->cmd_type_offset_bsz & 257 + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)) { 258 + if (idx >= ntc) 259 + ready_frames = idx - ntc + 1; 260 + else 261 + ready_frames = idx + cnt - ntc + 1; 260 262 } 261 263 262 - next_dd_desc->cmd_type_offset_bsz = 0; 263 - xdp_ring->next_dd = xdp_ring->next_dd + tx_thresh; 264 - if (xdp_ring->next_dd > xdp_ring->count) 265 - xdp_ring->next_dd = tx_thresh - 1; 264 + if (!ready_frames) 265 + return 0; 266 + ret = ready_frames; 267 + 268 + while (ready_frames) { 269 + struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc]; 270 + 271 + /* bytecount holds size of head + frags */ 272 + total_bytes += tx_buf->bytecount; 273 + frags = tx_buf->nr_frags; 274 + total_pkts++; 275 + /* count head + frags */ 276 + ready_frames -= frags + 1; 277 + 278 + if (xdp_ring->xsk_pool) 279 + xsk_buff_free(tx_buf->xdp); 280 + else 281 + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); 282 + ntc++; 283 + if (ntc == cnt) 284 + ntc = 0; 285 + 286 + for (int i = 0; i < frags; i++) { 287 + tx_buf = &xdp_ring->tx_buf[ntc]; 288 + 289 + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); 290 + ntc++; 291 + if (ntc == cnt) 292 + ntc = 0; 293 + } 294 + } 295 + 296 + tx_desc->cmd_type_offset_bsz = 0; 266 297 xdp_ring->next_to_clean = ntc; 267 298 ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes); 299 + 300 + return ret; 268 301 } 269 302 270 303 /** 271 - * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission 272 - * @data: packet data pointer 273 - * @size: packet data size 304 + * __ice_xmit_xdp_ring - submit frame to XDP ring for transmission 305 + * @xdp: XDP buffer to be placed onto Tx descriptors 274 306 * @xdp_ring: XDP ring for transmission 275 307 */ 276 - int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) 308 + int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct 
ice_tx_ring *xdp_ring) 277 309 { 278 - u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); 279 - u16 i = xdp_ring->next_to_use; 310 + struct skb_shared_info *sinfo = NULL; 311 + u32 size = xdp->data_end - xdp->data; 312 + struct device *dev = xdp_ring->dev; 313 + u32 ntu = xdp_ring->next_to_use; 280 314 struct ice_tx_desc *tx_desc; 315 + struct ice_tx_buf *tx_head; 281 316 struct ice_tx_buf *tx_buf; 282 - dma_addr_t dma; 317 + u32 cnt = xdp_ring->count; 318 + void *data = xdp->data; 319 + u32 nr_frags = 0; 320 + u32 free_space; 321 + u32 frag = 0; 283 322 284 - if (ICE_DESC_UNUSED(xdp_ring) < tx_thresh) 285 - ice_clean_xdp_irq(xdp_ring); 323 + free_space = ICE_DESC_UNUSED(xdp_ring); 286 324 287 - if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) { 288 - xdp_ring->ring_stats->tx_stats.tx_busy++; 289 - return ICE_XDP_CONSUMED; 325 + if (ICE_DESC_UNUSED(xdp_ring) < ICE_RING_QUARTER(xdp_ring)) 326 + free_space += ice_clean_xdp_irq(xdp_ring); 327 + 328 + if (unlikely(xdp_buff_has_frags(xdp))) { 329 + sinfo = xdp_get_shared_info_from_buff(xdp); 330 + nr_frags = sinfo->nr_frags; 331 + if (free_space < nr_frags + 1) { 332 + xdp_ring->ring_stats->tx_stats.tx_busy++; 333 + return ICE_XDP_CONSUMED; 334 + } 290 335 } 291 336 292 - dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE); 293 - if (dma_mapping_error(xdp_ring->dev, dma)) 294 - return ICE_XDP_CONSUMED; 337 + tx_desc = ICE_TX_DESC(xdp_ring, ntu); 338 + tx_head = &xdp_ring->tx_buf[ntu]; 339 + tx_buf = tx_head; 295 340 296 - tx_buf = &xdp_ring->tx_buf[i]; 297 - tx_buf->bytecount = size; 298 - tx_buf->gso_segs = 1; 299 - tx_buf->raw_buf = data; 341 + for (;;) { 342 + dma_addr_t dma; 300 343 301 - /* record length, and DMA address */ 302 - dma_unmap_len_set(tx_buf, len, size); 303 - dma_unmap_addr_set(tx_buf, dma, dma); 344 + dma = dma_map_single(dev, data, size, DMA_TO_DEVICE); 345 + if (dma_mapping_error(dev, dma)) 346 + goto dma_unmap; 304 347 305 - tx_desc = ICE_TX_DESC(xdp_ring, i); 306 - tx_desc->buf_addr = 
cpu_to_le64(dma); 307 - tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0, 308 - size, 0); 348 + /* record length, and DMA address */ 349 + dma_unmap_len_set(tx_buf, len, size); 350 + dma_unmap_addr_set(tx_buf, dma, dma); 351 + 352 + tx_desc->buf_addr = cpu_to_le64(dma); 353 + tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0); 354 + tx_buf->raw_buf = data; 355 + 356 + ntu++; 357 + if (ntu == cnt) 358 + ntu = 0; 359 + 360 + if (frag == nr_frags) 361 + break; 362 + 363 + tx_desc = ICE_TX_DESC(xdp_ring, ntu); 364 + tx_buf = &xdp_ring->tx_buf[ntu]; 365 + 366 + data = skb_frag_address(&sinfo->frags[frag]); 367 + size = skb_frag_size(&sinfo->frags[frag]); 368 + frag++; 369 + } 370 + 371 + /* store info about bytecount and frag count in first desc */ 372 + tx_head->bytecount = xdp_get_buff_len(xdp); 373 + tx_head->nr_frags = nr_frags; 374 + 375 + /* update last descriptor from a frame with EOP */ 376 + tx_desc->cmd_type_offset_bsz |= 377 + cpu_to_le64(ICE_TX_DESC_CMD_EOP << ICE_TXD_QW1_CMD_S); 309 378 310 379 xdp_ring->xdp_tx_active++; 311 - i++; 312 - if (i == xdp_ring->count) { 313 - i = 0; 314 - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); 315 - tx_desc->cmd_type_offset_bsz |= 316 - cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); 317 - xdp_ring->next_rs = tx_thresh - 1; 318 - } 319 - xdp_ring->next_to_use = i; 320 - 321 - if (i > xdp_ring->next_rs) { 322 - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); 323 - tx_desc->cmd_type_offset_bsz |= 324 - cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); 325 - xdp_ring->next_rs += tx_thresh; 326 - } 380 + xdp_ring->next_to_use = ntu; 327 381 328 382 return ICE_XDP_TX; 383 + 384 + dma_unmap: 385 + for (;;) { 386 + tx_buf = &xdp_ring->tx_buf[ntu]; 387 + dma_unmap_page(dev, dma_unmap_addr(tx_buf, dma), 388 + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); 389 + dma_unmap_len_set(tx_buf, len, 0); 390 + if (tx_buf == tx_head) 391 + break; 392 + 393 + if (!ntu) 394 + ntu += cnt; 395 + 
ntu--; 396 + } 397 + return ICE_XDP_CONSUMED; 329 398 } 330 399 331 400 /** 332 - * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it 333 - * @xdp: XDP buffer 334 - * @xdp_ring: XDP Tx ring 335 - * 336 - * Returns negative on failure, 0 on success. 401 + * ice_xmit_xdp_ring - submit frame to XDP ring for transmission 402 + * @xdpf: XDP frame that will be converted to XDP buff 403 + * @xdp_ring: XDP ring for transmission 337 404 */ 338 - int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring) 405 + int ice_xmit_xdp_ring(struct xdp_frame *xdpf, struct ice_tx_ring *xdp_ring) 339 406 { 340 - struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); 407 + struct xdp_buff xdp; 341 408 342 - if (unlikely(!xdpf)) 343 - return ICE_XDP_CONSUMED; 344 - 345 - return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring); 409 + xdp_convert_frame_to_buff(xdpf, &xdp); 410 + return __ice_xmit_xdp_ring(&xdp, xdp_ring); 346 411 } 347 412 348 413 /** ··· 419 354 * should be called when a batch of packets has been processed in the 420 355 * napi loop. 421 356 */ 422 - void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res) 357 + void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, 358 + u32 first_idx) 423 359 { 360 + struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[first_idx]; 361 + 424 362 if (xdp_res & ICE_XDP_REDIR) 425 363 xdp_do_flush_map(); 426 364 427 365 if (xdp_res & ICE_XDP_TX) { 428 366 if (static_branch_unlikely(&ice_xdp_locking_key)) 429 367 spin_lock(&xdp_ring->tx_lock); 368 + /* store index of descriptor with RS bit set in the first 369 + * ice_tx_buf of given NAPI batch 370 + */ 371 + tx_buf->rs_idx = ice_set_rs_bit(xdp_ring); 430 372 ice_xdp_ring_update_tail(xdp_ring); 431 373 if (static_branch_unlikely(&ice_xdp_locking_key)) 432 374 spin_unlock(&xdp_ring->tx_lock);
+73 -2
drivers/net/ethernet/intel/ice/ice_txrx_lib.h
··· 6 6 #include "ice.h" 7 7 8 8 /** 9 + * ice_set_rx_bufs_act - propagate Rx buffer action to frags 10 + * @xdp: XDP buffer representing frame (linear and frags part) 11 + * @rx_ring: Rx ring struct 12 + * act: action to store onto Rx buffers related to XDP buffer parts 13 + * 14 + * Set action that should be taken before putting Rx buffer from first frag 15 + * to one before last. Last one is handled by caller of this function as it 16 + * is the EOP frag that is currently being processed. This function is 17 + * supposed to be called only when XDP buffer contains frags. 18 + */ 19 + static inline void 20 + ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring, 21 + const unsigned int act) 22 + { 23 + const struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 24 + u32 first = rx_ring->first_desc; 25 + u32 nr_frags = sinfo->nr_frags; 26 + u32 cnt = rx_ring->count; 27 + struct ice_rx_buf *buf; 28 + 29 + for (int i = 0; i < nr_frags; i++) { 30 + buf = &rx_ring->rx_buf[first]; 31 + buf->act = act; 32 + 33 + if (++first == cnt) 34 + first = 0; 35 + } 36 + } 37 + 38 + /** 9 39 * ice_test_staterr - tests bits in Rx descriptor status and error fields 10 40 * @status_err_n: Rx descriptor status_error0 or status_error1 bits 11 41 * @stat_err_bits: value to mask ··· 49 19 ice_test_staterr(__le16 status_err_n, const u16 stat_err_bits) 50 20 { 51 21 return !!(status_err_n & cpu_to_le16(stat_err_bits)); 22 + } 23 + 24 + /** 25 + * ice_is_non_eop - process handling of non-EOP buffers 26 + * @rx_ring: Rx ring being processed 27 + * @rx_desc: Rx descriptor for current buffer 28 + * 29 + * If the buffer is an EOP buffer, this function exits returning false, 30 + * otherwise return true indicating that this is in fact a non-EOP buffer. 
31 + */ 32 + static inline bool 33 + ice_is_non_eop(const struct ice_rx_ring *rx_ring, 34 + const union ice_32b_rx_flex_desc *rx_desc) 35 + { 36 + /* if we are the last buffer then there is nothing else to do */ 37 + #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) 38 + if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF))) 39 + return false; 40 + 41 + rx_ring->ring_stats->rx_stats.non_eop_descs++; 42 + 43 + return true; 52 44 } 53 45 54 46 static inline __le64 ··· 122 70 writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail); 123 71 } 124 72 125 - void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res); 73 + /** 74 + * ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU) 75 + * @xdp_ring: XDP ring to produce the HW Tx descriptors on 76 + * 77 + * returns index of descriptor that had RS bit produced on 78 + */ 79 + static inline u32 ice_set_rs_bit(const struct ice_tx_ring *xdp_ring) 80 + { 81 + u32 rs_idx = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1; 82 + struct ice_tx_desc *tx_desc; 83 + 84 + tx_desc = ICE_TX_DESC(xdp_ring, rs_idx); 85 + tx_desc->cmd_type_offset_bsz |= 86 + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); 87 + 88 + return rs_idx; 89 + } 90 + 91 + void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, u32 first_idx); 126 92 int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring); 127 - int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring); 93 + int ice_xmit_xdp_ring(struct xdp_frame *xdpf, struct ice_tx_ring *xdp_ring); 94 + int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring); 128 95 void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val); 129 96 void 130 97 ice_process_skb_fields(struct ice_rx_ring *rx_ring,
+105 -87
drivers/net/ethernet/intel/ice/ice_xsk.c
··· 598 598 } 599 599 600 600 /** 601 + * ice_clean_xdp_irq_zc - AF_XDP ZC specific Tx cleaning routine 602 + * @xdp_ring: XDP Tx ring 603 + */ 604 + static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) 605 + { 606 + u16 ntc = xdp_ring->next_to_clean; 607 + struct ice_tx_desc *tx_desc; 608 + u16 cnt = xdp_ring->count; 609 + struct ice_tx_buf *tx_buf; 610 + u16 xsk_frames = 0; 611 + u16 last_rs; 612 + int i; 613 + 614 + last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1; 615 + tx_desc = ICE_TX_DESC(xdp_ring, last_rs); 616 + if (tx_desc->cmd_type_offset_bsz & 617 + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)) { 618 + if (last_rs >= ntc) 619 + xsk_frames = last_rs - ntc + 1; 620 + else 621 + xsk_frames = last_rs + cnt - ntc + 1; 622 + } 623 + 624 + if (!xsk_frames) 625 + return; 626 + 627 + if (likely(!xdp_ring->xdp_tx_active)) 628 + goto skip; 629 + 630 + ntc = xdp_ring->next_to_clean; 631 + for (i = 0; i < xsk_frames; i++) { 632 + tx_buf = &xdp_ring->tx_buf[ntc]; 633 + 634 + if (tx_buf->xdp) { 635 + xsk_buff_free(tx_buf->xdp); 636 + xdp_ring->xdp_tx_active--; 637 + } else { 638 + xsk_frames++; 639 + } 640 + 641 + ntc++; 642 + if (ntc == cnt) 643 + ntc = 0; 644 + } 645 + skip: 646 + tx_desc->cmd_type_offset_bsz = 0; 647 + xdp_ring->next_to_clean += xsk_frames; 648 + if (xdp_ring->next_to_clean >= cnt) 649 + xdp_ring->next_to_clean -= cnt; 650 + if (xsk_frames) 651 + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); 652 + } 653 + 654 + /** 655 + * ice_xmit_xdp_tx_zc - AF_XDP ZC handler for XDP_TX 656 + * @xdp: XDP buffer to xmit 657 + * @xdp_ring: XDP ring to produce descriptor onto 658 + * 659 + * note that this function works directly on xdp_buff, no need to convert 660 + * it to xdp_frame. xdp_buff pointer is stored to ice_tx_buf so that cleaning 661 + * side will be able to xsk_buff_free() it. 
662 + * 663 + * Returns ICE_XDP_TX for successfully produced desc, ICE_XDP_CONSUMED if there 664 + * was not enough space on XDP ring 665 + */ 666 + static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, 667 + struct ice_tx_ring *xdp_ring) 668 + { 669 + u32 size = xdp->data_end - xdp->data; 670 + u32 ntu = xdp_ring->next_to_use; 671 + struct ice_tx_desc *tx_desc; 672 + struct ice_tx_buf *tx_buf; 673 + dma_addr_t dma; 674 + 675 + if (ICE_DESC_UNUSED(xdp_ring) < ICE_RING_QUARTER(xdp_ring)) { 676 + ice_clean_xdp_irq_zc(xdp_ring); 677 + if (!ICE_DESC_UNUSED(xdp_ring)) { 678 + xdp_ring->ring_stats->tx_stats.tx_busy++; 679 + return ICE_XDP_CONSUMED; 680 + } 681 + } 682 + 683 + dma = xsk_buff_xdp_get_dma(xdp); 684 + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size); 685 + 686 + tx_buf = &xdp_ring->tx_buf[ntu]; 687 + tx_buf->xdp = xdp; 688 + tx_desc = ICE_TX_DESC(xdp_ring, ntu); 689 + tx_desc->buf_addr = cpu_to_le64(dma); 690 + tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 691 + 0, size, 0); 692 + xdp_ring->xdp_tx_active++; 693 + 694 + if (++ntu == xdp_ring->count) 695 + ntu = 0; 696 + xdp_ring->next_to_use = ntu; 697 + 698 + return ICE_XDP_TX; 699 + } 700 + 701 + /** 601 702 * ice_run_xdp_zc - Executes an XDP program in zero-copy path 602 703 * @rx_ring: Rx ring 603 704 * @xdp: xdp_buff used as input to the XDP program ··· 731 630 case XDP_PASS: 732 631 break; 733 632 case XDP_TX: 734 - result = ice_xmit_xdp_buff(xdp, xdp_ring); 633 + result = ice_xmit_xdp_tx_zc(xdp, xdp_ring); 735 634 if (result == ICE_XDP_CONSUMED) 736 635 goto out_failure; 737 636 break; ··· 861 760 if (entries_to_alloc > ICE_RING_QUARTER(rx_ring)) 862 761 failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc); 863 762 864 - ice_finalize_xdp_rx(xdp_ring, xdp_xmit); 763 + ice_finalize_xdp_rx(xdp_ring, xdp_xmit, 0); 865 764 ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); 866 765 867 766 if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) { ··· 874 773 } 
875 774 876 775 return failure ? budget : (int)total_rx_packets; 877 - } 878 - 879 - /** 880 - * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer 881 - * @xdp_ring: XDP Tx ring 882 - * @tx_buf: Tx buffer to clean 883 - */ 884 - static void 885 - ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) 886 - { 887 - page_frag_free(tx_buf->raw_buf); 888 - xdp_ring->xdp_tx_active--; 889 - dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), 890 - dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); 891 - dma_unmap_len_set(tx_buf, len, 0); 892 - } 893 - 894 - /** 895 - * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ 896 - * @xdp_ring: XDP Tx ring 897 - */ 898 - static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) 899 - { 900 - u16 ntc = xdp_ring->next_to_clean; 901 - struct ice_tx_desc *tx_desc; 902 - u16 cnt = xdp_ring->count; 903 - struct ice_tx_buf *tx_buf; 904 - u16 xsk_frames = 0; 905 - u16 last_rs; 906 - int i; 907 - 908 - last_rs = xdp_ring->next_to_use ? 
xdp_ring->next_to_use - 1 : cnt - 1; 909 - tx_desc = ICE_TX_DESC(xdp_ring, last_rs); 910 - if ((tx_desc->cmd_type_offset_bsz & 911 - cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) { 912 - if (last_rs >= ntc) 913 - xsk_frames = last_rs - ntc + 1; 914 - else 915 - xsk_frames = last_rs + cnt - ntc + 1; 916 - } 917 - 918 - if (!xsk_frames) 919 - return; 920 - 921 - if (likely(!xdp_ring->xdp_tx_active)) 922 - goto skip; 923 - 924 - ntc = xdp_ring->next_to_clean; 925 - for (i = 0; i < xsk_frames; i++) { 926 - tx_buf = &xdp_ring->tx_buf[ntc]; 927 - 928 - if (tx_buf->raw_buf) { 929 - ice_clean_xdp_tx_buf(xdp_ring, tx_buf); 930 - tx_buf->raw_buf = NULL; 931 - } else { 932 - xsk_frames++; 933 - } 934 - 935 - ntc++; 936 - if (ntc >= xdp_ring->count) 937 - ntc = 0; 938 - } 939 - skip: 940 - tx_desc->cmd_type_offset_bsz = 0; 941 - xdp_ring->next_to_clean += xsk_frames; 942 - if (xdp_ring->next_to_clean >= cnt) 943 - xdp_ring->next_to_clean -= cnt; 944 - if (xsk_frames) 945 - xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); 946 776 } 947 777 948 778 /** ··· 947 915 ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); 948 916 for (; i < batched + leftover; i++) 949 917 ice_xmit_pkt(xdp_ring, &descs[i], total_bytes); 950 - } 951 - 952 - /** 953 - * ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU) 954 - * @xdp_ring: XDP ring to produce the HW Tx descriptors on 955 - */ 956 - static void ice_set_rs_bit(struct ice_tx_ring *xdp_ring) 957 - { 958 - u16 ntu = xdp_ring->next_to_use ? 
xdp_ring->next_to_use - 1 : xdp_ring->count - 1; 959 - struct ice_tx_desc *tx_desc; 960 - 961 - tx_desc = ICE_TX_DESC(xdp_ring, ntu); 962 - tx_desc->cmd_type_offset_bsz |= 963 - cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); 964 918 } 965 919 966 920 /** ··· 1083 1065 while (ntc != ntu) { 1084 1066 struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc]; 1085 1067 1086 - if (tx_buf->raw_buf) 1087 - ice_clean_xdp_tx_buf(xdp_ring, tx_buf); 1068 + if (tx_buf->xdp) 1069 + xsk_buff_free(tx_buf->xdp); 1088 1070 else 1089 1071 xsk_frames++; 1090 1072