Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

net/mlx5e: Reuse per-RQ XDP buffer to avoid stack zeroing overhead

CONFIG_INIT_STACK_ALL_ZERO introduces a performance cost by
zero-initializing all stack variables on function entry. The mlx5 XDP
RX path previously allocated a struct mlx5e_xdp_buff on the stack per
received CQE, resulting in measurable performance degradation under
this config.

This patch reuses a single struct mlx5e_xdp_buff embedded in struct
mlx5e_rq (rq->mxbuf), avoiding per-CQE stack allocations and repeated
zeroing.

With this change, XDP_DROP and XDP_TX performance matches that of
kernels built without CONFIG_INIT_STACK_ALL_ZERO.

Performance was measured on a ConnectX-6Dx using a single RX channel
(1 CPU at 100% usage) with ~50 Mpps of offered traffic. The baseline
results were taken from net-next-6.15.

Stack zeroing disabled:
- XDP_DROP:
    * baseline: 31.47 Mpps
    * baseline + per-RQ allocation: 32.31 Mpps (+2.68%)

- XDP_TX:
    * baseline: 12.41 Mpps
    * baseline + per-RQ allocation: 12.95 Mpps (+4.30%)

Stack zeroing enabled:
- XDP_DROP:
    * baseline: 24.32 Mpps
    * baseline + per-RQ allocation: 32.27 Mpps (+32.7%)

- XDP_TX:
    * baseline: 11.80 Mpps
    * baseline + per-RQ allocation: 12.24 Mpps (+3.72%)

Reported-by: Sebastiano Miano <mianosebastiano@gmail.com>
Reported-by: Samuel Dobron <sdobron@redhat.com>
Link: https://lore.kernel.org/all/CAMENy5pb8ea+piKLg5q5yRTMZacQqYWAoVLE1FE9WhQPq92E0g@mail.gmail.com/
Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Link: https://patch.msgid.link/1747253032-663457-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by Carolina Jubran and committed by Jakub Kicinski
b66b76a8 15d7b3df

+51 -43
+7
drivers/net/ethernet/mellanox/mlx5/core/en.h
··· 520 520 struct mlx5e_channel *channel; 521 521 } ____cacheline_aligned_in_smp; 522 522 523 + struct mlx5e_xdp_buff { 524 + struct xdp_buff xdp; 525 + struct mlx5_cqe64 *cqe; 526 + struct mlx5e_rq *rq; 527 + }; 528 + 523 529 struct mlx5e_ktls_resync_resp; 524 530 525 531 struct mlx5e_icosq { ··· 722 716 struct mlx5e_xdpsq *xdpsq; 723 717 DECLARE_BITMAP(flags, 8); 724 718 struct page_pool *page_pool; 719 + struct mlx5e_xdp_buff mxbuf; 725 720 726 721 /* AF_XDP zero-copy */ 727 722 struct xsk_buff_pool *xsk_pool;
-6
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
··· 45 45 (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \ 46 46 sizeof(struct mlx5_wqe_inline_seg)) 47 47 48 - struct mlx5e_xdp_buff { 49 - struct xdp_buff xdp; 50 - struct mlx5_cqe64 *cqe; 51 - struct mlx5e_rq *rq; 52 - }; 53 - 54 48 /* XDP packets can be transmitted in different ways. On completion, we need to 55 49 * distinguish between them to clean up things in a proper way. 56 50 */
+44 -37
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
··· 1684 1684 1685 1685 prog = rcu_dereference(rq->xdp_prog); 1686 1686 if (prog) { 1687 - struct mlx5e_xdp_buff mxbuf; 1687 + struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; 1688 1688 1689 1689 net_prefetchw(va); /* xdp_frame data area */ 1690 1690 mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, 1691 - cqe_bcnt, &mxbuf); 1692 - if (mlx5e_xdp_handle(rq, prog, &mxbuf)) 1691 + cqe_bcnt, mxbuf); 1692 + if (mlx5e_xdp_handle(rq, prog, mxbuf)) 1693 1693 return NULL; /* page/packet was consumed by XDP */ 1694 1694 1695 - rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start; 1696 - metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta; 1697 - cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data; 1695 + rx_headroom = mxbuf->xdp.data - mxbuf->xdp.data_hard_start; 1696 + metasize = mxbuf->xdp.data - mxbuf->xdp.data_meta; 1697 + cqe_bcnt = mxbuf->xdp.data_end - mxbuf->xdp.data; 1698 1698 } 1699 1699 frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); 1700 1700 skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); ··· 1713 1713 struct mlx5_cqe64 *cqe, u32 cqe_bcnt) 1714 1714 { 1715 1715 struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; 1716 + struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; 1716 1717 struct mlx5e_wqe_frag_info *head_wi = wi; 1717 1718 u16 rx_headroom = rq->buff.headroom; 1718 1719 struct mlx5e_frag_page *frag_page; 1719 1720 struct skb_shared_info *sinfo; 1720 - struct mlx5e_xdp_buff mxbuf; 1721 1721 u32 frag_consumed_bytes; 1722 1722 struct bpf_prog *prog; 1723 1723 struct sk_buff *skb; ··· 1737 1737 net_prefetch(va + rx_headroom); 1738 1738 1739 1739 mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, 1740 - frag_consumed_bytes, &mxbuf); 1741 - sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp); 1740 + frag_consumed_bytes, mxbuf); 1741 + sinfo = xdp_get_shared_info_from_buff(&mxbuf->xdp); 1742 1742 truesize = 0; 1743 1743 1744 1744 cqe_bcnt -= frag_consumed_bytes; ··· 1750 1750 1751 1751 frag_consumed_bytes = 
min_t(u32, frag_info->frag_size, cqe_bcnt); 1752 1752 1753 - mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, 1754 - wi->offset, frag_consumed_bytes); 1753 + mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf->xdp, 1754 + frag_page, wi->offset, 1755 + frag_consumed_bytes); 1755 1756 truesize += frag_info->frag_stride; 1756 1757 1757 1758 cqe_bcnt -= frag_consumed_bytes; ··· 1761 1760 } 1762 1761 1763 1762 prog = rcu_dereference(rq->xdp_prog); 1764 - if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) { 1763 + if (prog && mlx5e_xdp_handle(rq, prog, mxbuf)) { 1765 1764 if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { 1766 1765 struct mlx5e_wqe_frag_info *pwi; 1767 1766 ··· 1771 1770 return NULL; /* page/packet was consumed by XDP */ 1772 1771 } 1773 1772 1774 - skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, rq->buff.frame0_sz, 1775 - mxbuf.xdp.data - mxbuf.xdp.data_hard_start, 1776 - mxbuf.xdp.data_end - mxbuf.xdp.data, 1777 - mxbuf.xdp.data - mxbuf.xdp.data_meta); 1773 + skb = mlx5e_build_linear_skb( 1774 + rq, mxbuf->xdp.data_hard_start, rq->buff.frame0_sz, 1775 + mxbuf->xdp.data - mxbuf->xdp.data_hard_start, 1776 + mxbuf->xdp.data_end - mxbuf->xdp.data, 1777 + mxbuf->xdp.data - mxbuf->xdp.data_meta); 1778 1778 if (unlikely(!skb)) 1779 1779 return NULL; 1780 1780 1781 1781 skb_mark_for_recycle(skb); 1782 1782 head_wi->frag_page->frags++; 1783 1783 1784 - if (xdp_buff_has_frags(&mxbuf.xdp)) { 1784 + if (xdp_buff_has_frags(&mxbuf->xdp)) { 1785 1785 /* sinfo->nr_frags is reset by build_skb, calculate again. 
*/ 1786 1786 xdp_update_skb_shared_info(skb, wi - head_wi - 1, 1787 1787 sinfo->xdp_frags_size, truesize, 1788 - xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); 1788 + xdp_buff_is_frag_pfmemalloc( 1789 + &mxbuf->xdp)); 1789 1790 1790 1791 for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++) 1791 1792 pwi->frag_page->frags++; ··· 1987 1984 struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; 1988 1985 u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); 1989 1986 struct mlx5e_frag_page *head_page = frag_page; 1987 + struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; 1990 1988 u32 frag_offset = head_offset; 1991 1989 u32 byte_cnt = cqe_bcnt; 1992 1990 struct skb_shared_info *sinfo; 1993 - struct mlx5e_xdp_buff mxbuf; 1994 1991 unsigned int truesize = 0; 1995 1992 struct bpf_prog *prog; 1996 1993 struct sk_buff *skb; ··· 2036 2033 } 2037 2034 } 2038 2035 2039 - mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, linear_data_len, &mxbuf); 2036 + mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, 2037 + linear_data_len, mxbuf); 2040 2038 2041 - sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp); 2039 + sinfo = xdp_get_shared_info_from_buff(&mxbuf->xdp); 2042 2040 2043 2041 while (byte_cnt) { 2044 2042 /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. 
*/ ··· 2050 2046 else 2051 2047 truesize += ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); 2052 2048 2053 - mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, frag_offset, 2049 + mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf->xdp, 2050 + frag_page, frag_offset, 2054 2051 pg_consumed_bytes); 2055 2052 byte_cnt -= pg_consumed_bytes; 2056 2053 frag_offset = 0; ··· 2059 2054 } 2060 2055 2061 2056 if (prog) { 2062 - if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { 2057 + if (mlx5e_xdp_handle(rq, prog, mxbuf)) { 2063 2058 if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { 2064 2059 struct mlx5e_frag_page *pfp; 2065 2060 ··· 2072 2067 return NULL; /* page/packet was consumed by XDP */ 2073 2068 } 2074 2069 2075 - skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, 2076 - linear_frame_sz, 2077 - mxbuf.xdp.data - mxbuf.xdp.data_hard_start, 0, 2078 - mxbuf.xdp.data - mxbuf.xdp.data_meta); 2070 + skb = mlx5e_build_linear_skb( 2071 + rq, mxbuf->xdp.data_hard_start, linear_frame_sz, 2072 + mxbuf->xdp.data - mxbuf->xdp.data_hard_start, 0, 2073 + mxbuf->xdp.data - mxbuf->xdp.data_meta); 2079 2074 if (unlikely(!skb)) { 2080 2075 mlx5e_page_release_fragmented(rq, &wi->linear_page); 2081 2076 return NULL; ··· 2085 2080 wi->linear_page.frags++; 2086 2081 mlx5e_page_release_fragmented(rq, &wi->linear_page); 2087 2082 2088 - if (xdp_buff_has_frags(&mxbuf.xdp)) { 2083 + if (xdp_buff_has_frags(&mxbuf->xdp)) { 2089 2084 struct mlx5e_frag_page *pagep; 2090 2085 2091 2086 /* sinfo->nr_frags is reset by build_skb, calculate again. 
*/ 2092 2087 xdp_update_skb_shared_info(skb, frag_page - head_page, 2093 2088 sinfo->xdp_frags_size, truesize, 2094 - xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); 2089 + xdp_buff_is_frag_pfmemalloc( 2090 + &mxbuf->xdp)); 2095 2091 2096 2092 pagep = head_page; 2097 2093 do ··· 2103 2097 } else { 2104 2098 dma_addr_t addr; 2105 2099 2106 - if (xdp_buff_has_frags(&mxbuf.xdp)) { 2100 + if (xdp_buff_has_frags(&mxbuf->xdp)) { 2107 2101 struct mlx5e_frag_page *pagep; 2108 2102 2109 2103 xdp_update_skb_shared_info(skb, sinfo->nr_frags, 2110 2104 sinfo->xdp_frags_size, truesize, 2111 - xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); 2105 + xdp_buff_is_frag_pfmemalloc( 2106 + &mxbuf->xdp)); 2112 2107 2113 2108 pagep = frag_page - sinfo->nr_frags; 2114 2109 do ··· 2159 2152 2160 2153 prog = rcu_dereference(rq->xdp_prog); 2161 2154 if (prog) { 2162 - struct mlx5e_xdp_buff mxbuf; 2155 + struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; 2163 2156 2164 2157 net_prefetchw(va); /* xdp_frame data area */ 2165 2158 mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, 2166 - cqe_bcnt, &mxbuf); 2167 - if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { 2159 + cqe_bcnt, mxbuf); 2160 + if (mlx5e_xdp_handle(rq, prog, mxbuf)) { 2168 2161 if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) 2169 2162 frag_page->frags++; 2170 2163 return NULL; /* page/packet was consumed by XDP */ 2171 2164 } 2172 2165 2173 - rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start; 2174 - metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta; 2175 - cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data; 2166 + rx_headroom = mxbuf->xdp.data - mxbuf->xdp.data_hard_start; 2167 + metasize = mxbuf->xdp.data - mxbuf->xdp.data_meta; 2168 + cqe_bcnt = mxbuf->xdp.data_end - mxbuf->xdp.data; 2176 2169 } 2177 2170 frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); 2178 2171 skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);