Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'net-mlx5e-xdp-add-support-for-multi-packet-per-page'

Tariq Toukan says:

====================
net/mlx5e: XDP, Add support for multi-packet per page

This series removes the limitation of having one packet per page in XDP
mode. This has the following implications:

- XDP in Striding RQ mode can now be used on 64K page systems.

- XDP in Legacy RQ mode was using a single packet per page which on 64K
page systems is quite inefficient. The improvement can be observed
with an XDP_DROP test when running in Legacy RQ mode on an ARM
Neoverse-N1 system with a 64K page size:
+-----------------------------------------------+
| MTU  | baseline   | this change | improvement |
|------+------------+-------------+-------------|
| 1500 | 15.55 Mpps | 18.99 Mpps  | 22.0 %      |
| 9000 | 15.53 Mpps | 18.24 Mpps  | 17.5 %      |
+-----------------------------------------------+

After lifting this limitation, the series switches to using fragments
for the side page in non-linear mode. This small improvement is mainly
visible in XDP_DROP tests with small 64B packets and an MTU large enough
for Striding RQ to be in non-linear mode:
+----------------------------------------------------------------------+
| System               | MTU  | baseline   | this change | improvement |
|----------------------+------+------------+-------------+-------------|
| 4K page x86_64 [1]   | 9000 | 26.30 Mpps | 30.45 Mpps  | 15.80 %     |
| 64K page aarch64 [2] | 9000 | 15.27 Mpps | 20.10 Mpps  | 31.62 %     |
+----------------------------------------------------------------------+

This series does not cover the xsk (AF_XDP) paths for 64K page systems.

[1] https://lore.kernel.org/all/20260324024235.929875-1-kuba@kernel.org/
====================

Link: https://patch.msgid.link/20260403090927.139042-1-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+113 -29
+11 -1
drivers/net/ethernet/mellanox/mlx5/core/en.h
··· 82 82 83 83 #define MLX5E_PAGECNT_BIAS_MAX U16_MAX 84 84 #define MLX5E_RX_MAX_HEAD (256) 85 + #define MLX5E_XDP_LOG_MAX_LINEAR_SZ \ 86 + order_base_2(MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM + MLX5E_RX_MAX_HEAD)) 87 + 85 88 #define MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE (8) 86 89 #define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE \ 87 90 (PAGE_SIZE >> MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE) ··· 594 591 struct mlx5e_mpw_info { 595 592 u16 consumed_strides; 596 593 DECLARE_BITMAP(skip_release_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); 597 - struct mlx5e_frag_page linear_page; 598 594 union mlx5e_alloc_units alloc_units; 595 + }; 596 + 597 + struct mlx5e_mpw_linear_info { 598 + struct mlx5e_frag_page frag_page; 599 + u16 max_frags; 599 600 }; 600 601 601 602 #define MLX5E_MAX_RX_FRAGS 4 ··· 696 689 u8 umr_wqebbs; 697 690 u8 mtts_per_wqe; 698 691 u8 umr_mode; 692 + struct mlx5e_mpw_linear_info *linear_info; 699 693 struct mlx5e_shampo_hd *shampo; 700 694 } mpwqe; 701 695 }; ··· 1084 1076 bool dim_enabled); 1085 1077 bool mlx5e_reset_rx_channels_moderation(struct mlx5e_channels *chs, u8 cq_period_mode, 1086 1078 bool dim_enabled, bool keep_dim_state); 1079 + 1080 + void mlx5e_mpwqe_dealloc_linear_page(struct mlx5e_rq *rq); 1087 1081 1088 1082 struct mlx5e_sq_param; 1089 1083 int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
+2 -9
drivers/net/ethernet/mellanox/mlx5/core/en/params.c
··· 298 298 * no_head_tail_room should be set in the case of XDP with Striding RQ 299 299 * when SKB is not linear. This is because another page is allocated for the linear part. 300 300 */ 301 - sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, no_head_tail_room)); 301 + sz = mlx5e_rx_get_linear_sz_skb(params, no_head_tail_room); 302 302 303 - /* XDP in mlx5e doesn't support multiple packets per page. 304 - * Do not assume sz <= PAGE_SIZE if params->xdp_prog is set. 305 - */ 306 - return params->xdp_prog && sz < PAGE_SIZE ? PAGE_SIZE : sz; 303 + return roundup_pow_of_two(sz); 307 304 } 308 305 309 306 static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev, ··· 449 452 if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo)) 450 453 return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, 451 454 rqo, true)); 452 - 453 - /* XDP in mlx5e doesn't support multiple packets per page. */ 454 - if (params->xdp_prog) 455 - return PAGE_SHIFT; 456 455 457 456 return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); 458 457 }
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en/params.h
··· 8 8 9 9 struct mlx5e_xsk_param { 10 10 u16 headroom; 11 - u16 chunk_size; 11 + u32 chunk_size; 12 12 bool unaligned; 13 13 }; 14 14
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
··· 123 123 * mode. 124 124 */ 125 125 126 - dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf); 126 + dma_addr = page_pool_get_dma_addr(page) + offset_in_page(xdpf->data); 127 127 dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd->len, DMA_BIDIRECTIONAL); 128 128 129 129 if (xdptxd->has_frags) {
+45 -5
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
··· 369 369 return 0; 370 370 } 371 371 372 + static int mlx5e_rq_alloc_mpwqe_linear_info(struct mlx5e_rq *rq, int node, 373 + struct mlx5e_params *params, 374 + struct mlx5e_rq_opt_param *rqo) 375 + { 376 + struct mlx5_core_dev *mdev = rq->mdev; 377 + struct mlx5e_mpw_linear_info *li; 378 + u32 linear_frag_count; 379 + 380 + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo) || 381 + !params->xdp_prog) 382 + return 0; 383 + 384 + li = kvzalloc_node(sizeof(*li), GFP_KERNEL, node); 385 + if (!li) 386 + return -ENOMEM; 387 + 388 + linear_frag_count = 389 + BIT(rq->mpwqe.page_shift - MLX5E_XDP_LOG_MAX_LINEAR_SZ); 390 + if (linear_frag_count > U16_MAX) { 391 + netdev_warn(rq->netdev, 392 + "rq %d: linear_frag_count (%u) larger than expected (%u), page_shift: %u, log_max_linear_sz: %u\n", 393 + rq->ix, linear_frag_count, U16_MAX, 394 + rq->mpwqe.page_shift, MLX5E_XDP_LOG_MAX_LINEAR_SZ); 395 + kvfree(li); 396 + return -EINVAL; 397 + } 398 + 399 + li->max_frags = linear_frag_count; 400 + rq->mpwqe.linear_info = li; 401 + 402 + /* Set to max to force allocation on first run. 
*/ 403 + li->frag_page.frags = li->max_frags; 404 + 405 + return 0; 406 + } 372 407 373 408 static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode) 374 409 { ··· 950 915 mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo); 951 916 pool_order = rq->mpwqe.page_shift - PAGE_SHIFT; 952 917 953 - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo) && 954 - params->xdp_prog) 955 - pool_size *= 2; /* additional page per packet for the linear part */ 956 - 957 918 rq->mpwqe.log_stride_sz = 958 919 mlx5e_mpwqe_get_log_stride_size(mdev, params, 959 920 rqo); ··· 967 936 if (err) 968 937 goto err_rq_mkey; 969 938 970 - err = mlx5_rq_shampo_alloc(mdev, params, rq_param, rq, node); 939 + err = mlx5e_rq_alloc_mpwqe_linear_info(rq, node, params, rqo); 971 940 if (err) 972 941 goto err_free_mpwqe_info; 942 + 943 + err = mlx5_rq_shampo_alloc(mdev, params, rq_param, rq, node); 944 + if (err) 945 + goto err_free_mpwqe_linear_info; 973 946 974 947 break; 975 948 default: /* MLX5_WQ_TYPE_CYCLIC */ ··· 1089 1054 switch (rq->wq_type) { 1090 1055 case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: 1091 1056 mlx5e_rq_free_shampo(rq); 1057 + err_free_mpwqe_linear_info: 1058 + kvfree(rq->mpwqe.linear_info); 1092 1059 err_free_mpwqe_info: 1093 1060 kvfree(rq->mpwqe.info); 1094 1061 err_rq_mkey: ··· 1118 1081 switch (rq->wq_type) { 1119 1082 case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: 1120 1083 mlx5e_rq_free_shampo(rq); 1084 + kvfree(rq->mpwqe.linear_info); 1121 1085 kvfree(rq->mpwqe.info); 1122 1086 mlx5_core_destroy_mkey(rq->mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be)); 1123 1087 mlx5e_free_mpwqe_rq_drop_page(rq); ··· 1358 1320 mlx5_wq_ll_pop(wq, wqe_ix_be, 1359 1321 &wqe->next.next_wqe_index); 1360 1322 } 1323 + 1324 + mlx5e_mpwqe_dealloc_linear_page(rq); 1361 1325 } else { 1362 1326 struct mlx5_wq_cyc *wq = &rq->wqe.wq; 1363 1327 u16 missing = mlx5_wq_cyc_missing(wq);
+53 -12
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
··· 300 300 page_pool_put_unrefed_netmem(pp, netmem, -1, true); 301 301 } 302 302 303 + static int mlx5e_mpwqe_linear_page_refill(struct mlx5e_rq *rq) 304 + { 305 + struct mlx5e_mpw_linear_info *li = rq->mpwqe.linear_info; 306 + 307 + if (likely(li->frag_page.frags < li->max_frags)) 308 + return 0; 309 + 310 + if (likely(li->frag_page.netmem)) { 311 + mlx5e_page_release_fragmented(rq->page_pool, &li->frag_page); 312 + li->frag_page.netmem = 0; 313 + } 314 + 315 + return mlx5e_page_alloc_fragmented(rq->page_pool, &li->frag_page); 316 + } 317 + 318 + static void *mlx5e_mpwqe_get_linear_page_frag(struct mlx5e_rq *rq) 319 + { 320 + struct mlx5e_mpw_linear_info *li = rq->mpwqe.linear_info; 321 + u32 frag_offset; 322 + 323 + if (unlikely(mlx5e_mpwqe_linear_page_refill(rq))) 324 + return NULL; 325 + 326 + frag_offset = li->frag_page.frags << MLX5E_XDP_LOG_MAX_LINEAR_SZ; 327 + WARN_ON(frag_offset >= BIT(rq->mpwqe.page_shift)); 328 + 329 + return netmem_address(li->frag_page.netmem) + frag_offset; 330 + } 331 + 303 332 static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, 304 333 struct mlx5e_wqe_frag_info *frag) 305 334 { ··· 729 700 * for missing wqes on an already flushed RQ. 730 701 */ 731 702 bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); 703 + } 704 + 705 + void mlx5e_mpwqe_dealloc_linear_page(struct mlx5e_rq *rq) 706 + { 707 + struct mlx5e_mpw_linear_info *li = rq->mpwqe.linear_info; 708 + 709 + if (!li || !li->frag_page.netmem) 710 + return; 711 + 712 + mlx5e_page_release_fragmented(rq->page_pool, &li->frag_page); 713 + 714 + /* Recovery flow can call this function and then alloc again, so leave 715 + * things in a good state for re-allocation. 
716 + */ 717 + li->frag_page.netmem = 0; 718 + li->frag_page.frags = li->max_frags; 732 719 } 733 720 734 721 INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) ··· 1914 1869 struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; 1915 1870 u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); 1916 1871 struct mlx5e_frag_page *head_page = frag_page; 1872 + struct mlx5e_frag_page *linear_page = NULL; 1917 1873 struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; 1918 1874 u32 page_size = BIT(rq->mpwqe.page_shift); 1919 1875 u32 frag_offset = head_offset; ··· 1943 1897 if (prog) { 1944 1898 /* area for bpf_xdp_[store|load]_bytes */ 1945 1899 net_prefetchw(netmem_address(frag_page->netmem) + frag_offset); 1946 - if (unlikely(mlx5e_page_alloc_fragmented(rq->page_pool, 1947 - &wi->linear_page))) { 1900 + 1901 + va = mlx5e_mpwqe_get_linear_page_frag(rq); 1902 + if (!va) { 1948 1903 rq->stats->buff_alloc_err++; 1949 1904 return NULL; 1950 1905 } 1951 1906 1952 - va = netmem_address(wi->linear_page.netmem); 1953 1907 net_prefetchw(va); /* xdp_frame data area */ 1954 1908 linear_hr = XDP_PACKET_HEADROOM; 1955 1909 linear_data_len = 0; 1956 1910 linear_frame_sz = MLX5_SKB_FRAG_SZ(linear_hr + MLX5E_RX_MAX_HEAD); 1911 + linear_page = &rq->mpwqe.linear_info->frag_page; 1957 1912 } else { 1958 1913 skb = napi_alloc_skb(rq->cq.napi, 1959 1914 ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); ··· 2013 1966 for (pfp = head_page; pfp < frag_page; pfp++) 2014 1967 pfp->frags++; 2015 1968 2016 - wi->linear_page.frags++; 1969 + linear_page->frags++; 2017 1970 } 2018 - mlx5e_page_release_fragmented(rq->page_pool, 2019 - &wi->linear_page); 2020 1971 return NULL; /* page/packet was consumed by XDP */ 2021 1972 } 2022 1973 ··· 2031 1986 rq, mxbuf->xdp.data_hard_start, linear_frame_sz, 2032 1987 mxbuf->xdp.data - mxbuf->xdp.data_hard_start, len, 2033 1988 mxbuf->xdp.data - mxbuf->xdp.data_meta); 2034 - if (unlikely(!skb)) { 2035 - 
mlx5e_page_release_fragmented(rq->page_pool, 2036 - &wi->linear_page); 1989 + if (unlikely(!skb)) 2037 1990 return NULL; 2038 - } 2039 1991 2040 1992 skb_mark_for_recycle(skb); 2041 - wi->linear_page.frags++; 2042 - mlx5e_page_release_fragmented(rq->page_pool, &wi->linear_page); 1993 + linear_page->frags++; 2043 1994 2044 1995 if (xdp_buff_has_frags(&mxbuf->xdp)) { 2045 1996 struct mlx5e_frag_page *pagep;