Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'net-mlx5e-add-support-for-devmem-and-io_uring-tcp-zero-copy'

Mark Bloch says:

====================
net/mlx5e: Add support for devmem and io_uring TCP zero-copy

This series adds support for zerocopy rx TCP with devmem and io_uring
for ConnectX7 NICs and above. For performance reasons and simplicity
HW-GRO will also be turned on when header-data split mode is on.

Performance
===========

Test setup:

* CPU: Intel(R) Xeon(R) Platinum 8380 CPU @ 2.30GHz (single NUMA)
* NIC: ConnectX7
* Benchmarking tool: kperf [0]
* Single TCP flow
* Test duration: 60s

With application thread and interrupts pinned to the *same* core:

|------+-----------+----------|
| MTU  | epoll     | io_uring |
|------+-----------+----------|
| 1500 | 61.6 Gbps | 114 Gbps |
| 4096 | 69.3 Gbps | 151 Gbps |
| 9000 | 67.8 Gbps | 187 Gbps |
|------+-----------+----------|

The CPU usage for io_uring is 95%.

Reproduction steps for io_uring:

server --no-daemon -a 2001:db8::1 --no-memcmp --iou --iou_sendzc \
--iou_zcrx --iou_dev_name eth2 --iou_zcrx_queue_id 2

server --no-daemon -a 2001:db8::2 --no-memcmp --iou --iou_sendzc

client --src 2001:db8::2 --dst 2001:db8::1 \
--msg-zerocopy -t 60 --cpu-min=2 --cpu-max=2

Patch overview:
===============

First, a netmem API for skb_can_coalesce is added to the core to be able
to do skb fragment coalescing on netmems.

The next patches introduce some cleanups in the internal SHAMPO code and
improvements to hw gro capability checks in FW.

A separate page_pool is introduced for headers, to be used only when
the rxq has a memory provider.

Then the driver is converted to use the netmem API and to allow support
for unreadable netmem page pool.

The queue management ops are implemented.

Finally, the tcp-data-split ring parameter is exposed.

References
==========
[0] kperf: git://git.kernel.dk/kperf.git
v1: https://lore.kernel.org/20250116215530.158886-1-saeed@kernel.org
v2: https://lore.kernel.org/1747950086-1246773-1-git-send-email-tariqt@nvidia.com
v3: https://lore.kernel.org/20250609145833.990793-1-mbloch@nvidia.com
v4: https://lore.kernel.org/20250610150950.1094376-1-mbloch@nvidia.com
v5: https://lore.kernel.org/20250612154648.1161201-1-mbloch@nvidia.com
====================

Link: https://patch.msgid.link/20250616141441.1243044-1-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+381 -167
+5 -6
drivers/net/ethernet/mellanox/mlx5/core/en.h
··· 278 278 struct mlx5e_packet_merge_param { 279 279 enum packet_merge type; 280 280 u32 timeout; 281 - struct { 282 - u8 match_criteria_type; 283 - u8 alignment_granularity; 284 - } shampo; 285 281 }; 286 282 287 283 struct mlx5e_params { ··· 553 557 } ____cacheline_aligned_in_smp; 554 558 555 559 struct mlx5e_frag_page { 556 - struct page *page; 560 + netmem_ref netmem; 557 561 u16 frags; 558 562 }; 559 563 ··· 634 638 struct mlx5e_frag_page *pages; 635 639 u32 hd_per_wq; 636 640 u16 hd_per_wqe; 637 - u16 pages_per_wq; 638 641 unsigned long *bitmap; 639 642 u16 pi; 640 643 u16 ci; ··· 716 721 struct bpf_prog __rcu *xdp_prog; 717 722 struct mlx5e_xdpsq *xdpsq; 718 723 DECLARE_BITMAP(flags, 8); 724 + 725 + /* page pools */ 719 726 struct page_pool *page_pool; 727 + struct page_pool *hd_page_pool; 728 + 720 729 struct mlx5e_xdp_buff mxbuf; 721 730 722 731 /* AF_XDP zero-copy */
+20 -16
drivers/net/ethernet/mellanox/mlx5/core/en/params.c
··· 901 901 { 902 902 void *rqc = param->rqc; 903 903 void *wq = MLX5_ADDR_OF(rqc, rqc, wq); 904 + u32 lro_timeout; 904 905 int ndsegs = 1; 905 906 int err; 906 907 ··· 927 926 MLX5_SET(wq, wq, log_wqe_stride_size, 928 927 log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); 929 928 MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); 930 - if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { 931 - MLX5_SET(wq, wq, shampo_enable, true); 932 - MLX5_SET(wq, wq, log_reservation_size, 933 - mlx5e_shampo_get_log_rsrv_size(mdev, params)); 934 - MLX5_SET(wq, wq, 935 - log_max_num_of_packets_per_reservation, 936 - mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); 937 - MLX5_SET(wq, wq, log_headers_entry_size, 938 - mlx5e_shampo_get_log_hd_entry_size(mdev, params)); 939 - MLX5_SET(rqc, rqc, reservation_timeout, 940 - mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_SHAMPO_TIMEOUT)); 941 - MLX5_SET(rqc, rqc, shampo_match_criteria_type, 942 - params->packet_merge.shampo.match_criteria_type); 943 - MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, 944 - params->packet_merge.shampo.alignment_granularity); 945 - } 929 + if (params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) 930 + break; 931 + 932 + MLX5_SET(wq, wq, shampo_enable, true); 933 + MLX5_SET(wq, wq, log_reservation_size, 934 + mlx5e_shampo_get_log_rsrv_size(mdev, params)); 935 + MLX5_SET(wq, wq, 936 + log_max_num_of_packets_per_reservation, 937 + mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); 938 + MLX5_SET(wq, wq, log_headers_entry_size, 939 + mlx5e_shampo_get_log_hd_entry_size(mdev, params)); 940 + lro_timeout = 941 + mlx5e_choose_lro_timeout(mdev, 942 + MLX5E_DEFAULT_SHAMPO_TIMEOUT); 943 + MLX5_SET(rqc, rqc, reservation_timeout, lro_timeout); 944 + MLX5_SET(rqc, rqc, shampo_match_criteria_type, 945 + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED); 946 + MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, 947 + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE); 
946 948 break; 947 949 } 948 950 default: /* MLX5_WQ_TYPE_CYCLIC */
+2 -1
drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
··· 362 362 dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); 363 363 break; 364 364 case MLX5E_DMA_MAP_PAGE: 365 - dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); 365 + netmem_dma_unmap_page_attrs(pdev, dma->addr, dma->size, 366 + DMA_TO_DEVICE, 0); 366 367 break; 367 368 default: 368 369 WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
+28 -5
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
··· 32 32 33 33 #include <linux/dim.h> 34 34 #include <linux/ethtool_netlink.h> 35 + #include <net/netdev_queues.h> 35 36 36 37 #include "en.h" 37 38 #include "en/channels.h" ··· 366 365 param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; 367 366 param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames; 368 367 param->tx_pending = 1 << priv->channels.params.log_sq_size; 369 - 370 - kernel_param->tcp_data_split = 371 - (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) ? 372 - ETHTOOL_TCP_DATA_SPLIT_ENABLED : 373 - ETHTOOL_TCP_DATA_SPLIT_DISABLED; 374 368 } 375 369 376 370 static void mlx5e_get_ringparam(struct net_device *dev, ··· 376 380 struct mlx5e_priv *priv = netdev_priv(dev); 377 381 378 382 mlx5e_ethtool_get_ringparam(priv, param, kernel_param); 383 + } 384 + 385 + static bool mlx5e_ethtool_set_tcp_data_split(struct mlx5e_priv *priv, 386 + u8 tcp_data_split, 387 + struct netlink_ext_ack *extack) 388 + { 389 + struct net_device *dev = priv->netdev; 390 + 391 + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED && 392 + !(dev->features & NETIF_F_GRO_HW)) { 393 + NL_SET_ERR_MSG_MOD(extack, 394 + "TCP-data-split is not supported when GRO HW is disabled"); 395 + return false; 396 + } 397 + 398 + /* Might need to disable HW-GRO if it was kept on due to hds. 
*/ 399 + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED && 400 + dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED) 401 + netdev_update_features(priv->netdev); 402 + 403 + return true; 379 404 } 380 405 381 406 int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, ··· 456 439 struct netlink_ext_ack *extack) 457 440 { 458 441 struct mlx5e_priv *priv = netdev_priv(dev); 442 + 443 + if (!mlx5e_ethtool_set_tcp_data_split(priv, 444 + kernel_param->tcp_data_split, 445 + extack)) 446 + return -EINVAL; 459 447 460 448 return mlx5e_ethtool_set_ringparam(priv, param, extack); 461 449 } ··· 2645 2623 ETHTOOL_COALESCE_USE_ADAPTIVE | 2646 2624 ETHTOOL_COALESCE_USE_CQE, 2647 2625 .supported_input_xfrm = RXH_XFRM_SYM_OR_XOR, 2626 + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, 2648 2627 .get_drvinfo = mlx5e_get_drvinfo, 2649 2628 .get_link = ethtool_op_get_link, 2650 2629 .get_link_ext_state = mlx5e_get_link_ext_state,
+227 -79
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
··· 41 41 #include <linux/filter.h> 42 42 #include <net/netdev_lock.h> 43 43 #include <net/netdev_queues.h> 44 + #include <net/netdev_rx_queue.h> 44 45 #include <net/page_pool/types.h> 45 46 #include <net/pkt_sched.h> 46 47 #include <net/xdp_sock_drv.h> ··· 79 78 80 79 static bool mlx5e_hw_gro_supported(struct mlx5_core_dev *mdev) 81 80 { 82 - if (!MLX5_CAP_GEN(mdev, shampo)) 81 + if (!MLX5_CAP_GEN(mdev, shampo) || 82 + !MLX5_CAP_SHAMPO(mdev, shampo_header_split_data_merge)) 83 83 return false; 84 84 85 85 /* Our HW-GRO implementation relies on "KSM Mkey" for ··· 333 331 ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); 334 332 } 335 333 336 - static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node) 337 - { 338 - rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo), 339 - GFP_KERNEL, node); 340 - if (!rq->mpwqe.shampo) 341 - return -ENOMEM; 342 - return 0; 343 - } 344 - 345 - static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq) 346 - { 347 - kvfree(rq->mpwqe.shampo); 348 - } 349 - 350 - static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node) 351 - { 352 - struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; 353 - 354 - shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL, 355 - node); 356 - shampo->pages = kvzalloc_node(array_size(shampo->hd_per_wq, 357 - sizeof(*shampo->pages)), 358 - GFP_KERNEL, node); 359 - if (!shampo->bitmap || !shampo->pages) 360 - goto err_nomem; 361 - 362 - return 0; 363 - 364 - err_nomem: 365 - bitmap_free(shampo->bitmap); 366 - kvfree(shampo->pages); 367 - 368 - return -ENOMEM; 369 - } 370 - 371 - static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) 372 - { 373 - bitmap_free(rq->mpwqe.shampo->bitmap); 374 - kvfree(rq->mpwqe.shampo->pages); 375 - } 376 - 377 334 static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) 378 335 { 379 336 int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); ··· 545 584 } 546 585 547 586 static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev 
*mdev, 548 - struct mlx5e_rq *rq) 587 + u16 hd_per_wq, u32 *umr_mkey) 549 588 { 550 589 u32 max_ksm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size)); 551 590 552 - if (max_ksm_size < rq->mpwqe.shampo->hd_per_wq) { 591 + if (max_ksm_size < hd_per_wq) { 553 592 mlx5_core_err(mdev, "max ksm list size 0x%x is smaller than shampo header buffer list size 0x%x\n", 554 - max_ksm_size, rq->mpwqe.shampo->hd_per_wq); 593 + max_ksm_size, hd_per_wq); 555 594 return -EINVAL; 556 595 } 557 - 558 - return mlx5e_create_umr_ksm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq, 596 + return mlx5e_create_umr_ksm_mkey(mdev, hd_per_wq, 559 597 MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE, 560 - &rq->mpwqe.shampo->mkey); 598 + umr_mkey); 561 599 } 562 600 563 601 static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) ··· 718 758 xdp_frag_size); 719 759 } 720 760 761 + static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, u16 hd_per_wq, 762 + int node) 763 + { 764 + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; 765 + 766 + shampo->hd_per_wq = hd_per_wq; 767 + 768 + shampo->bitmap = bitmap_zalloc_node(hd_per_wq, GFP_KERNEL, node); 769 + shampo->pages = kvzalloc_node(array_size(hd_per_wq, 770 + sizeof(*shampo->pages)), 771 + GFP_KERNEL, node); 772 + if (!shampo->bitmap || !shampo->pages) 773 + goto err_nomem; 774 + 775 + return 0; 776 + 777 + err_nomem: 778 + kvfree(shampo->pages); 779 + bitmap_free(shampo->bitmap); 780 + 781 + return -ENOMEM; 782 + } 783 + 784 + static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) 785 + { 786 + kvfree(rq->mpwqe.shampo->pages); 787 + bitmap_free(rq->mpwqe.shampo->bitmap); 788 + } 789 + 790 + static bool mlx5_rq_needs_separate_hd_pool(struct mlx5e_rq *rq) 791 + { 792 + struct netdev_rx_queue *rxq = __netif_get_rx_queue(rq->netdev, rq->ix); 793 + 794 + return !!rxq->mp_params.mp_ops; 795 + } 796 + 721 797 static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, 722 798 struct mlx5e_params *params, 723 799 struct mlx5e_rq_param *rqp, ··· 761 765 
u32 *pool_size, 762 766 int node) 763 767 { 768 + void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq); 769 + u32 hd_pool_size; 770 + u16 hd_per_wq; 771 + int wq_size; 764 772 int err; 765 773 766 774 if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) 767 775 return 0; 768 - err = mlx5e_rq_shampo_hd_alloc(rq, node); 776 + 777 + rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo), 778 + GFP_KERNEL, node); 779 + if (!rq->mpwqe.shampo) 780 + return -ENOMEM; 781 + 782 + /* split headers data structures */ 783 + hd_per_wq = mlx5e_shampo_hd_per_wq(mdev, params, rqp); 784 + err = mlx5e_rq_shampo_hd_info_alloc(rq, hd_per_wq, node); 769 785 if (err) 770 - goto out; 771 - rq->mpwqe.shampo->hd_per_wq = 772 - mlx5e_shampo_hd_per_wq(mdev, params, rqp); 773 - err = mlx5e_create_rq_hd_umr_mkey(mdev, rq); 786 + goto err_shampo_hd_info_alloc; 787 + 788 + err = mlx5e_create_rq_hd_umr_mkey(mdev, hd_per_wq, 789 + &rq->mpwqe.shampo->mkey); 774 790 if (err) 775 - goto err_shampo_hd; 776 - err = mlx5e_rq_shampo_hd_info_alloc(rq, node); 777 - if (err) 778 - goto err_shampo_info; 791 + goto err_umr_mkey; 792 + 793 + rq->mpwqe.shampo->key = cpu_to_be32(rq->mpwqe.shampo->mkey); 794 + rq->mpwqe.shampo->hd_per_wqe = 795 + mlx5e_shampo_hd_per_wqe(mdev, params, rqp); 796 + wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); 797 + hd_pool_size = (rq->mpwqe.shampo->hd_per_wqe * wq_size) / 798 + MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; 799 + 800 + if (mlx5_rq_needs_separate_hd_pool(rq)) { 801 + /* Separate page pool for shampo headers */ 802 + struct page_pool_params pp_params = { }; 803 + 804 + pp_params.order = 0; 805 + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; 806 + pp_params.pool_size = hd_pool_size; 807 + pp_params.nid = node; 808 + pp_params.dev = rq->pdev; 809 + pp_params.napi = rq->cq.napi; 810 + pp_params.netdev = rq->netdev; 811 + pp_params.dma_dir = rq->buff.map_dir; 812 + pp_params.max_len = PAGE_SIZE; 813 + 814 + rq->hd_page_pool = page_pool_create(&pp_params); 815 + if 
(IS_ERR(rq->hd_page_pool)) { 816 + err = PTR_ERR(rq->hd_page_pool); 817 + rq->hd_page_pool = NULL; 818 + goto err_hds_page_pool; 819 + } 820 + } else { 821 + /* Common page pool, reserve space for headers. */ 822 + *pool_size += hd_pool_size; 823 + rq->hd_page_pool = NULL; 824 + } 825 + 826 + /* gro only data structures */ 779 827 rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node); 780 828 if (!rq->hw_gro_data) { 781 829 err = -ENOMEM; 782 830 goto err_hw_gro_data; 783 831 } 784 - rq->mpwqe.shampo->key = 785 - cpu_to_be32(rq->mpwqe.shampo->mkey); 786 - rq->mpwqe.shampo->hd_per_wqe = 787 - mlx5e_shampo_hd_per_wqe(mdev, params, rqp); 788 - rq->mpwqe.shampo->pages_per_wq = 789 - rq->mpwqe.shampo->hd_per_wq / MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; 790 - *pool_size += rq->mpwqe.shampo->pages_per_wq; 832 + 791 833 return 0; 792 834 793 835 err_hw_gro_data: 794 - mlx5e_rq_shampo_hd_info_free(rq); 795 - err_shampo_info: 836 + page_pool_destroy(rq->hd_page_pool); 837 + err_hds_page_pool: 796 838 mlx5_core_destroy_mkey(mdev, rq->mpwqe.shampo->mkey); 797 - err_shampo_hd: 798 - mlx5e_rq_shampo_hd_free(rq); 799 - out: 839 + err_umr_mkey: 840 + mlx5e_rq_shampo_hd_info_free(rq); 841 + err_shampo_hd_info_alloc: 842 + kvfree(rq->mpwqe.shampo); 800 843 return err; 801 844 } 802 845 ··· 845 810 return; 846 811 847 812 kvfree(rq->hw_gro_data); 813 + if (rq->hd_page_pool != rq->page_pool) 814 + page_pool_destroy(rq->hd_page_pool); 848 815 mlx5e_rq_shampo_hd_info_free(rq); 849 816 mlx5_core_destroy_mkey(rq->mdev, rq->mpwqe.shampo->mkey); 850 - mlx5e_rq_shampo_hd_free(rq); 817 + kvfree(rq->mpwqe.shampo); 851 818 } 852 819 853 820 static int mlx5e_alloc_rq(struct mlx5e_params *params, ··· 966 929 pp_params.netdev = rq->netdev; 967 930 pp_params.dma_dir = rq->buff.map_dir; 968 931 pp_params.max_len = PAGE_SIZE; 932 + pp_params.queue_idx = rq->ix; 933 + 934 + /* Shampo header data split allow for unreadable netmem */ 935 + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) 
936 + pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM; 969 937 970 938 /* page_pool can be used even when there is no rq->xdp_prog, 971 939 * given page_pool does not handle DMA mapping there is no ··· 983 941 rq->page_pool = NULL; 984 942 goto err_free_by_rq_type; 985 943 } 944 + if (!rq->hd_page_pool) 945 + rq->hd_page_pool = rq->page_pool; 986 946 if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) 987 947 err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, 988 948 MEM_TYPE_PAGE_POOL, rq->page_pool); ··· 4087 4043 4088 4044 if (enable) { 4089 4045 new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO; 4090 - new_params.packet_merge.shampo.match_criteria_type = 4091 - MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED; 4092 - new_params.packet_merge.shampo.alignment_granularity = 4093 - MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE; 4094 4046 } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { 4095 4047 new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; 4096 4048 } else { ··· 4413 4373 static netdev_features_t mlx5e_fix_features(struct net_device *netdev, 4414 4374 netdev_features_t features) 4415 4375 { 4376 + struct netdev_config *cfg = netdev->cfg_pending; 4416 4377 struct mlx5e_priv *priv = netdev_priv(netdev); 4417 4378 struct mlx5e_vlan_table *vlan; 4418 4379 struct mlx5e_params *params; ··· 4478 4437 netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n"); 4479 4438 features &= ~NETIF_F_GRO_HW; 4480 4439 } 4440 + } 4441 + 4442 + /* The header-data split ring param requires HW GRO to stay enabled. 
*/ 4443 + if (cfg && cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && 4444 + !(features & NETIF_F_GRO_HW)) { 4445 + netdev_warn(netdev, "Keeping HW-GRO enabled, TCP header-data split depends on it\n"); 4446 + features |= NETIF_F_GRO_HW; 4481 4447 } 4482 4448 4483 4449 if (mlx5e_is_uplink_rep(priv)) { ··· 5502 5454 .get_base_stats = mlx5e_get_base_stats, 5503 5455 }; 5504 5456 5457 + struct mlx5_qmgmt_data { 5458 + struct mlx5e_channel *c; 5459 + struct mlx5e_channel_param cparam; 5460 + }; 5461 + 5462 + static int mlx5e_queue_mem_alloc(struct net_device *dev, void *newq, 5463 + int queue_index) 5464 + { 5465 + struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq; 5466 + struct mlx5e_priv *priv = netdev_priv(dev); 5467 + struct mlx5e_channels *chs = &priv->channels; 5468 + struct mlx5e_params params = chs->params; 5469 + struct mlx5_core_dev *mdev; 5470 + int err; 5471 + 5472 + mutex_lock(&priv->state_lock); 5473 + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { 5474 + err = -ENODEV; 5475 + goto unlock; 5476 + } 5477 + 5478 + if (queue_index >= chs->num) { 5479 + err = -ERANGE; 5480 + goto unlock; 5481 + } 5482 + 5483 + if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || 5484 + chs->params.ptp_rx || 5485 + chs->params.xdp_prog || 5486 + priv->htb) { 5487 + netdev_err(priv->netdev, 5488 + "Cloning channels with Port/rx PTP, XDP or HTB is not supported\n"); 5489 + err = -EOPNOTSUPP; 5490 + goto unlock; 5491 + } 5492 + 5493 + mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, queue_index); 5494 + err = mlx5e_build_channel_param(mdev, &params, &new->cparam); 5495 + if (err) 5496 + goto unlock; 5497 + 5498 + err = mlx5e_open_channel(priv, queue_index, &params, NULL, &new->c); 5499 + unlock: 5500 + mutex_unlock(&priv->state_lock); 5501 + return err; 5502 + } 5503 + 5504 + static void mlx5e_queue_mem_free(struct net_device *dev, void *mem) 5505 + { 5506 + struct mlx5_qmgmt_data *data = (struct mlx5_qmgmt_data *)mem; 5507 + 5508 + /* not supposed to happen 
since mlx5e_queue_start never fails 5509 + * but this is how this should be implemented just in case 5510 + */ 5511 + if (data->c) 5512 + mlx5e_close_channel(data->c); 5513 + } 5514 + 5515 + static int mlx5e_queue_stop(struct net_device *dev, void *oldq, int queue_index) 5516 + { 5517 + /* In mlx5 a txq cannot be simply stopped in isolation, only restarted. 5518 + * mlx5e_queue_start does not fail, we stop the old queue there. 5519 + * TODO: Improve this. 5520 + */ 5521 + return 0; 5522 + } 5523 + 5524 + static int mlx5e_queue_start(struct net_device *dev, void *newq, 5525 + int queue_index) 5526 + { 5527 + struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq; 5528 + struct mlx5e_priv *priv = netdev_priv(dev); 5529 + struct mlx5e_channel *old; 5530 + 5531 + mutex_lock(&priv->state_lock); 5532 + 5533 + /* stop and close the old */ 5534 + old = priv->channels.c[queue_index]; 5535 + mlx5e_deactivate_priv_channels(priv); 5536 + /* close old before activating new, to avoid napi conflict */ 5537 + mlx5e_close_channel(old); 5538 + 5539 + /* start the new */ 5540 + priv->channels.c[queue_index] = new->c; 5541 + mlx5e_activate_priv_channels(priv); 5542 + mutex_unlock(&priv->state_lock); 5543 + return 0; 5544 + } 5545 + 5546 + static const struct netdev_queue_mgmt_ops mlx5e_queue_mgmt_ops = { 5547 + .ndo_queue_mem_size = sizeof(struct mlx5_qmgmt_data), 5548 + .ndo_queue_mem_alloc = mlx5e_queue_mem_alloc, 5549 + .ndo_queue_mem_free = mlx5e_queue_mem_free, 5550 + .ndo_queue_start = mlx5e_queue_start, 5551 + .ndo_queue_stop = mlx5e_queue_stop, 5552 + }; 5553 + 5505 5554 static void mlx5e_build_nic_netdev(struct net_device *netdev) 5506 5555 { 5507 5556 struct mlx5e_priv *priv = netdev_priv(netdev); ··· 5609 5464 SET_NETDEV_DEV(netdev, mdev->device); 5610 5465 5611 5466 netdev->netdev_ops = &mlx5e_netdev_ops; 5467 + netdev->queue_mgmt_ops = &mlx5e_queue_mgmt_ops; 5612 5468 netdev->xdp_metadata_ops = &mlx5e_xdp_metadata_ops; 5613 5469 netdev->xsk_tx_metadata_ops = 
&mlx5e_xsk_tx_metadata_ops; 5614 5470 netdev->request_ops_lock = true; ··· 5652 5506 MLX5E_MPWRQ_UMR_MODE_ALIGNED)) 5653 5507 netdev->vlan_features |= NETIF_F_LRO; 5654 5508 5509 + if (mlx5e_hw_gro_supported(mdev) && 5510 + mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, 5511 + MLX5E_MPWRQ_UMR_MODE_ALIGNED)) 5512 + netdev->vlan_features |= NETIF_F_GRO_HW; 5513 + 5655 5514 netdev->hw_features = netdev->vlan_features; 5656 5515 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; 5657 5516 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; 5658 5517 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; 5659 5518 netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; 5660 - 5661 - if (mlx5e_hw_gro_supported(mdev) && 5662 - mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, 5663 - MLX5E_MPWRQ_UMR_MODE_ALIGNED)) 5664 - netdev->hw_features |= NETIF_F_GRO_HW; 5665 5519 5666 5520 if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { 5667 5521 netdev->hw_enc_features |= NETIF_F_HW_CSUM; ··· 5740 5594 netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; 5741 5595 5742 5596 netdev->priv_flags |= IFF_UNICAST_FLT; 5597 + 5598 + netdev->netmem_tx = true; 5743 5599 5744 5600 netif_set_tso_max_size(netdev, GSO_MAX_SIZE); 5745 5601 mlx5e_set_xdp_feature(netdev);
+82 -56
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
··· 273 273 274 274 #define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64) 275 275 276 - static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq, 276 + static int mlx5e_page_alloc_fragmented(struct page_pool *pp, 277 277 struct mlx5e_frag_page *frag_page) 278 278 { 279 - struct page *page; 279 + netmem_ref netmem = page_pool_dev_alloc_netmems(pp); 280 280 281 - page = page_pool_dev_alloc_pages(rq->page_pool); 282 - if (unlikely(!page)) 281 + if (unlikely(!netmem)) 283 282 return -ENOMEM; 284 283 285 - page_pool_fragment_page(page, MLX5E_PAGECNT_BIAS_MAX); 284 + page_pool_fragment_netmem(netmem, MLX5E_PAGECNT_BIAS_MAX); 286 285 287 286 *frag_page = (struct mlx5e_frag_page) { 288 - .page = page, 287 + .netmem = netmem, 289 288 .frags = 0, 290 289 }; 291 290 292 291 return 0; 293 292 } 294 293 295 - static void mlx5e_page_release_fragmented(struct mlx5e_rq *rq, 294 + static void mlx5e_page_release_fragmented(struct page_pool *pp, 296 295 struct mlx5e_frag_page *frag_page) 297 296 { 298 297 u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags; 299 - struct page *page = frag_page->page; 298 + netmem_ref netmem = frag_page->netmem; 300 299 301 - if (page_pool_unref_page(page, drain_count) == 0) 302 - page_pool_put_unrefed_page(rq->page_pool, page, -1, true); 300 + if (page_pool_unref_netmem(netmem, drain_count) == 0) 301 + page_pool_put_unrefed_netmem(pp, netmem, -1, true); 303 302 } 304 303 305 304 static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, ··· 312 313 * offset) should just use the new one without replenishing again 313 314 * by themselves. 
314 315 */ 315 - err = mlx5e_page_alloc_fragmented(rq, frag->frag_page); 316 + err = mlx5e_page_alloc_fragmented(rq->page_pool, 317 + frag->frag_page); 316 318 317 319 return err; 318 320 } ··· 332 332 struct mlx5e_wqe_frag_info *frag) 333 333 { 334 334 if (mlx5e_frag_can_release(frag)) 335 - mlx5e_page_release_fragmented(rq, frag->frag_page); 335 + mlx5e_page_release_fragmented(rq->page_pool, frag->frag_page); 336 336 } 337 337 338 338 static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) ··· 358 358 frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); 359 359 360 360 headroom = i == 0 ? rq->buff.headroom : 0; 361 - addr = page_pool_get_dma_addr(frag->frag_page->page); 361 + addr = page_pool_get_dma_addr_netmem(frag->frag_page->netmem); 362 362 wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); 363 363 } 364 364 ··· 499 499 struct xdp_buff *xdp, struct mlx5e_frag_page *frag_page, 500 500 u32 frag_offset, u32 len) 501 501 { 502 + netmem_ref netmem = frag_page->netmem; 502 503 skb_frag_t *frag; 503 504 504 - dma_addr_t addr = page_pool_get_dma_addr(frag_page->page); 505 + dma_addr_t addr = page_pool_get_dma_addr_netmem(netmem); 505 506 506 507 dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); 507 508 if (!xdp_buff_has_frags(xdp)) { ··· 515 514 } 516 515 517 516 frag = &sinfo->frags[sinfo->nr_frags++]; 518 - skb_frag_fill_page_desc(frag, frag_page->page, frag_offset, len); 517 + skb_frag_fill_netmem_desc(frag, netmem, frag_offset, len); 519 518 520 - if (page_is_pfmemalloc(frag_page->page)) 519 + if (netmem_is_pfmemalloc(netmem)) 521 520 xdp_buff_set_frag_pfmemalloc(xdp); 522 521 sinfo->xdp_frags_size += len; 523 522 } ··· 528 527 u32 frag_offset, u32 len, 529 528 unsigned int truesize) 530 529 { 531 - dma_addr_t addr = page_pool_get_dma_addr(frag_page->page); 530 + dma_addr_t addr = page_pool_get_dma_addr_netmem(frag_page->netmem); 532 531 u8 next_frag = skb_shinfo(skb)->nr_frags; 532 + netmem_ref 
netmem = frag_page->netmem; 533 533 534 534 dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, 535 535 rq->buff.map_dir); 536 536 537 - if (skb_can_coalesce(skb, next_frag, frag_page->page, frag_offset)) { 537 + if (skb_can_coalesce_netmem(skb, next_frag, netmem, frag_offset)) { 538 538 skb_coalesce_rx_frag(skb, next_frag - 1, len, truesize); 539 - } else { 540 - frag_page->frags++; 541 - skb_add_rx_frag(skb, next_frag, frag_page->page, 542 - frag_offset, len, truesize); 539 + return; 543 540 } 541 + 542 + frag_page->frags++; 543 + skb_add_rx_frag_netmem(skb, next_frag, netmem, 544 + frag_offset, len, truesize); 544 545 } 545 546 546 547 static inline void 547 548 mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb, 548 - struct page *page, dma_addr_t addr, 549 + netmem_ref netmem, dma_addr_t addr, 549 550 int offset_from, int dma_offset, u32 headlen) 550 551 { 551 - const void *from = page_address(page) + offset_from; 552 + const void *from = netmem_address(netmem) + offset_from; 552 553 /* Aligning len to sizeof(long) optimizes memcpy performance */ 553 554 unsigned int len = ALIGN(headlen, sizeof(long)); 554 555 ··· 587 584 struct mlx5e_frag_page *frag_page; 588 585 589 586 frag_page = &wi->alloc_units.frag_pages[i]; 590 - mlx5e_page_release_fragmented(rq, frag_page); 587 + mlx5e_page_release_fragmented(rq->page_pool, 588 + frag_page); 591 589 } 592 590 } 593 591 } ··· 683 679 struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); 684 680 u64 addr; 685 681 686 - err = mlx5e_page_alloc_fragmented(rq, frag_page); 682 + err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, frag_page); 687 683 if (unlikely(err)) 688 684 goto err_unmap; 689 685 690 - 691 - addr = page_pool_get_dma_addr(frag_page->page); 686 + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); 692 687 693 688 for (int j = 0; j < MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; j++) { 694 689 header_offset = mlx5e_shampo_hd_offset(index++); ··· 718 715 if (!header_offset) { 
719 716 struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); 720 717 721 - mlx5e_page_release_fragmented(rq, frag_page); 718 + mlx5e_page_release_fragmented(rq->hd_page_pool, 719 + frag_page); 722 720 } 723 721 } 724 722 ··· 795 791 for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, frag_page++) { 796 792 dma_addr_t addr; 797 793 798 - err = mlx5e_page_alloc_fragmented(rq, frag_page); 794 + err = mlx5e_page_alloc_fragmented(rq->page_pool, frag_page); 799 795 if (unlikely(err)) 800 796 goto err_unmap; 801 - addr = page_pool_get_dma_addr(frag_page->page); 797 + 798 + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); 802 799 umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { 803 800 .ptag = cpu_to_be64(addr | MLX5_EN_WR), 804 801 }; ··· 841 836 err_unmap: 842 837 while (--i >= 0) { 843 838 frag_page--; 844 - mlx5e_page_release_fragmented(rq, frag_page); 839 + mlx5e_page_release_fragmented(rq->page_pool, frag_page); 845 840 } 846 841 847 842 bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); ··· 860 855 if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { 861 856 struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); 862 857 863 - mlx5e_page_release_fragmented(rq, frag_page); 858 + mlx5e_page_release_fragmented(rq->hd_page_pool, frag_page); 864 859 } 865 860 clear_bit(header_index, shampo->bitmap); 866 861 } ··· 1105 1100 1106 1101 if (rq->page_pool) 1107 1102 page_pool_nid_changed(rq->page_pool, numa_mem_id()); 1103 + if (rq->hd_page_pool) 1104 + page_pool_nid_changed(rq->hd_page_pool, numa_mem_id()); 1108 1105 1109 1106 head = rq->mpwqe.actual_wq_head; 1110 1107 i = missing; ··· 1219 1212 struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); 1220 1213 u16 head_offset = mlx5e_shampo_hd_offset(header_index) + rq->buff.headroom; 1221 1214 1222 - return page_address(frag_page->page) + head_offset; 1215 + return netmem_address(frag_page->netmem) + head_offset; 
1223 1216 } 1224 1217 1225 1218 static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) ··· 1680 1673 dma_addr_t addr; 1681 1674 u32 frag_size; 1682 1675 1683 - va = page_address(frag_page->page) + wi->offset; 1676 + va = netmem_address(frag_page->netmem) + wi->offset; 1684 1677 data = va + rx_headroom; 1685 1678 frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); 1686 1679 1687 - addr = page_pool_get_dma_addr(frag_page->page); 1680 + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); 1688 1681 dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, 1689 1682 frag_size, rq->buff.map_dir); 1690 1683 net_prefetch(data); ··· 1734 1727 1735 1728 frag_page = wi->frag_page; 1736 1729 1737 - va = page_address(frag_page->page) + wi->offset; 1730 + va = netmem_address(frag_page->netmem) + wi->offset; 1738 1731 frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); 1739 1732 1740 - addr = page_pool_get_dma_addr(frag_page->page); 1733 + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); 1741 1734 dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, 1742 1735 rq->buff.frame0_sz, rq->buff.map_dir); 1743 1736 net_prefetchw(va); /* xdp_frame data area */ ··· 2010 2003 2011 2004 if (prog) { 2012 2005 /* area for bpf_xdp_[store|load]_bytes */ 2013 - net_prefetchw(page_address(frag_page->page) + frag_offset); 2014 - if (unlikely(mlx5e_page_alloc_fragmented(rq, &wi->linear_page))) { 2006 + net_prefetchw(netmem_address(frag_page->netmem) + frag_offset); 2007 + if (unlikely(mlx5e_page_alloc_fragmented(rq->page_pool, 2008 + &wi->linear_page))) { 2015 2009 rq->stats->buff_alloc_err++; 2016 2010 return NULL; 2017 2011 } 2018 - va = page_address(wi->linear_page.page); 2012 + 2013 + va = netmem_address(wi->linear_page.netmem); 2019 2014 net_prefetchw(va); /* xdp_frame data area */ 2020 2015 linear_hr = XDP_PACKET_HEADROOM; 2021 2016 linear_data_len = 0; ··· 2077 2068 2078 2069 wi->linear_page.frags++; 2079 2070 } 2080 - 
mlx5e_page_release_fragmented(rq, &wi->linear_page); 2071 + mlx5e_page_release_fragmented(rq->page_pool, 2072 + &wi->linear_page); 2081 2073 return NULL; /* page/packet was consumed by XDP */ 2082 2074 } 2083 2075 ··· 2087 2077 mxbuf->xdp.data - mxbuf->xdp.data_hard_start, 0, 2088 2078 mxbuf->xdp.data - mxbuf->xdp.data_meta); 2089 2079 if (unlikely(!skb)) { 2090 - mlx5e_page_release_fragmented(rq, &wi->linear_page); 2080 + mlx5e_page_release_fragmented(rq->page_pool, 2081 + &wi->linear_page); 2091 2082 return NULL; 2092 2083 } 2093 2084 2094 2085 skb_mark_for_recycle(skb); 2095 2086 wi->linear_page.frags++; 2096 - mlx5e_page_release_fragmented(rq, &wi->linear_page); 2087 + mlx5e_page_release_fragmented(rq->page_pool, &wi->linear_page); 2097 2088 2098 2089 if (xdp_buff_has_frags(&mxbuf->xdp)) { 2099 2090 struct mlx5e_frag_page *pagep; ··· 2128 2117 while (++pagep < frag_page); 2129 2118 } 2130 2119 /* copy header */ 2131 - addr = page_pool_get_dma_addr(head_page->page); 2132 - mlx5e_copy_skb_header(rq, skb, head_page->page, addr, 2120 + addr = page_pool_get_dma_addr_netmem(head_page->netmem); 2121 + mlx5e_copy_skb_header(rq, skb, head_page->netmem, addr, 2133 2122 head_offset, head_offset, headlen); 2134 2123 /* skb linear part was allocated with headlen and aligned to long */ 2135 2124 skb->tail += headlen; ··· 2159 2148 return NULL; 2160 2149 } 2161 2150 2162 - va = page_address(frag_page->page) + head_offset; 2151 + va = netmem_address(frag_page->netmem) + head_offset; 2163 2152 data = va + rx_headroom; 2164 2153 frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); 2165 2154 2166 - addr = page_pool_get_dma_addr(frag_page->page); 2155 + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); 2167 2156 dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, 2168 2157 frag_size, rq->buff.map_dir); 2169 2158 net_prefetch(data); ··· 2202 2191 struct mlx5_cqe64 *cqe, u16 header_index) 2203 2192 { 2204 2193 struct mlx5e_frag_page *frag_page = 
mlx5e_shampo_hd_to_frag_page(rq, header_index); 2205 - dma_addr_t page_dma_addr = page_pool_get_dma_addr(frag_page->page); 2206 2194 u16 head_offset = mlx5e_shampo_hd_offset(header_index); 2207 - dma_addr_t dma_addr = page_dma_addr + head_offset; 2208 2195 u16 head_size = cqe->shampo.header_size; 2209 2196 u16 rx_headroom = rq->buff.headroom; 2210 2197 struct sk_buff *skb = NULL; 2198 + dma_addr_t page_dma_addr; 2199 + dma_addr_t dma_addr; 2211 2200 void *hdr, *data; 2212 2201 u32 frag_size; 2213 2202 2214 - hdr = page_address(frag_page->page) + head_offset; 2203 + page_dma_addr = page_pool_get_dma_addr_netmem(frag_page->netmem); 2204 + dma_addr = page_dma_addr + head_offset; 2205 + 2206 + hdr = netmem_address(frag_page->netmem) + head_offset; 2215 2207 data = hdr + rx_headroom; 2216 2208 frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); 2217 2209 ··· 2239 2225 } 2240 2226 2241 2227 net_prefetchw(skb->data); 2242 - mlx5e_copy_skb_header(rq, skb, frag_page->page, dma_addr, 2228 + mlx5e_copy_skb_header(rq, skb, frag_page->netmem, dma_addr, 2243 2229 head_offset + rx_headroom, 2244 2230 rx_headroom, head_size); 2245 2231 /* skb linear part was allocated with headlen and aligned to long */ ··· 2333 2319 } 2334 2320 2335 2321 if (!*skb) { 2336 - if (likely(head_size)) 2322 + if (likely(head_size)) { 2337 2323 *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); 2338 - else 2339 - *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe, cqe_bcnt, 2340 - data_offset, page_idx); 2324 + } else { 2325 + struct mlx5e_frag_page *frag_page; 2326 + 2327 + frag_page = &wi->alloc_units.frag_pages[page_idx]; 2328 + /* Drop packets with header in unreadable data area to 2329 + * prevent the kernel from touching it. 
2330 + */ 2331 + if (unlikely(netmem_is_net_iov(frag_page->netmem))) 2332 + goto free_hd_entry; 2333 + *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe, 2334 + cqe_bcnt, 2335 + data_offset, 2336 + page_idx); 2337 + } 2338 + 2341 2339 if (unlikely(!*skb)) 2342 2340 goto free_hd_entry; 2343 2341
+9 -3
include/linux/skbuff.h
··· 3873 3873 bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) 3874 3874 __must_check; 3875 3875 3876 - static inline bool skb_can_coalesce(struct sk_buff *skb, int i, 3877 - const struct page *page, int off) 3876 + static inline bool skb_can_coalesce_netmem(struct sk_buff *skb, int i, 3877 + netmem_ref netmem, int off) 3878 3878 { 3879 3879 if (skb_zcopy(skb)) 3880 3880 return false; 3881 3881 if (i) { 3882 3882 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; 3883 3883 3884 - return page == skb_frag_page(frag) && 3884 + return netmem == skb_frag_netmem(frag) && 3885 3885 off == skb_frag_off(frag) + skb_frag_size(frag); 3886 3886 } 3887 3887 return false; 3888 + } 3889 + 3890 + static inline bool skb_can_coalesce(struct sk_buff *skb, int i, 3891 + const struct page *page, int off) 3892 + { 3893 + return skb_can_coalesce_netmem(skb, i, page_to_netmem(page), off); 3888 3894 } 3889 3895 3890 3896 static inline int __skb_linearize(struct sk_buff *skb)
+1 -1
include/net/netmem.h
··· 139 139 return (__force netmem_ref)((unsigned long)niov | NET_IOV); 140 140 } 141 141 142 - static inline netmem_ref page_to_netmem(struct page *page) 142 + static inline netmem_ref page_to_netmem(const struct page *page) 143 143 { 144 144 return (__force netmem_ref)page; 145 145 }
+7
include/net/page_pool/helpers.h
··· 153 153 return page_pool_alloc_netmem(pool, offset, size, gfp); 154 154 } 155 155 156 + static inline netmem_ref page_pool_dev_alloc_netmems(struct page_pool *pool) 157 + { 158 + gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; 159 + 160 + return page_pool_alloc_netmems(pool, gfp); 161 + } 162 + 156 163 static inline struct page *page_pool_alloc(struct page_pool *pool, 157 164 unsigned int *offset, 158 165 unsigned int *size, gfp_t gfp)