Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'page_pool-a-couple-of-assorted-optimizations'

Alexander Lobakin says:

====================
page_pool: a couple of assorted optimizations

This series started out as a spin-off of the IAVF PP series[0], but has
grown (and shrunk) quite a bit since then. It now consists of three
semi-independent blocks:

* #1-2: Compile-time optimization. Split page_pool.h into two headers so
that consumers which do not need the complex inline helpers are not
bloated by them, and then stop including it in skbuff.h altogether. The
first patch is also a prerequisite for the whole series.
* #3: Improve cacheline locality for users of the Page Pool frag API.
* #4-6: Use direct cache recycling more aggressively, but only when it is
safe to do so. In addition, make sure nobody uses the Page Pool API
with interrupts disabled.

Patches #1 and #5 were authored by Yunsheng and Jakub respectively, with
small modifications from my side as per the mailing-list discussions.
For the perf numbers for #3-6, please see the individual commit messages.

Also available on my GH with many more Page Pool goodies[1].

[0] https://lore.kernel.org/netdev/20230530150035.1943669-1-aleksander.lobakin@intel.com
[1] https://github.com/alobakin/linux/commits/iavf-pp-frag
====================

Link: https://lore.kernel.org/r/20230804180529.2483231-1-aleksander.lobakin@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+339 -308
+3 -3
Documentation/networking/page_pool.rst
··· 67 67 .. kernel-doc:: net/core/page_pool.c 68 68 :identifiers: page_pool_create 69 69 70 - .. kernel-doc:: include/net/page_pool.h 70 + .. kernel-doc:: include/net/page_pool/types.h 71 71 :identifiers: struct page_pool_params 72 72 73 - .. kernel-doc:: include/net/page_pool.h 73 + .. kernel-doc:: include/net/page_pool/helpers.h 74 74 :identifiers: page_pool_put_page page_pool_put_full_page 75 75 page_pool_recycle_direct page_pool_dev_alloc_pages 76 76 page_pool_get_dma_addr page_pool_get_dma_dir ··· 122 122 The API will fill in the provided struct page_pool_stats with 123 123 statistics about the page_pool. 124 124 125 - .. kernel-doc:: include/net/page_pool.h 125 + .. kernel-doc:: include/net/page_pool/types.h 126 126 :identifiers: struct page_pool_recycle_stats 127 127 struct page_pool_alloc_stats 128 128 struct page_pool_stats
+1 -1
MAINTAINERS
··· 16020 16020 L: netdev@vger.kernel.org 16021 16021 S: Supported 16022 16022 F: Documentation/networking/page_pool.rst 16023 - F: include/net/page_pool.h 16023 + F: include/net/page_pool/ 16024 16024 F: include/trace/events/page_pool.h 16025 16025 F: net/core/page_pool.c 16026 16026
+1 -1
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 54 54 #include <net/pkt_cls.h> 55 55 #include <linux/hwmon.h> 56 56 #include <linux/hwmon-sysfs.h> 57 - #include <net/page_pool.h> 57 + #include <net/page_pool/helpers.h> 58 58 #include <linux/align.h> 59 59 #include <net/netdev_queues.h> 60 60
+1 -1
drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
··· 15 15 #include <linux/bpf.h> 16 16 #include <linux/bpf_trace.h> 17 17 #include <linux/filter.h> 18 - #include <net/page_pool.h> 18 + #include <net/page_pool/helpers.h> 19 19 #include "bnxt_hsi.h" 20 20 #include "bnxt.h" 21 21 #include "bnxt_xdp.h"
+1
drivers/net/ethernet/engleder/tsnep_main.c
··· 28 28 #include <linux/iopoll.h> 29 29 #include <linux/bpf.h> 30 30 #include <linux/bpf_trace.h> 31 + #include <net/page_pool/helpers.h> 31 32 #include <net/xdp_sock_drv.h> 32 33 33 34 #define TSNEP_RX_OFFSET (max(NET_SKB_PAD, XDP_PACKET_HEADROOM) + NET_IP_ALIGN)
+1
drivers/net/ethernet/freescale/fec_main.c
··· 38 38 #include <linux/in.h> 39 39 #include <linux/ip.h> 40 40 #include <net/ip.h> 41 + #include <net/page_pool/helpers.h> 41 42 #include <net/selftests.h> 42 43 #include <net/tso.h> 43 44 #include <linux/tcp.h>
+1
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
··· 18 18 #include <net/gre.h> 19 19 #include <net/gro.h> 20 20 #include <net/ip6_checksum.h> 21 + #include <net/page_pool/helpers.h> 21 22 #include <net/pkt_cls.h> 22 23 #include <net/pkt_sched.h> 23 24 #include <net/tcp.h>
+1 -1
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
··· 6 6 7 7 #include <linux/dim.h> 8 8 #include <linux/if_vlan.h> 9 - #include <net/page_pool.h> 9 + #include <net/page_pool/types.h> 10 10 #include <asm/barrier.h> 11 11 12 12 #include "hnae3.h"
+1 -1
drivers/net/ethernet/marvell/mvneta.c
··· 37 37 #include <net/ip.h> 38 38 #include <net/ipv6.h> 39 39 #include <net/tso.h> 40 - #include <net/page_pool.h> 40 + #include <net/page_pool/helpers.h> 41 41 #include <net/pkt_sched.h> 42 42 #include <linux/bpf_trace.h> 43 43
+1 -1
drivers/net/ethernet/marvell/mvpp2/mvpp2.h
··· 16 16 #include <linux/phy.h> 17 17 #include <linux/phylink.h> 18 18 #include <net/flow_offload.h> 19 - #include <net/page_pool.h> 19 + #include <net/page_pool/types.h> 20 20 #include <linux/bpf.h> 21 21 #include <net/xdp.h> 22 22
+1
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
··· 35 35 #include <uapi/linux/ppp_defs.h> 36 36 #include <net/ip.h> 37 37 #include <net/ipv6.h> 38 + #include <net/page_pool/helpers.h> 38 39 #include <net/tso.h> 39 40 #include <linux/bpf_trace.h> 40 41
+1
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
··· 7 7 8 8 #include <linux/interrupt.h> 9 9 #include <linux/pci.h> 10 + #include <net/page_pool/helpers.h> 10 11 #include <net/tso.h> 11 12 #include <linux/bitfield.h> 12 13
+1
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
··· 16 16 #include <linux/bpf.h> 17 17 #include <linux/bpf_trace.h> 18 18 #include <linux/bitfield.h> 19 + #include <net/page_pool/types.h> 19 20 20 21 #include "otx2_reg.h" 21 22 #include "otx2_common.h"
+1
drivers/net/ethernet/mediatek/mtk_eth_soc.c
··· 26 26 #include <linux/bitfield.h> 27 27 #include <net/dsa.h> 28 28 #include <net/dst_metadata.h> 29 + #include <net/page_pool/helpers.h> 29 30 30 31 #include "mtk_eth_soc.h" 31 32 #include "mtk_wed.h"
+1 -1
drivers/net/ethernet/mediatek/mtk_eth_soc.h
··· 18 18 #include <linux/rhashtable.h> 19 19 #include <linux/dim.h> 20 20 #include <linux/bitfield.h> 21 - #include <net/page_pool.h> 21 + #include <net/page_pool/types.h> 22 22 #include <linux/bpf_trace.h> 23 23 #include "mtk_ppe.h" 24 24
+1
drivers/net/ethernet/mellanox/mlx5/core/en/params.c
··· 6 6 #include "en/port.h" 7 7 #include "en_accel/en_accel.h" 8 8 #include "en_accel/ipsec.h" 9 + #include <net/page_pool/types.h> 9 10 #include <net/xdp_sock_drv.h> 10 11 11 12 static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev)
-1
drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 2 /* Copyright (c) 2020 Mellanox Technologies */ 3 3 4 - #include <net/page_pool.h> 5 4 #include "en/txrx.h" 6 5 #include "en/params.h" 7 6 #include "en/trap.h"
+1
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
··· 35 35 #include "en/xdp.h" 36 36 #include "en/params.h" 37 37 #include <linux/bitfield.h> 38 + #include <net/page_pool/helpers.h> 38 39 39 40 int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) 40 41 {
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
··· 38 38 #include <linux/debugfs.h> 39 39 #include <linux/if_bridge.h> 40 40 #include <linux/filter.h> 41 - #include <net/page_pool.h> 41 + #include <net/page_pool/types.h> 42 42 #include <net/pkt_sched.h> 43 43 #include <net/xdp_sock_drv.h> 44 44 #include "eswitch.h"
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
··· 36 36 #include <linux/bitmap.h> 37 37 #include <linux/filter.h> 38 38 #include <net/ip6_checksum.h> 39 - #include <net/page_pool.h> 39 + #include <net/page_pool/helpers.h> 40 40 #include <net/inet_ecn.h> 41 41 #include <net/gro.h> 42 42 #include <net/udp.h>
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
··· 38 38 #include "en/port.h" 39 39 40 40 #ifdef CONFIG_PAGE_POOL_STATS 41 - #include <net/page_pool.h> 41 + #include <net/page_pool/helpers.h> 42 42 #endif 43 43 44 44 static unsigned int stats_grps_num(struct mlx5e_priv *priv)
+1
drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
··· 2 2 3 3 #include <linux/bpf.h> 4 4 #include <linux/filter.h> 5 + #include <net/page_pool/helpers.h> 5 6 6 7 #include "lan966x_main.h" 7 8
+1 -1
drivers/net/ethernet/microchip/lan966x/lan966x_main.h
··· 10 10 #include <linux/phy.h> 11 11 #include <linux/phylink.h> 12 12 #include <linux/ptp_clock_kernel.h> 13 - #include <net/page_pool.h> 13 + #include <net/page_pool/types.h> 14 14 #include <net/pkt_cls.h> 15 15 #include <net/pkt_sched.h> 16 16 #include <net/switchdev.h>
+1
drivers/net/ethernet/microsoft/mana/mana_en.c
··· 11 11 12 12 #include <net/checksum.h> 13 13 #include <net/ip6_checksum.h> 14 + #include <net/page_pool/helpers.h> 14 15 #include <net/xdp.h> 15 16 16 17 #include <net/mana/mana.h>
+1 -1
drivers/net/ethernet/socionext/netsec.c
··· 15 15 #include <linux/bpf_trace.h> 16 16 17 17 #include <net/tcp.h> 18 - #include <net/page_pool.h> 18 + #include <net/page_pool/helpers.h> 19 19 #include <net/ip6_checksum.h> 20 20 21 21 #define NETSEC_REG_SOFT_RST 0x104
+1 -1
drivers/net/ethernet/stmicro/stmmac/stmmac.h
··· 21 21 #include <linux/ptp_clock_kernel.h> 22 22 #include <linux/net_tstamp.h> 23 23 #include <linux/reset.h> 24 - #include <net/page_pool.h> 24 + #include <net/page_pool/types.h> 25 25 #include <net/xdp.h> 26 26 #include <uapi/linux/bpf.h> 27 27
+1
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
··· 39 39 #include <linux/phylink.h> 40 40 #include <linux/udp.h> 41 41 #include <linux/bpf_trace.h> 42 + #include <net/page_pool/helpers.h> 42 43 #include <net/pkt_cls.h> 43 44 #include <net/xdp_sock_drv.h> 44 45 #include "stmmac_ptp.h"
+1 -1
drivers/net/ethernet/ti/cpsw.c
··· 31 31 #include <linux/if_vlan.h> 32 32 #include <linux/kmemleak.h> 33 33 #include <linux/sys_soc.h> 34 - #include <net/page_pool.h> 34 + #include <net/page_pool/helpers.h> 35 35 #include <linux/bpf.h> 36 36 #include <linux/bpf_trace.h> 37 37
+1 -1
drivers/net/ethernet/ti/cpsw_new.c
··· 30 30 #include <linux/sys_soc.h> 31 31 32 32 #include <net/switchdev.h> 33 - #include <net/page_pool.h> 33 + #include <net/page_pool/helpers.h> 34 34 #include <net/pkt_cls.h> 35 35 #include <net/devlink.h> 36 36
+1 -1
drivers/net/ethernet/ti/cpsw_priv.c
··· 18 18 #include <linux/platform_device.h> 19 19 #include <linux/pm_runtime.h> 20 20 #include <linux/skbuff.h> 21 - #include <net/page_pool.h> 21 + #include <net/page_pool/helpers.h> 22 22 #include <net/pkt_cls.h> 23 23 #include <net/pkt_sched.h> 24 24
+1 -1
drivers/net/ethernet/wangxun/libwx/wx_lib.c
··· 3 3 4 4 #include <linux/etherdevice.h> 5 5 #include <net/ip6_checksum.h> 6 - #include <net/page_pool.h> 6 + #include <net/page_pool/helpers.h> 7 7 #include <net/inet_ecn.h> 8 8 #include <linux/iopoll.h> 9 9 #include <linux/sctp.h>
+1 -1
drivers/net/veth.c
··· 26 26 #include <linux/ptr_ring.h> 27 27 #include <linux/bpf_trace.h> 28 28 #include <linux/net_tstamp.h> 29 - #include <net/page_pool.h> 29 + #include <net/page_pool/helpers.h> 30 30 31 31 #define DRV_NAME "veth" 32 32 #define DRV_VERSION "1.0"
-1
drivers/net/wireless/mediatek/mt76/mac80211.c
··· 4 4 */ 5 5 #include <linux/sched.h> 6 6 #include <linux/of.h> 7 - #include <net/page_pool.h> 8 7 #include "mt76.h" 9 8 10 9 #define CHAN2G(_idx, _freq) { \
+1
drivers/net/wireless/mediatek/mt76/mt76.h
··· 15 15 #include <linux/average.h> 16 16 #include <linux/soc/mediatek/mtk_wed.h> 17 17 #include <net/mac80211.h> 18 + #include <net/page_pool/helpers.h> 18 19 #include "util.h" 19 20 #include "testmode.h" 20 21
+1 -1
drivers/net/xen-netfront.c
··· 45 45 #include <linux/slab.h> 46 46 #include <net/ip.h> 47 47 #include <linux/bpf.h> 48 - #include <net/page_pool.h> 48 + #include <net/page_pool/types.h> 49 49 #include <linux/bpf_trace.h> 50 50 51 51 #include <xen/xen.h>
+7
include/linux/lockdep.h
··· 625 625 WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirq_context)); \ 626 626 } while (0) 627 627 628 + #define lockdep_assert_no_hardirq() \ 629 + do { \ 630 + WARN_ON_ONCE(__lockdep_enabled && (this_cpu_read(hardirq_context) || \ 631 + !this_cpu_read(hardirqs_enabled))); \ 632 + } while (0) 633 + 628 634 #define lockdep_assert_preemption_enabled() \ 629 635 do { \ 630 636 WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ ··· 665 659 # define lockdep_assert_irqs_enabled() do { } while (0) 666 660 # define lockdep_assert_irqs_disabled() do { } while (0) 667 661 # define lockdep_assert_in_irq() do { } while (0) 662 + # define lockdep_assert_no_hardirq() do { } while (0) 668 663 669 664 # define lockdep_assert_preemption_enabled() do { } while (0) 670 665 # define lockdep_assert_preemption_disabled() do { } while (0)
+3 -2
include/linux/skbuff.h
··· 32 32 #include <linux/if_packet.h> 33 33 #include <linux/llist.h> 34 34 #include <net/flow.h> 35 - #include <net/page_pool.h> 36 35 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 37 36 #include <linux/netfilter/nf_conntrack_common.h> 38 37 #endif ··· 3420 3421 __skb_frag_ref(&skb_shinfo(skb)->frags[f]); 3421 3422 } 3422 3423 3424 + bool napi_pp_put_page(struct page *page, bool napi_safe); 3425 + 3423 3426 static inline void 3424 3427 napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe) 3425 3428 { 3426 3429 struct page *page = skb_frag_page(frag); 3427 3430 3428 3431 #ifdef CONFIG_PAGE_POOL 3429 - if (recycle && page_pool_return_skb_page(page, napi_safe)) 3432 + if (recycle && napi_pp_put_page(page, napi_safe)) 3430 3433 return; 3431 3434 #endif 3432 3435 put_page(page);
+5 -237
include/net/page_pool.h include/net/page_pool/helpers.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 2 2 * 3 - * page_pool.h 3 + * page_pool/helpers.h 4 4 * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com> 5 5 * Copyright (C) 2016 Red Hat, Inc. 6 6 */ ··· 26 26 * will release the DMA mapping and in-flight state accounting. We 27 27 * hope to lift this requirement in the future. 28 28 */ 29 - #ifndef _NET_PAGE_POOL_H 30 - #define _NET_PAGE_POOL_H 29 + #ifndef _NET_PAGE_POOL_HELPERS_H 30 + #define _NET_PAGE_POOL_HELPERS_H 31 31 32 - #include <linux/mm.h> /* Needed by ptr_ring */ 33 - #include <linux/ptr_ring.h> 34 - #include <linux/dma-direction.h> 35 - 36 - #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA 37 - * map/unmap 38 - */ 39 - #define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets 40 - * from page_pool will be 41 - * DMA-synced-for-device according to 42 - * the length provided by the device 43 - * driver. 44 - * Please note DMA-sync-for-CPU is still 45 - * device driver responsibility 46 - */ 47 - #define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ 48 - #define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ 49 - PP_FLAG_DMA_SYNC_DEV |\ 50 - PP_FLAG_PAGE_FRAG) 51 - 52 - /* 53 - * Fast allocation side cache array/stack 54 - * 55 - * The cache size and refill watermark is related to the network 56 - * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX 57 - * ring is usually refilled and the max consumed elements will be 64, 58 - * thus a natural max size of objects needed in the cache. 59 - * 60 - * Keeping room for more objects, is due to XDP_DROP use-case. As 61 - * XDP_DROP allows the opportunity to recycle objects directly into 62 - * this array, as it shares the same softirq/NAPI protection. If 63 - * cache is already full (or partly full) then the XDP_DROP recycles 64 - * would have to take a slower code path. 
65 - */ 66 - #define PP_ALLOC_CACHE_SIZE 128 67 - #define PP_ALLOC_CACHE_REFILL 64 68 - struct pp_alloc_cache { 69 - u32 count; 70 - struct page *cache[PP_ALLOC_CACHE_SIZE]; 71 - }; 72 - 73 - /** 74 - * struct page_pool_params - page pool parameters 75 - * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG 76 - * @order: 2^order pages on allocation 77 - * @pool_size: size of the ptr_ring 78 - * @nid: NUMA node id to allocate from pages from 79 - * @dev: device, for DMA pre-mapping purposes 80 - * @napi: NAPI which is the sole consumer of pages, otherwise NULL 81 - * @dma_dir: DMA mapping direction 82 - * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV 83 - * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV 84 - */ 85 - struct page_pool_params { 86 - unsigned int flags; 87 - unsigned int order; 88 - unsigned int pool_size; 89 - int nid; 90 - struct device *dev; 91 - struct napi_struct *napi; 92 - enum dma_data_direction dma_dir; 93 - unsigned int max_len; 94 - unsigned int offset; 95 - /* private: used by test code only */ 96 - void (*init_callback)(struct page *page, void *arg); 97 - void *init_arg; 98 - }; 32 + #include <net/page_pool/types.h> 99 33 100 34 #ifdef CONFIG_PAGE_POOL_STATS 101 - /** 102 - * struct page_pool_alloc_stats - allocation statistics 103 - * @fast: successful fast path allocations 104 - * @slow: slow path order-0 allocations 105 - * @slow_high_order: slow path high order allocations 106 - * @empty: ptr ring is empty, so a slow path allocation was forced 107 - * @refill: an allocation which triggered a refill of the cache 108 - * @waive: pages obtained from the ptr ring that cannot be added to 109 - * the cache due to a NUMA mismatch 110 - */ 111 - struct page_pool_alloc_stats { 112 - u64 fast; 113 - u64 slow; 114 - u64 slow_high_order; 115 - u64 empty; 116 - u64 refill; 117 - u64 waive; 118 - }; 119 - 120 - /** 121 - * struct page_pool_recycle_stats - recycling (freeing) statistics 122 - * @cached: recycling 
placed page in the page pool cache 123 - * @cache_full: page pool cache was full 124 - * @ring: page placed into the ptr ring 125 - * @ring_full: page released from page pool because the ptr ring was full 126 - * @released_refcnt: page released (and not recycled) because refcnt > 1 127 - */ 128 - struct page_pool_recycle_stats { 129 - u64 cached; 130 - u64 cache_full; 131 - u64 ring; 132 - u64 ring_full; 133 - u64 released_refcnt; 134 - }; 135 - 136 - /** 137 - * struct page_pool_stats - combined page pool use statistics 138 - * @alloc_stats: see struct page_pool_alloc_stats 139 - * @recycle_stats: see struct page_pool_recycle_stats 140 - * 141 - * Wrapper struct for combining page pool stats with different storage 142 - * requirements. 143 - */ 144 - struct page_pool_stats { 145 - struct page_pool_alloc_stats alloc_stats; 146 - struct page_pool_recycle_stats recycle_stats; 147 - }; 148 - 149 35 int page_pool_ethtool_stats_get_count(void); 150 36 u8 *page_pool_ethtool_stats_get_strings(u8 *data); 151 37 u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); ··· 44 158 bool page_pool_get_stats(struct page_pool *pool, 45 159 struct page_pool_stats *stats); 46 160 #else 47 - 48 161 static inline int page_pool_ethtool_stats_get_count(void) 49 162 { 50 163 return 0; ··· 58 173 { 59 174 return data; 60 175 } 61 - 62 176 #endif 63 - 64 - struct page_pool { 65 - struct page_pool_params p; 66 - 67 - struct delayed_work release_dw; 68 - void (*disconnect)(void *); 69 - unsigned long defer_start; 70 - unsigned long defer_warn; 71 - 72 - u32 pages_state_hold_cnt; 73 - unsigned int frag_offset; 74 - struct page *frag_page; 75 - long frag_users; 76 - 77 - #ifdef CONFIG_PAGE_POOL_STATS 78 - /* these stats are incremented while in softirq context */ 79 - struct page_pool_alloc_stats alloc_stats; 80 - #endif 81 - u32 xdp_mem_id; 82 - 83 - /* 84 - * Data structure for allocation side 85 - * 86 - * Drivers allocation side usually already perform some kind 87 - * of resource 
protection. Piggyback on this protection, and 88 - * require driver to protect allocation side. 89 - * 90 - * For NIC drivers this means, allocate a page_pool per 91 - * RX-queue. As the RX-queue is already protected by 92 - * Softirq/BH scheduling and napi_schedule. NAPI schedule 93 - * guarantee that a single napi_struct will only be scheduled 94 - * on a single CPU (see napi_schedule). 95 - */ 96 - struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; 97 - 98 - /* Data structure for storing recycled pages. 99 - * 100 - * Returning/freeing pages is more complicated synchronization 101 - * wise, because free's can happen on remote CPUs, with no 102 - * association with allocation resource. 103 - * 104 - * Use ptr_ring, as it separates consumer and producer 105 - * effeciently, it a way that doesn't bounce cache-lines. 106 - * 107 - * TODO: Implement bulk return pages into this structure. 108 - */ 109 - struct ptr_ring ring; 110 - 111 - #ifdef CONFIG_PAGE_POOL_STATS 112 - /* recycle stats are per-cpu to avoid locking */ 113 - struct page_pool_recycle_stats __percpu *recycle_stats; 114 - #endif 115 - atomic_t pages_state_release_cnt; 116 - 117 - /* A page_pool is strictly tied to a single RX-queue being 118 - * protected by NAPI, due to above pp_alloc_cache. This 119 - * refcnt serves purpose is to simplify drivers error handling. 120 - */ 121 - refcount_t user_cnt; 122 - 123 - u64 destroy_cnt; 124 - }; 125 - 126 - struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); 127 177 128 178 /** 129 179 * page_pool_dev_alloc_pages() - allocate a page. 
··· 72 252 73 253 return page_pool_alloc_pages(pool, gfp); 74 254 } 75 - 76 - struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, 77 - unsigned int size, gfp_t gfp); 78 255 79 256 static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, 80 257 unsigned int *offset, ··· 94 277 { 95 278 return pool->p.dma_dir; 96 279 } 97 - 98 - bool page_pool_return_skb_page(struct page *page, bool napi_safe); 99 - 100 - struct page_pool *page_pool_create(const struct page_pool_params *params); 101 - 102 - struct xdp_mem_info; 103 - 104 - #ifdef CONFIG_PAGE_POOL 105 - void page_pool_unlink_napi(struct page_pool *pool); 106 - void page_pool_destroy(struct page_pool *pool); 107 - void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), 108 - struct xdp_mem_info *mem); 109 - void page_pool_put_page_bulk(struct page_pool *pool, void **data, 110 - int count); 111 - #else 112 - static inline void page_pool_unlink_napi(struct page_pool *pool) 113 - { 114 - } 115 - 116 - static inline void page_pool_destroy(struct page_pool *pool) 117 - { 118 - } 119 - 120 - static inline void page_pool_use_xdp_mem(struct page_pool *pool, 121 - void (*disconnect)(void *), 122 - struct xdp_mem_info *mem) 123 - { 124 - } 125 - 126 - static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, 127 - int count) 128 - { 129 - } 130 - #endif 131 - 132 - void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, 133 - unsigned int dma_sync_size, 134 - bool allow_direct); 135 280 136 281 /* pp_frag_count represents the number of writers who can update the page 137 282 * either by updating skb->data or via DMA mappings for the device. 
··· 224 445 page->dma_addr_upper = upper_32_bits(addr); 225 446 } 226 447 227 - static inline bool is_page_pool_compiled_in(void) 228 - { 229 - #ifdef CONFIG_PAGE_POOL 230 - return true; 231 - #else 232 - return false; 233 - #endif 234 - } 235 - 236 448 static inline bool page_pool_put(struct page_pool *pool) 237 449 { 238 450 return refcount_dec_and_test(&pool->user_cnt); 239 451 } 240 452 241 - /* Caller must provide appropriate safe context, e.g. NAPI. */ 242 - void page_pool_update_nid(struct page_pool *pool, int new_nid); 243 453 static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) 244 454 { 245 455 if (unlikely(pool->p.nid != new_nid)) 246 456 page_pool_update_nid(pool, new_nid); 247 457 } 248 458 249 - #endif /* _NET_PAGE_POOL_H */ 459 + #endif /* _NET_PAGE_POOL_HELPERS_H */
+236
include/net/page_pool/types.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #ifndef _NET_PAGE_POOL_TYPES_H 4 + #define _NET_PAGE_POOL_TYPES_H 5 + 6 + #include <linux/dma-direction.h> 7 + #include <linux/ptr_ring.h> 8 + 9 + #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA 10 + * map/unmap 11 + */ 12 + #define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets 13 + * from page_pool will be 14 + * DMA-synced-for-device according to 15 + * the length provided by the device 16 + * driver. 17 + * Please note DMA-sync-for-CPU is still 18 + * device driver responsibility 19 + */ 20 + #define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ 21 + #define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ 22 + PP_FLAG_DMA_SYNC_DEV |\ 23 + PP_FLAG_PAGE_FRAG) 24 + 25 + /* 26 + * Fast allocation side cache array/stack 27 + * 28 + * The cache size and refill watermark is related to the network 29 + * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX 30 + * ring is usually refilled and the max consumed elements will be 64, 31 + * thus a natural max size of objects needed in the cache. 32 + * 33 + * Keeping room for more objects, is due to XDP_DROP use-case. As 34 + * XDP_DROP allows the opportunity to recycle objects directly into 35 + * this array, as it shares the same softirq/NAPI protection. If 36 + * cache is already full (or partly full) then the XDP_DROP recycles 37 + * would have to take a slower code path. 
38 + */ 39 + #define PP_ALLOC_CACHE_SIZE 128 40 + #define PP_ALLOC_CACHE_REFILL 64 41 + struct pp_alloc_cache { 42 + u32 count; 43 + struct page *cache[PP_ALLOC_CACHE_SIZE]; 44 + }; 45 + 46 + /** 47 + * struct page_pool_params - page pool parameters 48 + * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG 49 + * @order: 2^order pages on allocation 50 + * @pool_size: size of the ptr_ring 51 + * @nid: NUMA node id to allocate from pages from 52 + * @dev: device, for DMA pre-mapping purposes 53 + * @napi: NAPI which is the sole consumer of pages, otherwise NULL 54 + * @dma_dir: DMA mapping direction 55 + * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV 56 + * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV 57 + */ 58 + struct page_pool_params { 59 + unsigned int flags; 60 + unsigned int order; 61 + unsigned int pool_size; 62 + int nid; 63 + struct device *dev; 64 + struct napi_struct *napi; 65 + enum dma_data_direction dma_dir; 66 + unsigned int max_len; 67 + unsigned int offset; 68 + /* private: used by test code only */ 69 + void (*init_callback)(struct page *page, void *arg); 70 + void *init_arg; 71 + }; 72 + 73 + #ifdef CONFIG_PAGE_POOL_STATS 74 + /** 75 + * struct page_pool_alloc_stats - allocation statistics 76 + * @fast: successful fast path allocations 77 + * @slow: slow path order-0 allocations 78 + * @slow_high_order: slow path high order allocations 79 + * @empty: ptr ring is empty, so a slow path allocation was forced 80 + * @refill: an allocation which triggered a refill of the cache 81 + * @waive: pages obtained from the ptr ring that cannot be added to 82 + * the cache due to a NUMA mismatch 83 + */ 84 + struct page_pool_alloc_stats { 85 + u64 fast; 86 + u64 slow; 87 + u64 slow_high_order; 88 + u64 empty; 89 + u64 refill; 90 + u64 waive; 91 + }; 92 + 93 + /** 94 + * struct page_pool_recycle_stats - recycling (freeing) statistics 95 + * @cached: recycling placed page in the page pool cache 96 + * @cache_full: page 
pool cache was full 97 + * @ring: page placed into the ptr ring 98 + * @ring_full: page released from page pool because the ptr ring was full 99 + * @released_refcnt: page released (and not recycled) because refcnt > 1 100 + */ 101 + struct page_pool_recycle_stats { 102 + u64 cached; 103 + u64 cache_full; 104 + u64 ring; 105 + u64 ring_full; 106 + u64 released_refcnt; 107 + }; 108 + 109 + /** 110 + * struct page_pool_stats - combined page pool use statistics 111 + * @alloc_stats: see struct page_pool_alloc_stats 112 + * @recycle_stats: see struct page_pool_recycle_stats 113 + * 114 + * Wrapper struct for combining page pool stats with different storage 115 + * requirements. 116 + */ 117 + struct page_pool_stats { 118 + struct page_pool_alloc_stats alloc_stats; 119 + struct page_pool_recycle_stats recycle_stats; 120 + }; 121 + #endif 122 + 123 + struct page_pool { 124 + struct page_pool_params p; 125 + 126 + long frag_users; 127 + struct page *frag_page; 128 + unsigned int frag_offset; 129 + u32 pages_state_hold_cnt; 130 + 131 + struct delayed_work release_dw; 132 + void (*disconnect)(void *pool); 133 + unsigned long defer_start; 134 + unsigned long defer_warn; 135 + 136 + #ifdef CONFIG_PAGE_POOL_STATS 137 + /* these stats are incremented while in softirq context */ 138 + struct page_pool_alloc_stats alloc_stats; 139 + #endif 140 + u32 xdp_mem_id; 141 + 142 + /* 143 + * Data structure for allocation side 144 + * 145 + * Drivers allocation side usually already perform some kind 146 + * of resource protection. Piggyback on this protection, and 147 + * require driver to protect allocation side. 148 + * 149 + * For NIC drivers this means, allocate a page_pool per 150 + * RX-queue. As the RX-queue is already protected by 151 + * Softirq/BH scheduling and napi_schedule. NAPI schedule 152 + * guarantee that a single napi_struct will only be scheduled 153 + * on a single CPU (see napi_schedule). 
154 + */ 155 + struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; 156 + 157 + /* Data structure for storing recycled pages. 158 + * 159 + * Returning/freeing pages is more complicated synchronization 160 + * wise, because free's can happen on remote CPUs, with no 161 + * association with allocation resource. 162 + * 163 + * Use ptr_ring, as it separates consumer and producer 164 + * efficiently, it a way that doesn't bounce cache-lines. 165 + * 166 + * TODO: Implement bulk return pages into this structure. 167 + */ 168 + struct ptr_ring ring; 169 + 170 + #ifdef CONFIG_PAGE_POOL_STATS 171 + /* recycle stats are per-cpu to avoid locking */ 172 + struct page_pool_recycle_stats __percpu *recycle_stats; 173 + #endif 174 + atomic_t pages_state_release_cnt; 175 + 176 + /* A page_pool is strictly tied to a single RX-queue being 177 + * protected by NAPI, due to above pp_alloc_cache. This 178 + * refcnt serves purpose is to simplify drivers error handling. 179 + */ 180 + refcount_t user_cnt; 181 + 182 + u64 destroy_cnt; 183 + }; 184 + 185 + struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); 186 + struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, 187 + unsigned int size, gfp_t gfp); 188 + struct page_pool *page_pool_create(const struct page_pool_params *params); 189 + 190 + struct xdp_mem_info; 191 + 192 + #ifdef CONFIG_PAGE_POOL 193 + void page_pool_unlink_napi(struct page_pool *pool); 194 + void page_pool_destroy(struct page_pool *pool); 195 + void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), 196 + struct xdp_mem_info *mem); 197 + void page_pool_put_page_bulk(struct page_pool *pool, void **data, 198 + int count); 199 + #else 200 + static inline void page_pool_unlink_napi(struct page_pool *pool) 201 + { 202 + } 203 + 204 + static inline void page_pool_destroy(struct page_pool *pool) 205 + { 206 + } 207 + 208 + static inline void page_pool_use_xdp_mem(struct page_pool *pool, 209 + void 
(*disconnect)(void *), 210 + struct xdp_mem_info *mem) 211 + { 212 + } 213 + 214 + static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, 215 + int count) 216 + { 217 + } 218 + #endif 219 + 220 + void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, 221 + unsigned int dma_sync_size, 222 + bool allow_direct); 223 + 224 + static inline bool is_page_pool_compiled_in(void) 225 + { 226 + #ifdef CONFIG_PAGE_POOL 227 + return true; 228 + #else 229 + return false; 230 + #endif 231 + } 232 + 233 + /* Caller must provide appropriate safe context, e.g. NAPI. */ 234 + void page_pool_update_nid(struct page_pool *pool, int new_nid); 235 + 236 + #endif /* _NET_PAGE_POOL_H */
+1 -1
include/trace/events/page_pool.h
··· 9 9 #include <linux/tracepoint.h> 10 10 11 11 #include <trace/events/mmflags.h> 12 - #include <net/page_pool.h> 12 + #include <net/page_pool/types.h> 13 13 14 14 TRACE_EVENT(page_pool_release, 15 15
+1 -1
net/bpf/test_run.c
··· 15 15 #include <net/sock.h> 16 16 #include <net/tcp.h> 17 17 #include <net/net_namespace.h> 18 - #include <net/page_pool.h> 18 + #include <net/page_pool/helpers.h> 19 19 #include <linux/error-injection.h> 20 20 #include <linux/smp.h> 21 21 #include <linux/sock_diag.h>
+3 -40
net/core/page_pool.c
··· 10 10 #include <linux/slab.h> 11 11 #include <linux/device.h> 12 12 13 - #include <net/page_pool.h> 13 + #include <net/page_pool/helpers.h> 14 14 #include <net/xdp.h> 15 15 16 16 #include <linux/dma-direction.h> ··· 587 587 __page_pool_put_page(struct page_pool *pool, struct page *page, 588 588 unsigned int dma_sync_size, bool allow_direct) 589 589 { 590 + lockdep_assert_no_hardirq(); 591 + 590 592 /* This allocator is optimized for the XDP mode that uses 591 593 * one-frame-per-page, but has fallbacks that act like the 592 594 * regular page allocator APIs. ··· 937 935 } 938 936 } 939 937 EXPORT_SYMBOL(page_pool_update_nid); 940 - 941 - bool page_pool_return_skb_page(struct page *page, bool napi_safe) 942 - { 943 - struct napi_struct *napi; 944 - struct page_pool *pp; 945 - bool allow_direct; 946 - 947 - page = compound_head(page); 948 - 949 - /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation 950 - * in order to preserve any existing bits, such as bit 0 for the 951 - * head page of compound page and bit 1 for pfmemalloc page, so 952 - * mask those bits for freeing side when doing below checking, 953 - * and page_is_pfmemalloc() is checked in __page_pool_put_page() 954 - * to avoid recycling the pfmemalloc page. 955 - */ 956 - if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE)) 957 - return false; 958 - 959 - pp = page->pp; 960 - 961 - /* Allow direct recycle if we have reasons to believe that we are 962 - * in the same context as the consumer would run, so there's 963 - * no possible race. 964 - */ 965 - napi = READ_ONCE(pp->p.napi); 966 - allow_direct = napi_safe && napi && 967 - READ_ONCE(napi->list_owner) == smp_processor_id(); 968 - 969 - /* Driver set this to memory recycling info. Reset it on recycle. 970 - * This will *not* work for NIC using a split-page memory model. 971 - * The page will be returned to the pool here regardless of the 972 - * 'flipped' fragment being in use or not. 
973 - */ 974 - page_pool_put_full_page(pp, page, allow_direct); 975 - 976 - return true; 977 - } 978 - EXPORT_SYMBOL(page_pool_return_skb_page);
+47 -2
net/core/skbuff.c
··· 73 73 #include <net/mpls.h> 74 74 #include <net/mptcp.h> 75 75 #include <net/mctp.h> 76 - #include <net/page_pool.h> 76 + #include <net/page_pool/helpers.h> 77 77 #include <net/dropreason.h> 78 78 79 79 #include <linux/uaccess.h> ··· 879 879 skb_get(list); 880 880 } 881 881 882 + #if IS_ENABLED(CONFIG_PAGE_POOL) 883 + bool napi_pp_put_page(struct page *page, bool napi_safe) 884 + { 885 + bool allow_direct = false; 886 + struct page_pool *pp; 887 + 888 + page = compound_head(page); 889 + 890 + /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation 891 + * in order to preserve any existing bits, such as bit 0 for the 892 + * head page of compound page and bit 1 for pfmemalloc page, so 893 + * mask those bits for freeing side when doing below checking, 894 + * and page_is_pfmemalloc() is checked in __page_pool_put_page() 895 + * to avoid recycling the pfmemalloc page. 896 + */ 897 + if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE)) 898 + return false; 899 + 900 + pp = page->pp; 901 + 902 + /* Allow direct recycle if we have reasons to believe that we are 903 + * in the same context as the consumer would run, so there's 904 + * no possible race. 905 + * __page_pool_put_page() makes sure we're not in hardirq context 906 + * and interrupts are enabled prior to accessing the cache. 907 + */ 908 + if (napi_safe || in_softirq()) { 909 + const struct napi_struct *napi = READ_ONCE(pp->p.napi); 910 + 911 + allow_direct = napi && 912 + READ_ONCE(napi->list_owner) == smp_processor_id(); 913 + } 914 + 915 + /* Driver set this to memory recycling info. Reset it on recycle. 916 + * This will *not* work for NIC using a split-page memory model. 917 + * The page will be returned to the pool here regardless of the 918 + * 'flipped' fragment being in use or not. 
919 + */ 920 + page_pool_put_full_page(pp, page, allow_direct); 921 + 922 + return true; 923 + } 924 + EXPORT_SYMBOL(napi_pp_put_page); 925 + #endif 926 + 882 927 static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe) 883 928 { 884 929 if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) 885 930 return false; 886 - return page_pool_return_skb_page(virt_to_page(data), napi_safe); 931 + return napi_pp_put_page(virt_to_page(data), napi_safe); 887 932 } 888 933 889 934 static void skb_kfree_head(void *head, unsigned int end_offset)
+1 -1
net/core/xdp.c
··· 14 14 #include <linux/idr.h> 15 15 #include <linux/rhashtable.h> 16 16 #include <linux/bug.h> 17 - #include <net/page_pool.h> 17 + #include <net/page_pool/helpers.h> 18 18 19 19 #include <net/xdp.h> 20 20 #include <net/xdp_priv.h> /* struct xdp_mem_allocator */