Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'split-netmem-from-struct-page'

Byungchul Park says:

====================
Split netmem from struct page

The MM subsystem is trying to reduce struct page to a single pointer.
See the following link for background:

https://kernelnewbies.org/MatthewWilcox/Memdescs/Path

The first step towards that is splitting struct page by its individual
users, as has already been done with folio and slab. This patchset does
that for page pool.

Matthew Wilcox previously attempted the same work but stopped it; see:

https://lore.kernel.org/20230111042214.907030-1-willy@infradead.org

This time I focused on removing the page pool members from struct page,
not on moving the page pool allocation code from net to mm. That can be
done later if needed.
====================

Link: https://patch.msgid.link/20250721021835.63939-1-byungchul@sk.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

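The conversion that repeats across the drivers below is mechanical: every direct access to the page pool fields of struct page (page->pp, page->pp_ref_count) is routed through the new pp_page_to_nmdesc() helper, which returns the struct netmem_desc that currently overlays the page. A minimal before/after sketch of the pattern; the recycle_old()/recycle_new() names are hypothetical, for illustration only:

	/* Before: the driver reads the page_pool pointer straight off the page. */
	static void recycle_old(struct page *page)
	{
		page_pool_put_full_page(page->pp, page, false);
	}

	/* After: the pp field lives in struct netmem_desc; pp_page_to_nmdesc()
	 * casts the page to its descriptor and, under DEBUG_NET, warns if the
	 * page is not actually a page_pool page.
	 */
	static void recycle_new(struct page *page)
	{
		page_pool_put_full_page(pp_page_to_nmdesc(page)->pp, page, false);
	}
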
+161 -40
+7 -3
drivers/net/ethernet/freescale/fec_main.c
···
 		struct page *page = txq->tx_buf[i].buf_p;

 		if (page)
-			page_pool_put_page(page->pp, page, 0, false);
+			page_pool_put_page(pp_page_to_nmdesc(page)->pp,
+					   page, 0,
+					   false);
 	}

 	txq->tx_buf[i].buf_p = NULL;
···
 		xdp_return_frame_rx_napi(xdpf);
 	} else { /* recycle pages of XDP_TX frames */
 		/* The dma_sync_size = 0 as XDP_TX has already synced DMA for_device */
-		page_pool_put_page(page->pp, page, 0, true);
+		page_pool_put_page(pp_page_to_nmdesc(page)->pp, page,
+				   0, true);
 	}

 	txq->tx_buf[index].buf_p = NULL;
···
 	} else {
 		struct page *page = txq->tx_buf[i].buf_p;

-		page_pool_put_page(page->pp, page, 0, false);
+		page_pool_put_page(pp_page_to_nmdesc(page)->pp,
+				   page, 0, false);
 	}

 	txq->tx_buf[i].buf_p = NULL;
+1 -1
drivers/net/ethernet/intel/iavf/iavf_txrx.c
···
 					  unsigned int size)
 {
 	struct page *buf_page = __netmem_to_page(rx_buffer->netmem);
-	u32 hr = buf_page->pp->p.offset;
+	u32 hr = pp_page_to_nmdesc(buf_page)->pp->p.offset;
 	struct sk_buff *skb;
 	void *va;
+5 -3
drivers/net/ethernet/intel/idpf/idpf_txrx.c
···
 	hdr_page = __netmem_to_page(hdr->netmem);
 	buf_page = __netmem_to_page(buf->netmem);
-	dst = page_address(hdr_page) + hdr->offset + hdr_page->pp->p.offset;
-	src = page_address(buf_page) + buf->offset + buf_page->pp->p.offset;
+	dst = page_address(hdr_page) + hdr->offset +
+	      pp_page_to_nmdesc(hdr_page)->pp->p.offset;
+	src = page_address(buf_page) + buf->offset +
+	      pp_page_to_nmdesc(buf_page)->pp->p.offset;

 	memcpy(dst, src, LARGEST_ALIGN(copy));
 	buf->offset += copy;
···
 struct sk_buff *idpf_rx_build_skb(const struct libeth_fqe *buf, u32 size)
 {
 	struct page *buf_page = __netmem_to_page(buf->netmem);
-	u32 hr = buf_page->pp->p.offset;
+	u32 hr = pp_page_to_nmdesc(buf_page)->pp->p.offset;
 	struct sk_buff *skb;
 	void *va;
+1 -1
drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
···
 	cq->pool_ptrs++;
 	if (xsk_buff) {
 		xsk_buff_free(xsk_buff);
-	} else if (page->pp) {
+	} else if (pp_page_to_nmdesc(page)->pp) {
 		page_pool_recycle_direct(pool->page_pool, page);
 	} else {
 		otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize,
+3 -1
drivers/net/ethernet/mellanox/mlx4/en_rx.c
···
 	truesize += frag_info->frag_stride;
 	if (frag_info->frag_stride == PAGE_SIZE / 2) {
+		struct netmem_desc *desc = pp_page_to_nmdesc(page);
+
 		frags->page_offset ^= PAGE_SIZE / 2;
 		release = page_count(page) != 1 ||
-			  atomic_long_read(&page->pp_ref_count) != 1 ||
+			  atomic_long_read(&desc->pp_ref_count) != 1 ||
 			  page_is_pfmemalloc(page) ||
 			  page_to_nid(page) != numa_mem_id();
 	} else if (!priv->rx_headroom) {
+2 -1
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
···
 			/* No need to check page_pool_page_is_pp() as we
 			 * know this is a page_pool page.
 			 */
-			page_pool_recycle_direct(page->pp, page);
+			page_pool_recycle_direct(pp_page_to_nmdesc(page)->pp,
+						 page);
 		} while (++n < num);

 		break;
+2 -2
drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c
···
 		return IRQ_NONE;

 	prueth_tx_ts_sr1(emac, (void *)page_address(page));
-	page_pool_recycle_direct(page->pp, page);
+	page_pool_recycle_direct(pp_page_to_nmdesc(page)->pp, page);

 	return IRQ_HANDLED;
 }
···
 		complete(&emac->cmd_complete);
 	}

-	page_pool_recycle_direct(page->pp, page);
+	page_pool_recycle_direct(pp_page_to_nmdesc(page)->pp, page);

 	return IRQ_HANDLED;
 }
+4 -2
drivers/net/netdevsim/netdev.c
···
 		if (!ns->page)
 			ret = -ENOMEM;
 	} else {
-		page_pool_put_full_page(ns->page->pp, ns->page, false);
+		page_pool_put_full_page(pp_page_to_nmdesc(ns->page)->pp,
+					ns->page, false);
 		ns->page = NULL;
 	}
···
 	/* Put this intentionally late to exercise the orphaning path */
 	if (ns->page) {
-		page_pool_put_full_page(ns->page->pp, ns->page, false);
+		page_pool_put_full_page(pp_page_to_nmdesc(ns->page)->pp,
+					ns->page, false);
 		ns->page = NULL;
 	}
+2 -1
drivers/net/wireless/mediatek/mt76/mt76.h
···
 {
 	struct page *page = virt_to_head_page(buf);

-	page_pool_put_full_page(page->pp, page, allow_direct);
+	page_pool_put_full_page(pp_page_to_nmdesc(page)->pp, page,
+				allow_direct);
 }

 static inline void *
+2 -2
include/linux/mm.h
···
 #define PP_MAGIC_MASK	~(PP_DMA_INDEX_MASK | 0x3UL)

 #ifdef CONFIG_PAGE_POOL
-static inline bool page_pool_page_is_pp(struct page *page)
+static inline bool page_pool_page_is_pp(const struct page *page)
 {
 	return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
 }
 #else
-static inline bool page_pool_page_is_pp(struct page *page)
+static inline bool page_pool_page_is_pp(const struct page *page)
 {
 	return false;
 }
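
The const qualifier here is what lets the new pp_page_to_nmdesc() macro (see include/net/netmem.h below) run its DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p)) check on the const struct page * branch of its _Generic dispatch. A hedged sketch of the kind of read-only caller this enables; the helper name is hypothetical:

	/* With the const-qualified page_pool_page_is_pp(), a read-only
	 * page can be tested without casting away const.
	 */
	static inline bool page_needs_pp_handling(const struct page *page)
	{
		return page_pool_page_is_pp(page);
	}
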
+1 -1
include/net/libeth/xdp.h
···
 	xdp_init_buff(&xdp->base, fqe->truesize, xdp->base.rxq);
 #endif
 	xdp_prepare_buff(&xdp->base, page_address(page) + fqe->offset,
-			 page->pp->p.offset, len, true);
+			 pp_page_to_nmdesc(page)->pp->p.offset, len, true);
 }

 /**
+131 -22
include/net/netmem.h
···
 #include <linux/mm.h>
 #include <net/net_debug.h>

+/* These fields in struct page are used by the page_pool and net stack:
+ *
+ *        struct {
+ *                unsigned long pp_magic;
+ *                struct page_pool *pp;
+ *                unsigned long _pp_mapping_pad;
+ *                unsigned long dma_addr;
+ *                atomic_long_t pp_ref_count;
+ *        };
+ *
+ * We mirror the page_pool fields here so the page_pool can access these
+ * fields without worrying whether the underlying fields belong to a
+ * page or netmem_desc.
+ *
+ * CAUTION: Do not update the fields in netmem_desc without also
+ * updating the anonymous aliasing union in struct net_iov.
+ */
+struct netmem_desc {
+	unsigned long _flags;
+	unsigned long pp_magic;
+	struct page_pool *pp;
+	unsigned long _pp_mapping_pad;
+	unsigned long dma_addr;
+	atomic_long_t pp_ref_count;
+};
+
+#define NETMEM_DESC_ASSERT_OFFSET(pg, desc)        \
+	static_assert(offsetof(struct page, pg) == \
+		      offsetof(struct netmem_desc, desc))
+NETMEM_DESC_ASSERT_OFFSET(flags, _flags);
+NETMEM_DESC_ASSERT_OFFSET(pp_magic, pp_magic);
+NETMEM_DESC_ASSERT_OFFSET(pp, pp);
+NETMEM_DESC_ASSERT_OFFSET(_pp_mapping_pad, _pp_mapping_pad);
+NETMEM_DESC_ASSERT_OFFSET(dma_addr, dma_addr);
+NETMEM_DESC_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
+#undef NETMEM_DESC_ASSERT_OFFSET
+
+/*
+ * Since struct netmem_desc uses the space in struct page, the size
+ * should be checked, until struct netmem_desc has its own instance from
+ * slab, to avoid conflicting with other members within struct page.
+ */
+static_assert(sizeof(struct netmem_desc) <= offsetof(struct page, _refcount));
+
 /* net_iov */

 DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
···
 	NET_IOV_MAX = ULONG_MAX
 };

+/* A memory descriptor representing abstract networking I/O vectors,
+ * generally for non-pages memory that doesn't have its corresponding
+ * struct page and needs to be explicitly allocated through slab.
+ *
+ * net_iovs are allocated and used by networking code, and the size of
+ * the chunk is PAGE_SIZE.
+ *
+ * This memory can be any form of non-struct paged memory.  Examples
+ * include imported dmabuf memory and imported io_uring memory.  See
+ * net_iov_type for all the supported types.
+ *
+ * @pp_magic:	pp field, similar to the one in struct page/struct
+ *		netmem_desc.
+ * @pp:		the pp this net_iov belongs to, if any.
+ * @dma_addr:	the dma addrs of the net_iov. Needed for the network
+ *		card to send/receive this net_iov.
+ * @pp_ref_count: the pp ref count of this net_iov, exactly the same
+ *		usage as struct page/struct netmem_desc.
+ * @owner:	the net_iov_area this net_iov belongs to, if any.
+ * @type:	the type of the memory.  Different types of net_iovs are
+ *		supported.
+ */
 struct net_iov {
-	enum net_iov_type type;
-	unsigned long pp_magic;
-	struct page_pool *pp;
+	union {
+		struct netmem_desc desc;
+
+		/* XXX: The following part should be removed once all
+		 * the references to them are converted so as to be
+		 * accessed via netmem_desc e.g. niov->desc.pp instead
+		 * of niov->pp.
+		 */
+		struct {
+			unsigned long _flags;
+			unsigned long pp_magic;
+			struct page_pool *pp;
+			unsigned long _pp_mapping_pad;
+			unsigned long dma_addr;
+			atomic_long_t pp_ref_count;
+		};
+	};
 	struct net_iov_area *owner;
-	unsigned long dma_addr;
-	atomic_long_t pp_ref_count;
+	enum net_iov_type type;
 };

 struct net_iov_area {
···
 	unsigned long base_virtual;
 };

-/* These fields in struct page are used by the page_pool and net stack:
+/* net_iov is union'ed with struct netmem_desc mirroring struct page, so
+ * the page_pool can access these fields without worrying whether the
+ * underlying fields are accessed via netmem_desc or directly via
+ * net_iov, until all the references to them are converted so as to be
+ * accessed via netmem_desc e.g. niov->desc.pp instead of niov->pp.
  *
- *        struct {
- *                unsigned long pp_magic;
- *                struct page_pool *pp;
- *                unsigned long _pp_mapping_pad;
- *                unsigned long dma_addr;
- *                atomic_long_t pp_ref_count;
- *        };
- *
- * We mirror the page_pool fields here so the page_pool can access these fields
- * without worrying whether the underlying fields belong to a page or net_iov.
- *
- * The non-net stack fields of struct page are private to the mm stack and must
- * never be mirrored to net_iov.
+ * The non-net stack fields of struct page are private to the mm stack
+ * and must never be mirrored to net_iov.
  */
-#define NET_IOV_ASSERT_OFFSET(pg, iov)             \
-	static_assert(offsetof(struct page, pg) == \
+#define NET_IOV_ASSERT_OFFSET(desc, iov)                   \
+	static_assert(offsetof(struct netmem_desc, desc) == \
 		      offsetof(struct net_iov, iov))
+NET_IOV_ASSERT_OFFSET(_flags, _flags);
 NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic);
 NET_IOV_ASSERT_OFFSET(pp, pp);
+NET_IOV_ASSERT_OFFSET(_pp_mapping_pad, _pp_mapping_pad);
 NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
 NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
 #undef NET_IOV_ASSERT_OFFSET
···
 	return page_to_pfn(netmem_to_page(netmem));
 }

+/**
+ * __netmem_to_nmdesc - unsafely get pointer to the &netmem_desc backing
+ * @netmem
+ * @netmem: netmem reference to convert
+ *
+ * Unsafe version that can be used only when @netmem is always backed by
+ * system memory, performs faster and generates smaller object code (no
+ * check for the LSB, no WARN).  When @netmem points to IOV, provokes
+ * undefined behaviour.
+ *
+ * Return: pointer to the &netmem_desc (garbage if @netmem is not backed
+ * by system memory).
+ */
+static inline struct netmem_desc *__netmem_to_nmdesc(netmem_ref netmem)
+{
+	return (__force struct netmem_desc *)netmem;
+}
+
 /* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to
  * common fields.
  * @netmem: netmem reference to extract as net_iov.
···
 	return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
 }

+/* XXX: How to extract netmem_desc from page must be changed, once
+ * netmem_desc no longer overlays on page and will be allocated through
+ * slab.
+ */
+#define __pp_page_to_nmdesc(p)	(_Generic((p),				\
+	const struct page * :	(const struct netmem_desc *)(p),	\
+	struct page * :		(struct netmem_desc *)(p)))
+
+/* CAUTION: Check if the page is a pp page before calling this helper or
+ * know it's a pp page.
+ */
+#define pp_page_to_nmdesc(p)						\
+({									\
+	DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p));		\
+	__pp_page_to_nmdesc(p);						\
+})
+
 /**
  * __netmem_get_pp - unsafely get pointer to the &page_pool backing @netmem
  * @netmem: netmem reference to get the pointer from
···
  */
 static inline struct page_pool *__netmem_get_pp(netmem_ref netmem)
 {
-	return __netmem_to_page(netmem)->pp;
+	return __netmem_to_nmdesc(netmem)->pp;
 }

 static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
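
The heart of the series is the overlay-plus-static_assert technique in include/net/netmem.h above. Below is a standalone userspace demo of the same idea, hypothetical and not kernel code: a small descriptor struct aliased onto the head of a larger struct, with the shared layout pinned at compile time. (The kernel builds with -fno-strict-aliasing; compile this sketch the same way, since the cast is a strict-aliasing violation in ISO C.)

	#include <assert.h>
	#include <stddef.h>
	#include <stdio.h>

	/* Stand-in for struct page: a shared prefix plus private members. */
	struct big {
		unsigned long flags;
		void *pp;
		unsigned long dma_addr;
		long refcount;		/* private to the "mm" side */
	};

	/* Stand-in for struct netmem_desc: the shared prefix only. */
	struct desc {
		unsigned long _flags;
		void *pp;
		unsigned long dma_addr;
	};

	/* Pin the layout, as NETMEM_DESC_ASSERT_OFFSET does in the patch. */
	static_assert(offsetof(struct big, pp) == offsetof(struct desc, pp),
		      "pp must overlay");
	static_assert(offsetof(struct big, dma_addr) ==
		      offsetof(struct desc, dma_addr), "dma_addr must overlay");
	/* The descriptor must not spill into the private tail, mirroring the
	 * sizeof(struct netmem_desc) <= offsetof(struct page, _refcount) check.
	 */
	static_assert(sizeof(struct desc) <= offsetof(struct big, refcount),
		      "desc must fit before refcount");

	int main(void)
	{
		struct big page = { .pp = (void *)0xabcd, .dma_addr = 42 };
		/* The cast __pp_page_to_nmdesc() performs, in miniature. */
		struct desc *d = (struct desc *)&page;

		printf("pp=%p dma_addr=%lu\n", d->pp, d->dma_addr);
		return 0;
	}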