Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'net-xdp-handle-frags-with-unreadable-memory'

Jakub Kicinski says:

====================
net: xdp: handle frags with unreadable memory

Make XDP helpers compatible with unreadable memory. This is very
similar to how we handle pfmemalloc frags today. Record the info
in xdp_buff flags as frags get added and then update the skb once
allocated.

This series adds the unreadable memory metadata tracking to drivers
using xdp_build_skb_from*() with no changes on the driver side - hence
the only driver changes here are refactoring. Obviously, unreadable memory
is incompatible with XDP today, but thanks to xdp_build_skb_from_buf()
an increasing number of drivers have a unified datapath, whether XDP is
enabled or not.

RFC: https://lore.kernel.org/20250812161528.835855-1-kuba@kernel.org
====================

Link: https://patch.msgid.link/20250905221539.2930285-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+69 -64
+3 -4
drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
··· 468 468 if (!skb) 469 469 return NULL; 470 470 471 - xdp_update_skb_shared_info(skb, num_frags, 472 - sinfo->xdp_frags_size, 473 - BNXT_RX_PAGE_SIZE * num_frags, 474 - xdp_buff_is_frag_pfmemalloc(xdp)); 471 + xdp_update_skb_frags_info(skb, num_frags, sinfo->xdp_frags_size, 472 + BNXT_RX_PAGE_SIZE * num_frags, 473 + xdp_buff_get_skb_flags(xdp)); 475 474 return skb; 476 475 }
+7 -8
drivers/net/ethernet/intel/i40e/i40e_txrx.c
··· 2151 2151 memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0], 2152 2152 sizeof(skb_frag_t) * nr_frags); 2153 2153 2154 - xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags, 2155 - sinfo->xdp_frags_size, 2156 - nr_frags * xdp->frame_sz, 2157 - xdp_buff_is_frag_pfmemalloc(xdp)); 2154 + xdp_update_skb_frags_info(skb, skinfo->nr_frags + nr_frags, 2155 + sinfo->xdp_frags_size, 2156 + nr_frags * xdp->frame_sz, 2157 + xdp_buff_get_skb_flags(xdp)); 2158 2158 2159 2159 /* First buffer has already been processed, so bump ntc */ 2160 2160 if (++rx_ring->next_to_clean == rx_ring->count) ··· 2206 2206 skb_metadata_set(skb, metasize); 2207 2207 2208 2208 if (unlikely(xdp_buff_has_frags(xdp))) { 2209 - xdp_update_skb_shared_info(skb, nr_frags, 2210 - sinfo->xdp_frags_size, 2211 - nr_frags * xdp->frame_sz, 2212 - xdp_buff_is_frag_pfmemalloc(xdp)); 2209 + xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size, 2210 + nr_frags * xdp->frame_sz, 2211 + xdp_buff_get_skb_flags(xdp)); 2213 2212 2214 2213 i40e_process_rx_buffs(rx_ring, I40E_XDP_PASS, xdp); 2215 2214 } else {
+7 -8
drivers/net/ethernet/intel/ice/ice_txrx.c
··· 1035 1035 skb_metadata_set(skb, metasize); 1036 1036 1037 1037 if (unlikely(xdp_buff_has_frags(xdp))) 1038 - xdp_update_skb_shared_info(skb, nr_frags, 1039 - sinfo->xdp_frags_size, 1040 - nr_frags * xdp->frame_sz, 1041 - xdp_buff_is_frag_pfmemalloc(xdp)); 1038 + xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size, 1039 + nr_frags * xdp->frame_sz, 1040 + xdp_buff_get_skb_flags(xdp)); 1042 1041 1043 1042 return skb; 1044 1043 } ··· 1114 1115 memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0], 1115 1116 sizeof(skb_frag_t) * nr_frags); 1116 1117 1117 - xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags, 1118 - sinfo->xdp_frags_size, 1119 - nr_frags * xdp->frame_sz, 1120 - xdp_buff_is_frag_pfmemalloc(xdp)); 1118 + xdp_update_skb_frags_info(skb, skinfo->nr_frags + nr_frags, 1119 + sinfo->xdp_frags_size, 1120 + nr_frags * xdp->frame_sz, 1121 + xdp_buff_get_skb_flags(xdp)); 1121 1122 } 1122 1123 1123 1124 return skb;
+3 -4
drivers/net/ethernet/marvell/mvneta.c
··· 2416 2416 skb->ip_summed = mvneta_rx_csum(pp, desc_status); 2417 2417 2418 2418 if (unlikely(xdp_buff_has_frags(xdp))) 2419 - xdp_update_skb_shared_info(skb, num_frags, 2420 - sinfo->xdp_frags_size, 2421 - num_frags * xdp->frame_sz, 2422 - xdp_buff_is_frag_pfmemalloc(xdp)); 2419 + xdp_update_skb_frags_info(skb, num_frags, sinfo->xdp_frags_size, 2420 + num_frags * xdp->frame_sz, 2421 + xdp_buff_get_skb_flags(xdp)); 2423 2422 2424 2423 return skb; 2425 2424 }
+11 -12
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
··· 1796 1796 1797 1797 if (xdp_buff_has_frags(&mxbuf->xdp)) { 1798 1798 /* sinfo->nr_frags is reset by build_skb, calculate again. */ 1799 - xdp_update_skb_shared_info(skb, wi - head_wi - 1, 1800 - sinfo->xdp_frags_size, truesize, 1801 - xdp_buff_is_frag_pfmemalloc( 1802 - &mxbuf->xdp)); 1799 + xdp_update_skb_frags_info(skb, wi - head_wi - 1, 1800 + sinfo->xdp_frags_size, truesize, 1801 + xdp_buff_get_skb_flags(&mxbuf->xdp)); 1803 1802 1804 1803 for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++) 1805 1804 pwi->frag_page->frags++; ··· 2104 2105 struct mlx5e_frag_page *pagep; 2105 2106 2106 2107 /* sinfo->nr_frags is reset by build_skb, calculate again. */ 2107 - xdp_update_skb_shared_info(skb, frag_page - head_page, 2108 - sinfo->xdp_frags_size, truesize, 2109 - xdp_buff_is_frag_pfmemalloc( 2110 - &mxbuf->xdp)); 2108 + xdp_update_skb_frags_info(skb, frag_page - head_page, 2109 + sinfo->xdp_frags_size, 2110 + truesize, 2111 + xdp_buff_get_skb_flags(&mxbuf->xdp)); 2111 2112 2112 2113 pagep = head_page; 2113 2114 do ··· 2121 2122 if (xdp_buff_has_frags(&mxbuf->xdp)) { 2122 2123 struct mlx5e_frag_page *pagep; 2123 2124 2124 - xdp_update_skb_shared_info(skb, sinfo->nr_frags, 2125 - sinfo->xdp_frags_size, truesize, 2126 - xdp_buff_is_frag_pfmemalloc( 2127 - &mxbuf->xdp)); 2125 + xdp_update_skb_frags_info(skb, sinfo->nr_frags, 2126 + sinfo->xdp_frags_size, 2127 + truesize, 2128 + xdp_buff_get_skb_flags(&mxbuf->xdp)); 2128 2129 2129 2130 pagep = frag_page - sinfo->nr_frags; 2130 2131 do
+3 -4
drivers/net/virtio_net.c
··· 2185 2185 skb_metadata_set(skb, metasize); 2186 2186 2187 2187 if (unlikely(xdp_buff_has_frags(xdp))) 2188 - xdp_update_skb_shared_info(skb, nr_frags, 2189 - sinfo->xdp_frags_size, 2190 - xdp_frags_truesz, 2191 - xdp_buff_is_frag_pfmemalloc(xdp)); 2188 + xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size, 2189 + xdp_frags_truesz, 2190 + xdp_buff_get_skb_flags(xdp)); 2192 2191 2193 2192 return skb; 2194 2193 }
+25 -13
include/net/xdp.h
··· 76 76 XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under 77 77 * pressure 78 78 */ 79 + /* frags have unreadable mem, this can't be true for real XDP packets, 80 + * but drivers may use XDP helpers to construct Rx pkt state even when 81 + * XDP program is not attached. 82 + */ 83 + XDP_FLAGS_FRAGS_UNREADABLE = BIT(2), 79 84 }; 80 85 81 86 struct xdp_buff { ··· 121 116 xdp->flags &= ~XDP_FLAGS_HAS_FRAGS; 122 117 } 123 118 124 - static __always_inline bool 125 - xdp_buff_is_frag_pfmemalloc(const struct xdp_buff *xdp) 126 - { 127 - return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); 128 - } 129 - 130 119 static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp) 131 120 { 132 121 xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC; 122 + } 123 + 124 + static __always_inline void xdp_buff_set_frag_unreadable(struct xdp_buff *xdp) 125 + { 126 + xdp->flags |= XDP_FLAGS_FRAGS_UNREADABLE; 127 + } 128 + 129 + static __always_inline u32 xdp_buff_get_skb_flags(const struct xdp_buff *xdp) 130 + { 131 + return xdp->flags; 133 132 } 134 133 135 134 static __always_inline void ··· 280 271 281 272 if (unlikely(netmem_is_pfmemalloc(netmem))) 282 273 xdp_buff_set_frag_pfmemalloc(xdp); 274 + if (unlikely(netmem_is_net_iov(netmem))) 275 + xdp_buff_set_frag_unreadable(xdp); 283 276 284 277 return true; 285 278 } ··· 305 294 return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); 306 295 } 307 296 308 - static __always_inline bool 309 - xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame) 297 + static __always_inline u32 298 + xdp_frame_get_skb_flags(const struct xdp_frame *frame) 310 299 { 311 - return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); 300 + return frame->flags; 312 301 } 313 302 314 303 #define XDP_BULK_QUEUE_SIZE 16 ··· 345 334 } 346 335 347 336 static inline void 348 - xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, 349 - unsigned int size, unsigned int truesize, 350 - bool pfmemalloc) 337 + xdp_update_skb_frags_info(struct 
sk_buff *skb, u8 nr_frags, 338 + unsigned int size, unsigned int truesize, 339 + u32 xdp_flags) 351 340 { 352 341 struct skb_shared_info *sinfo = skb_shinfo(skb); 353 342 ··· 361 350 skb->len += size; 362 351 skb->data_len += size; 363 352 skb->truesize += truesize; 364 - skb->pfmemalloc |= pfmemalloc; 353 + skb->pfmemalloc |= !!(xdp_flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); 354 + skb->unreadable |= !!(xdp_flags & XDP_FLAGS_FRAGS_UNREADABLE); 365 355 } 366 356 367 357 /* Avoids inlining WARN macro in fast-path */
+10 -11
net/core/xdp.c
··· 663 663 u32 tsize; 664 664 665 665 tsize = sinfo->xdp_frags_truesize ? : nr_frags * xdp->frame_sz; 666 - xdp_update_skb_shared_info(skb, nr_frags, 667 - sinfo->xdp_frags_size, tsize, 668 - xdp_buff_is_frag_pfmemalloc(xdp)); 666 + xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size, 667 + tsize, xdp_buff_get_skb_flags(xdp)); 669 668 } 670 669 671 670 skb->protocol = eth_type_trans(skb, rxq->dev); ··· 691 692 struct skb_shared_info *sinfo = skb_shinfo(skb); 692 693 const struct skb_shared_info *xinfo; 693 694 u32 nr_frags, tsize = 0; 694 - bool pfmemalloc = false; 695 + u32 flags = 0; 695 696 696 697 xinfo = xdp_get_shared_info_from_buff(xdp); 697 698 nr_frags = xinfo->nr_frags; ··· 713 714 __skb_fill_page_desc_noacc(sinfo, i, page, offset, len); 714 715 715 716 tsize += truesize; 716 - pfmemalloc |= page_is_pfmemalloc(page); 717 + if (page_is_pfmemalloc(page)) 718 + flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC; 717 719 } 718 720 719 - xdp_update_skb_shared_info(skb, nr_frags, xinfo->xdp_frags_size, 720 - tsize, pfmemalloc); 721 + xdp_update_skb_frags_info(skb, nr_frags, xinfo->xdp_frags_size, tsize, 722 + flags); 721 723 722 724 return true; 723 725 } ··· 823 823 skb_metadata_set(skb, xdpf->metasize); 824 824 825 825 if (unlikely(xdp_frame_has_frags(xdpf))) 826 - xdp_update_skb_shared_info(skb, nr_frags, 827 - sinfo->xdp_frags_size, 828 - nr_frags * xdpf->frame_sz, 829 - xdp_frame_is_frag_pfmemalloc(xdpf)); 826 + xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size, 827 + nr_frags * xdpf->frame_sz, 828 + xdp_frame_get_skb_flags(xdpf)); 830 829 831 830 /* Essential SKB info: protocol and skb->dev */ 832 831 skb->protocol = eth_type_trans(skb, dev);