Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'xdp-a-fistful-of-generic-changes-pt-iii'

Alexander Lobakin says:

====================
xdp: a fistful of generic changes pt. III

XDP for idpf is currently 5.(6) chapters:
* convert Rx to libeth;
* convert Tx and stats to libeth;
* generic XDP and XSk code changes;
* generic XDP and XSk code additions pt. 1;
* generic XDP and XSk code additions pt. 2 (you are here);
* actual XDP for idpf via new libeth_xdp;
* XSk for idpf (via ^).

Part III.3 does the following:
* adds generic functions to build skbs from xdp_buffs (regular and
XSk) and attach frags to xdp_buffs (regular and XSk);
* adds a helper to optimize XSk xmit in drivers.

Everything is a prerequisite for libeth_xdp, but will be useful standalone
as well: less code in drivers, faster XSk XDP_PASS, smaller object
code.
====================

Link: https://patch.msgid.link/20241218174435.1445282-1-aleksander.lobakin@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+316 -65
+3 -27
drivers/net/ethernet/intel/i40e/i40e_xsk.c
··· 395 395 WARN_ON_ONCE(1); 396 396 } 397 397 398 - static int 399 - i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first, 400 - struct xdp_buff *xdp, const unsigned int size) 401 - { 402 - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(first); 403 - 404 - if (!xdp_buff_has_frags(first)) { 405 - sinfo->nr_frags = 0; 406 - sinfo->xdp_frags_size = 0; 407 - xdp_buff_set_frags_flag(first); 408 - } 409 - 410 - if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { 411 - xsk_buff_free(first); 412 - return -ENOMEM; 413 - } 414 - 415 - __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, 416 - virt_to_page(xdp->data_hard_start), 417 - XDP_PACKET_HEADROOM, size); 418 - sinfo->xdp_frags_size += size; 419 - xsk_buff_add_frag(xdp); 420 - 421 - return 0; 422 - } 423 - 424 398 /** 425 399 * i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring 426 400 * @rx_ring: Rx ring ··· 460 486 461 487 if (!first) 462 488 first = bi; 463 - else if (i40e_add_xsk_frag(rx_ring, first, bi, size)) 489 + else if (!xsk_buff_add_frag(first, bi)) { 490 + xsk_buff_free(first); 464 491 break; 492 + } 465 493 466 494 if (++next_to_process == count) 467 495 next_to_process = 0;
+2 -30
drivers/net/ethernet/intel/ice/ice_xsk.c
··· 801 801 return result; 802 802 } 803 803 804 - static int 805 - ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first, 806 - struct xdp_buff *xdp, const unsigned int size) 807 - { 808 - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(first); 809 - 810 - if (!size) 811 - return 0; 812 - 813 - if (!xdp_buff_has_frags(first)) { 814 - sinfo->nr_frags = 0; 815 - sinfo->xdp_frags_size = 0; 816 - xdp_buff_set_frags_flag(first); 817 - } 818 - 819 - if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { 820 - xsk_buff_free(first); 821 - return -ENOMEM; 822 - } 823 - 824 - __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, 825 - virt_to_page(xdp->data_hard_start), 826 - XDP_PACKET_HEADROOM, size); 827 - sinfo->xdp_frags_size += size; 828 - xsk_buff_add_frag(xdp); 829 - 830 - return 0; 831 - } 832 - 833 804 /** 834 805 * ice_clean_rx_irq_zc - consumes packets from the hardware ring 835 806 * @rx_ring: AF_XDP Rx ring ··· 866 895 867 896 if (!first) { 868 897 first = xdp; 869 - } else if (ice_add_xsk_frag(rx_ring, first, xdp, size)) { 898 + } else if (likely(size) && !xsk_buff_add_frag(first, xdp)) { 899 + xsk_buff_free(first); 870 900 break; 871 901 } 872 902
+12 -4
include/linux/skbuff.h
··· 608 608 * Warning : all fields before dataref are cleared in __alloc_skb() 609 609 */ 610 610 atomic_t dataref; 611 - unsigned int xdp_frags_size; 612 611 613 - /* Intermediate layers must ensure that destructor_arg 614 - * remains valid until skb destructor */ 615 - void * destructor_arg; 612 + union { 613 + struct { 614 + u32 xdp_frags_size; 615 + u32 xdp_frags_truesize; 616 + }; 617 + 618 + /* 619 + * Intermediate layers must ensure that destructor_arg 620 + * remains valid until skb destructor. 621 + */ 622 + void *destructor_arg; 623 + }; 616 624 617 625 /* must be last field, see pskb_expand_head() */ 618 626 skb_frag_t frags[MAX_SKB_FRAGS];
+9
include/net/page_pool/helpers.h
··· 144 144 return netmem; 145 145 } 146 146 147 + static inline netmem_ref page_pool_dev_alloc_netmem(struct page_pool *pool, 148 + unsigned int *offset, 149 + unsigned int *size) 150 + { 151 + gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; 152 + 153 + return page_pool_alloc_netmem(pool, offset, size, gfp); 154 + } 155 + 147 156 static inline struct page *page_pool_alloc(struct page_pool *pool, 148 157 unsigned int *offset, 149 158 unsigned int *size, gfp_t gfp)
+97 -1
include/net/xdp.h
··· 167 167 return len; 168 168 } 169 169 170 + void xdp_return_frag(netmem_ref netmem, const struct xdp_buff *xdp); 171 + 172 + /** 173 + * __xdp_buff_add_frag - attach frag to &xdp_buff 174 + * @xdp: XDP buffer to attach the frag to 175 + * @netmem: network memory containing the frag 176 + * @offset: offset at which the frag starts 177 + * @size: size of the frag 178 + * @truesize: total memory size occupied by the frag 179 + * @try_coalesce: whether to try coalescing the frags (not valid for XSk) 180 + * 181 + * Attach frag to the XDP buffer. If it currently has no frags attached, 182 + * initialize the related fields, otherwise check that the frag number 183 + * didn't reach the limit of ``MAX_SKB_FRAGS``. If possible, try coalescing 184 + * the frag with the previous one. 185 + * The function doesn't check/update the pfmemalloc bit. Please use the 186 + * non-underscored wrapper in drivers. 187 + * 188 + * Return: true on success, false if there's no space for the frag in 189 + * the shared info struct. 
190 + */ 191 + static inline bool __xdp_buff_add_frag(struct xdp_buff *xdp, netmem_ref netmem, 192 + u32 offset, u32 size, u32 truesize, 193 + bool try_coalesce) 194 + { 195 + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 196 + skb_frag_t *prev; 197 + u32 nr_frags; 198 + 199 + if (!xdp_buff_has_frags(xdp)) { 200 + xdp_buff_set_frags_flag(xdp); 201 + 202 + nr_frags = 0; 203 + sinfo->xdp_frags_size = 0; 204 + sinfo->xdp_frags_truesize = 0; 205 + 206 + goto fill; 207 + } 208 + 209 + nr_frags = sinfo->nr_frags; 210 + prev = &sinfo->frags[nr_frags - 1]; 211 + 212 + if (try_coalesce && netmem == skb_frag_netmem(prev) && 213 + offset == skb_frag_off(prev) + skb_frag_size(prev)) { 214 + skb_frag_size_add(prev, size); 215 + /* Guaranteed to only decrement the refcount */ 216 + xdp_return_frag(netmem, xdp); 217 + } else if (unlikely(nr_frags == MAX_SKB_FRAGS)) { 218 + return false; 219 + } else { 220 + fill: 221 + __skb_fill_netmem_desc_noacc(sinfo, nr_frags++, netmem, 222 + offset, size); 223 + } 224 + 225 + sinfo->nr_frags = nr_frags; 226 + sinfo->xdp_frags_size += size; 227 + sinfo->xdp_frags_truesize += truesize; 228 + 229 + return true; 230 + } 231 + 232 + /** 233 + * xdp_buff_add_frag - attach frag to &xdp_buff 234 + * @xdp: XDP buffer to attach the frag to 235 + * @netmem: network memory containing the frag 236 + * @offset: offset at which the frag starts 237 + * @size: size of the frag 238 + * @truesize: total memory size occupied by the frag 239 + * 240 + * Version of __xdp_buff_add_frag() which takes care of the pfmemalloc bit. 241 + * 242 + * Return: true on success, false if there's no space for the frag in 243 + * the shared info struct. 
244 + */ 245 + static inline bool xdp_buff_add_frag(struct xdp_buff *xdp, netmem_ref netmem, 246 + u32 offset, u32 size, u32 truesize) 247 + { 248 + if (!__xdp_buff_add_frag(xdp, netmem, offset, size, truesize, true)) 249 + return false; 250 + 251 + if (unlikely(netmem_is_pfmemalloc(netmem))) 252 + xdp_buff_set_frag_pfmemalloc(xdp); 253 + 254 + return true; 255 + } 256 + 170 257 struct xdp_frame { 171 258 void *data; 172 259 u32 len; ··· 317 230 unsigned int size, unsigned int truesize, 318 231 bool pfmemalloc) 319 232 { 320 - skb_shinfo(skb)->nr_frags = nr_frags; 233 + struct skb_shared_info *sinfo = skb_shinfo(skb); 234 + 235 + sinfo->nr_frags = nr_frags; 236 + /* 237 + * ``destructor_arg`` is unionized with ``xdp_frags_{,true}size``, 238 + * reset it after that these fields aren't used anymore. 239 + */ 240 + sinfo->destructor_arg = NULL; 321 241 322 242 skb->len += size; 323 243 skb->data_len += size; ··· 336 242 void xdp_warn(const char *msg, const char *func, const int line); 337 243 #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__) 338 244 245 + struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp); 246 + struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp); 339 247 struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp); 340 248 struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, 341 249 struct sk_buff *skb,
+15 -3
include/net/xdp_sock_drv.h
··· 136 136 xp_free(xskb); 137 137 } 138 138 139 - static inline void xsk_buff_add_frag(struct xdp_buff *xdp) 139 + static inline bool xsk_buff_add_frag(struct xdp_buff *head, 140 + struct xdp_buff *xdp) 140 141 { 141 - struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp); 142 + const void *data = xdp->data; 143 + struct xdp_buff_xsk *frag; 142 144 145 + if (!__xdp_buff_add_frag(head, virt_to_netmem(data), 146 + offset_in_page(data), xdp->data_end - data, 147 + xdp->frame_sz, false)) 148 + return false; 149 + 150 + frag = container_of(xdp, struct xdp_buff_xsk, xdp); 143 151 list_add_tail(&frag->list_node, &frag->pool->xskb_list); 152 + 153 + return true; 144 154 } 145 155 146 156 static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) ··· 367 357 { 368 358 } 369 359 370 - static inline void xsk_buff_add_frag(struct xdp_buff *xdp) 360 + static inline bool xsk_buff_add_frag(struct xdp_buff *head, 361 + struct xdp_buff *xdp) 371 362 { 363 + return false; 372 364 } 373 365 374 366 static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
+178
net/core/xdp.c
··· 535 535 } 536 536 EXPORT_SYMBOL_GPL(xdp_return_frame_bulk); 537 537 538 + /** 539 + * xdp_return_frag -- free one XDP frag or decrement its refcount 540 + * @netmem: network memory reference to release 541 + * @xdp: &xdp_buff to release the frag for 542 + */ 543 + void xdp_return_frag(netmem_ref netmem, const struct xdp_buff *xdp) 544 + { 545 + __xdp_return(netmem, xdp->rxq->mem.type, true, NULL); 546 + } 547 + EXPORT_SYMBOL_GPL(xdp_return_frag); 548 + 538 549 void xdp_return_buff(struct xdp_buff *xdp) 539 550 { 540 551 struct skb_shared_info *sinfo; ··· 628 617 return 0; 629 618 } 630 619 EXPORT_SYMBOL_GPL(xdp_alloc_skb_bulk); 620 + 621 + /** 622 + * xdp_build_skb_from_buff - create an skb from &xdp_buff 623 + * @xdp: &xdp_buff to convert to an skb 624 + * 625 + * Perform common operations to create a new skb to pass up the stack from 626 + * &xdp_buff: allocate an skb head from the NAPI percpu cache, initialize 627 + * skb data pointers and offsets, set the recycle bit if the buff is 628 + * PP-backed, Rx queue index, protocol and update frags info. 629 + * 630 + * Return: new &sk_buff on success, %NULL on error. 
631 + */ 632 + struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp) 633 + { 634 + const struct xdp_rxq_info *rxq = xdp->rxq; 635 + const struct skb_shared_info *sinfo; 636 + struct sk_buff *skb; 637 + u32 nr_frags = 0; 638 + int metalen; 639 + 640 + if (unlikely(xdp_buff_has_frags(xdp))) { 641 + sinfo = xdp_get_shared_info_from_buff(xdp); 642 + nr_frags = sinfo->nr_frags; 643 + } 644 + 645 + skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz); 646 + if (unlikely(!skb)) 647 + return NULL; 648 + 649 + skb_reserve(skb, xdp->data - xdp->data_hard_start); 650 + __skb_put(skb, xdp->data_end - xdp->data); 651 + 652 + metalen = xdp->data - xdp->data_meta; 653 + if (metalen > 0) 654 + skb_metadata_set(skb, metalen); 655 + 656 + if (rxq->mem.type == MEM_TYPE_PAGE_POOL) 657 + skb_mark_for_recycle(skb); 658 + 659 + skb_record_rx_queue(skb, rxq->queue_index); 660 + 661 + if (unlikely(nr_frags)) { 662 + u32 tsize; 663 + 664 + tsize = sinfo->xdp_frags_truesize ? : nr_frags * xdp->frame_sz; 665 + xdp_update_skb_shared_info(skb, nr_frags, 666 + sinfo->xdp_frags_size, tsize, 667 + xdp_buff_is_frag_pfmemalloc(xdp)); 668 + } 669 + 670 + skb->protocol = eth_type_trans(skb, rxq->dev); 671 + 672 + return skb; 673 + } 674 + EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff); 675 + 676 + /** 677 + * xdp_copy_frags_from_zc - copy frags from XSk buff to skb 678 + * @skb: skb to copy frags to 679 + * @xdp: XSk &xdp_buff from which the frags will be copied 680 + * @pp: &page_pool backing page allocation, if available 681 + * 682 + * Copy all frags from XSk &xdp_buff to the skb to pass it up the stack. 683 + * Allocate a new buffer for each frag, copy it and attach to the skb. 684 + * 685 + * Return: true on success, false on netmem allocation fail. 
686 + */ 687 + static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb, 688 + const struct xdp_buff *xdp, 689 + struct page_pool *pp) 690 + { 691 + struct skb_shared_info *sinfo = skb_shinfo(skb); 692 + const struct skb_shared_info *xinfo; 693 + u32 nr_frags, tsize = 0; 694 + bool pfmemalloc = false; 695 + 696 + xinfo = xdp_get_shared_info_from_buff(xdp); 697 + nr_frags = xinfo->nr_frags; 698 + 699 + for (u32 i = 0; i < nr_frags; i++) { 700 + u32 len = skb_frag_size(&xinfo->frags[i]); 701 + u32 offset, truesize = len; 702 + netmem_ref netmem; 703 + 704 + netmem = page_pool_dev_alloc_netmem(pp, &offset, &truesize); 705 + if (unlikely(!netmem)) { 706 + sinfo->nr_frags = i; 707 + return false; 708 + } 709 + 710 + memcpy(__netmem_address(netmem), 711 + __netmem_address(xinfo->frags[i].netmem), 712 + LARGEST_ALIGN(len)); 713 + __skb_fill_netmem_desc_noacc(sinfo, i, netmem, offset, len); 714 + 715 + tsize += truesize; 716 + pfmemalloc |= netmem_is_pfmemalloc(netmem); 717 + } 718 + 719 + xdp_update_skb_shared_info(skb, nr_frags, xinfo->xdp_frags_size, 720 + tsize, pfmemalloc); 721 + 722 + return true; 723 + } 724 + 725 + /** 726 + * xdp_build_skb_from_zc - create an skb from XSk &xdp_buff 727 + * @xdp: source XSk buff 728 + * 729 + * Similar to xdp_build_skb_from_buff(), but for XSk frames. Allocate an skb 730 + * head, new buffer for the head, copy the data and initialize the skb fields. 731 + * If there are frags, allocate new buffers for them and copy. 732 + * Buffers are allocated from the system percpu pools to try recycling them. 733 + * If new skb was built successfully, @xdp is returned to XSk pool's freelist. 734 + * On error, it remains untouched and the caller must take care of this. 735 + * 736 + * Return: new &sk_buff on success, %NULL on error. 
737 + */ 738 + struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp) 739 + { 740 + struct page_pool *pp = this_cpu_read(system_page_pool); 741 + const struct xdp_rxq_info *rxq = xdp->rxq; 742 + u32 len = xdp->data_end - xdp->data_meta; 743 + u32 truesize = xdp->frame_sz; 744 + struct sk_buff *skb; 745 + int metalen; 746 + void *data; 747 + 748 + if (!IS_ENABLED(CONFIG_PAGE_POOL)) 749 + return NULL; 750 + 751 + data = page_pool_dev_alloc_va(pp, &truesize); 752 + if (unlikely(!data)) 753 + return NULL; 754 + 755 + skb = napi_build_skb(data, truesize); 756 + if (unlikely(!skb)) { 757 + page_pool_free_va(pp, data, true); 758 + return NULL; 759 + } 760 + 761 + skb_mark_for_recycle(skb); 762 + skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); 763 + 764 + memcpy(__skb_put(skb, len), xdp->data_meta, LARGEST_ALIGN(len)); 765 + 766 + metalen = xdp->data - xdp->data_meta; 767 + if (metalen > 0) { 768 + skb_metadata_set(skb, metalen); 769 + __skb_pull(skb, metalen); 770 + } 771 + 772 + skb_record_rx_queue(skb, rxq->queue_index); 773 + 774 + if (unlikely(xdp_buff_has_frags(xdp)) && 775 + unlikely(!xdp_copy_frags_from_zc(skb, xdp, pp))) { 776 + napi_consume_skb(skb, true); 777 + return NULL; 778 + } 779 + 780 + xsk_buff_free(xdp); 781 + 782 + skb->protocol = eth_type_trans(skb, rxq->dev); 783 + 784 + return skb; 785 + } 786 + EXPORT_SYMBOL_GPL(xdp_build_skb_from_zc); 631 787 632 788 struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, 633 789 struct sk_buff *skb,