Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'xsk-fix-bugs-around-xsk-skb-allocation'

Jason Xing says:

====================
xsk: fix bugs around xsk skb allocation

There are rare issues around xsk_build_skb(). Some of them
were found by Sashiko[1][2].

[1]: https://lore.kernel.org/all/20260415082654.21026-1-kerneljasonxing@gmail.com/
[2]: https://lore.kernel.org/all/20260418045644.28612-1-kerneljasonxing@gmail.com/
====================

Link: https://patch.msgid.link/20260502200722.53960-1-kerneljasonxing@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+77 -41
+74 -41
net/xdp/xsk.c
··· 646 646 return (u64)((uintptr_t)skb_shinfo(skb)->destructor_arg & ~0x1UL); 647 647 } 648 648 649 - static void xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr) 649 + static struct xsk_addrs *__xsk_addrs_alloc(struct sk_buff *skb, u64 addr) 650 650 { 651 - skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); 651 + struct xsk_addrs *xsk_addr; 652 + 653 + xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); 654 + if (unlikely(!xsk_addr)) 655 + return NULL; 656 + 657 + xsk_addr->addrs[0] = addr; 658 + skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; 659 + return xsk_addr; 660 + } 661 + 662 + static struct xsk_addrs *xsk_addrs_alloc(struct sk_buff *skb) 663 + { 664 + struct xsk_addrs *xsk_addr; 665 + 666 + if (!xsk_skb_destructor_is_addr(skb)) 667 + return (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; 668 + 669 + xsk_addr = __xsk_addrs_alloc(skb, xsk_skb_destructor_get_addr(skb)); 670 + if (likely(xsk_addr)) 671 + xsk_addr->num_descs = 1; 672 + return xsk_addr; 673 + } 674 + 675 + static int xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr) 676 + { 677 + if (IS_ENABLED(CONFIG_64BIT)) { 678 + skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); 679 + return 0; 680 + } 681 + 682 + if (unlikely(!__xsk_addrs_alloc(skb, addr))) 683 + return -ENOMEM; 684 + return 0; 652 685 } 653 686 654 687 static void xsk_inc_num_desc(struct sk_buff *skb) ··· 718 685 spin_lock_irqsave(&pool->cq_prod_lock, flags); 719 686 idx = xskq_get_prod(pool->cq); 720 687 721 - if (unlikely(num_descs > 1)) { 688 + if (unlikely(!xsk_skb_destructor_is_addr(skb))) { 722 689 xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; 723 690 724 691 for (i = 0; i < num_descs; i++) { ··· 757 724 sock_wfree(skb); 758 725 } 759 726 760 - static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs, 761 - u64 addr) 727 + static int xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs, 728 + u64 addr) 762 729 { 730 + 
int err; 731 + 732 + err = xsk_skb_destructor_set_addr(skb, addr); 733 + if (unlikely(err)) 734 + return err; 735 + 763 736 skb->dev = xs->dev; 764 737 skb->priority = READ_ONCE(xs->sk.sk_priority); 765 738 skb->mark = READ_ONCE(xs->sk.sk_mark); 766 739 skb->destructor = xsk_destruct_skb; 767 - xsk_skb_destructor_set_addr(skb, addr); 740 + return 0; 768 741 } 769 742 770 743 static void xsk_consume_skb(struct sk_buff *skb) ··· 779 740 u32 num_descs = xsk_get_num_desc(skb); 780 741 struct xsk_addrs *xsk_addr; 781 742 782 - if (unlikely(num_descs > 1)) { 743 + if (unlikely(!xsk_skb_destructor_is_addr(skb))) { 783 744 xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; 784 745 kmem_cache_free(xsk_tx_generic_cache, xsk_addr); 785 746 } ··· 858 819 return ERR_PTR(err); 859 820 860 821 skb_reserve(skb, hr); 861 - 862 - xsk_skb_init_misc(skb, xs, desc->addr); 863 822 if (desc->options & XDP_TX_METADATA) { 864 823 err = xsk_skb_metadata(skb, buffer, desc, pool, hr); 865 - if (unlikely(err)) 824 + if (unlikely(err)) { 825 + kfree_skb(skb); 866 826 return ERR_PTR(err); 827 + } 867 828 } 868 829 } else { 869 830 struct xsk_addrs *xsk_addr; 870 831 871 - if (xsk_skb_destructor_is_addr(skb)) { 872 - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, 873 - GFP_KERNEL); 874 - if (!xsk_addr) 875 - return ERR_PTR(-ENOMEM); 876 - 877 - xsk_addr->num_descs = 1; 878 - xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); 879 - skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; 880 - } else { 881 - xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; 882 - } 832 + xsk_addr = xsk_addrs_alloc(skb); 833 + if (!xsk_addr) 834 + return ERR_PTR(-ENOMEM); 883 835 884 836 /* in case of -EOVERFLOW that could happen below, 885 837 * xsk_consume_skb() will release this node as whole skb ··· 886 856 addr = buffer - pool->addrs; 887 857 888 858 for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) { 889 - if (unlikely(i >= MAX_SKB_FRAGS)) 859 + if (unlikely(i >= 
MAX_SKB_FRAGS)) { 860 + if (!xs->skb) 861 + kfree_skb(skb); 890 862 return ERR_PTR(-EOVERFLOW); 863 + } 891 864 892 865 page = pool->umem->pgs[addr >> PAGE_SHIFT]; 893 866 get_page(page); ··· 947 914 if (unlikely(err)) 948 915 goto free_err; 949 916 950 - xsk_skb_init_misc(skb, xs, desc->addr); 951 917 if (desc->options & XDP_TX_METADATA) { 952 918 err = xsk_skb_metadata(skb, buffer, desc, 953 919 xs->pool, hr); ··· 959 927 struct page *page; 960 928 u8 *vaddr; 961 929 962 - if (xsk_skb_destructor_is_addr(skb)) { 963 - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, 964 - GFP_KERNEL); 965 - if (!xsk_addr) { 966 - err = -ENOMEM; 967 - goto free_err; 968 - } 969 - 970 - xsk_addr->num_descs = 1; 971 - xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); 972 - skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; 973 - } else { 974 - xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; 930 + xsk_addr = xsk_addrs_alloc(skb); 931 + if (!xsk_addr) { 932 + err = -ENOMEM; 933 + goto free_err; 975 934 } 976 935 977 936 if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) { ··· 987 964 } 988 965 } 989 966 967 + if (!xs->skb) { 968 + err = xsk_skb_init_misc(skb, xs, desc->addr); 969 + if (unlikely(err)) 970 + goto free_err; 971 + } 990 972 xsk_inc_num_desc(skb); 991 973 992 974 return skb; 993 975 994 976 free_err: 995 - if (skb && !skb_shinfo(skb)->nr_frags) 977 + if (skb && !xs->skb) 996 978 kfree_skb(skb); 997 979 998 980 if (err == -EOVERFLOW) { 999 - /* Drop the packet */ 1000 - xsk_inc_num_desc(xs->skb); 1001 - xsk_drop_skb(xs->skb); 981 + if (xs->skb) { 982 + /* Drop the packet */ 983 + xsk_inc_num_desc(xs->skb); 984 + xsk_drop_skb(xs->skb); 985 + } else { 986 + xsk_cq_cancel_locked(xs->pool, 1); 987 + xs->tx->invalid_descs++; 988 + } 1002 989 xskq_cons_release(xs->tx); 1003 990 } else { 1004 991 /* Let application retry */
+3
net/xdp/xsk_buff_pool.c
··· 175 175 if (force_zc && force_copy) 176 176 return -EINVAL; 177 177 178 + if (pool->tx_sw_csum && (netdev->priv_flags & IFF_TX_SKB_NO_LINEAR)) 179 + return -EOPNOTSUPP; 180 + 178 181 if (xsk_get_pool_from_qid(netdev, queue_id)) 179 182 return -EBUSY; 180 183