Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Revert "net: mirror skb frag ref/unref helpers"

This reverts commit a580ea994fd37f4105028f5a85c38ff6508a2b25.

This revert is to resolve Dragos's report of page_pool leak here:
https://lore.kernel.org/lkml/20240424165646.1625690-2-dtatulea@nvidia.com/

The reverted patch interacts very badly with commit 2cc3aeb5eccc ("skbuff:
Fix a potential race while recycling page_pool packets"). The reverted
commit hopes that the pp_recycle + is_pp_page variables do not change
between the skb_frag_ref and skb_frag_unref operation. If such a change
occurs, the skb_frag_ref/unref will not operate on the same reference type.
In the case of Dragos's report, the grabbed ref was a pp ref, but the unref
was a page ref, because the pp_recycle setting on the skb was changed.

Attempting to fix this issue on the fly is risky. Lets revert and I hope
to reland this with better understanding and testing to ensure we don't
regress some edge case while streamlining skb reffing.

Fixes: a580ea994fd3 ("net: mirror skb frag ref/unref helpers")
Reported-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Mina Almasry <almasrymina@google.com>
Link: https://lore.kernel.org/r/20240502175423.2456544-1-almasrymina@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Mina Almasry and committed by
Jakub Kicinski
173e7622 5bfadc57

+51 -44
+1 -1
drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
··· 1659 1659 for (i = 0; i < record->num_frags; i++) { 1660 1660 skb_shinfo(nskb)->frags[i] = record->frags[i]; 1661 1661 /* increase the frag ref count */ 1662 - __skb_frag_ref(&skb_shinfo(nskb)->frags[i], nskb->pp_recycle); 1662 + __skb_frag_ref(&skb_shinfo(nskb)->frags[i]); 1663 1663 } 1664 1664 1665 1665 skb_shinfo(nskb)->nr_frags = record->num_frags;
+2 -2
drivers/net/ethernet/sun/cassini.c
··· 2000 2000 skb->len += hlen - swivel; 2001 2001 2002 2002 skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel); 2003 - __skb_frag_ref(frag, skb->pp_recycle); 2003 + __skb_frag_ref(frag); 2004 2004 2005 2005 /* any more data? */ 2006 2006 if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) { ··· 2024 2024 frag++; 2025 2025 2026 2026 skb_frag_fill_page_desc(frag, page->buffer, 0, hlen); 2027 - __skb_frag_ref(frag, skb->pp_recycle); 2027 + __skb_frag_ref(frag); 2028 2028 RX_USED_ADD(page, hlen + cp->crc_size); 2029 2029 } 2030 2030
+1 -1
drivers/net/veth.c
··· 717 717 return; 718 718 719 719 for (i = 0; i < sinfo->nr_frags; i++) 720 - __skb_frag_ref(&sinfo->frags[i], false); 720 + __skb_frag_ref(&sinfo->frags[i]); 721 721 } 722 722 723 723 static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
+4 -35
include/linux/skbuff_ref.h
··· 8 8 #define _LINUX_SKBUFF_REF_H 9 9 10 10 #include <linux/skbuff.h> 11 - #include <net/page_pool/helpers.h> 12 - 13 - #ifdef CONFIG_PAGE_POOL 14 - static inline bool is_pp_page(struct page *page) 15 - { 16 - return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; 17 - } 18 - 19 - static inline bool napi_pp_get_page(struct page *page) 20 - { 21 - page = compound_head(page); 22 - 23 - if (!is_pp_page(page)) 24 - return false; 25 - 26 - page_pool_ref_page(page); 27 - return true; 28 - } 29 - #endif 30 - 31 - static inline void skb_page_ref(struct page *page, bool recycle) 32 - { 33 - #ifdef CONFIG_PAGE_POOL 34 - if (recycle && napi_pp_get_page(page)) 35 - return; 36 - #endif 37 - get_page(page); 38 - } 39 11 40 12 /** 41 13 * __skb_frag_ref - take an addition reference on a paged fragment. 42 14 * @frag: the paged fragment 43 - * @recycle: skb->pp_recycle param of the parent skb. False if no parent skb. 44 15 * 45 - * Takes an additional reference on the paged fragment @frag. Obtains the 46 - * correct reference count depending on whether skb->pp_recycle is set and 47 - * whether the frag is a page pool frag. 16 + * Takes an additional reference on the paged fragment @frag. 48 17 */ 49 - static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle) 18 + static inline void __skb_frag_ref(skb_frag_t *frag) 50 19 { 51 - skb_page_ref(skb_frag_page(frag), recycle); 20 + get_page(skb_frag_page(frag)); 52 21 } 53 22 54 23 /** ··· 29 60 */ 30 61 static inline void skb_frag_ref(struct sk_buff *skb, int f) 31 62 { 32 - __skb_frag_ref(&skb_shinfo(skb)->frags[f], skb->pp_recycle); 63 + __skb_frag_ref(&skb_shinfo(skb)->frags[f]); 33 64 } 34 65 35 66 bool napi_pp_put_page(struct page *page);
+42 -4
net/core/skbuff.c
··· 904 904 skb_get(list); 905 905 } 906 906 907 + static bool is_pp_page(struct page *page) 908 + { 909 + return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; 910 + } 911 + 907 912 int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, 908 913 unsigned int headroom) 909 914 { ··· 1028 1023 if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) 1029 1024 return false; 1030 1025 return napi_pp_put_page(virt_to_page(data)); 1026 + } 1027 + 1028 + /** 1029 + * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb 1030 + * @skb: page pool aware skb 1031 + * 1032 + * Increase the fragment reference count (pp_ref_count) of a skb. This is 1033 + * intended to gain fragment references only for page pool aware skbs, 1034 + * i.e. when skb->pp_recycle is true, and not for fragments in a 1035 + * non-pp-recycling skb. It has a fallback to increase references on normal 1036 + * pages, as page pool aware skbs may also have normal page fragments. 1037 + */ 1038 + static int skb_pp_frag_ref(struct sk_buff *skb) 1039 + { 1040 + struct skb_shared_info *shinfo; 1041 + struct page *head_page; 1042 + int i; 1043 + 1044 + if (!skb->pp_recycle) 1045 + return -EINVAL; 1046 + 1047 + shinfo = skb_shinfo(skb); 1048 + 1049 + for (i = 0; i < shinfo->nr_frags; i++) { 1050 + head_page = compound_head(skb_frag_page(&shinfo->frags[i])); 1051 + if (likely(is_pp_page(head_page))) 1052 + page_pool_ref_page(head_page); 1053 + else 1054 + page_ref_inc(head_page); 1055 + } 1056 + return 0; 1031 1057 } 1032 1058 1033 1059 static void skb_kfree_head(void *head, unsigned int end_offset) ··· 4196 4160 to++; 4197 4161 4198 4162 } else { 4199 - __skb_frag_ref(fragfrom, skb->pp_recycle); 4163 + __skb_frag_ref(fragfrom); 4200 4164 skb_frag_page_copy(fragto, fragfrom); 4201 4165 skb_frag_off_copy(fragto, fragfrom); 4202 4166 skb_frag_size_set(fragto, todo); ··· 4846 4810 } 4847 4811 4848 4812 *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag; 4849 - __skb_frag_ref(nskb_frag, nskb->pp_recycle); 4813 + __skb_frag_ref(nskb_frag); 4850 4814 size = skb_frag_size(nskb_frag); 4851 4815 4852 4816 if (pos < offset) { ··· 5977 5941 /* if the skb is not cloned this does nothing 5978 5942 * since we set nr_frags to 0. 5979 5943 */ 5980 - for (i = 0; i < from_shinfo->nr_frags; i++) 5981 - __skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle); 5944 + if (skb_pp_frag_ref(from)) { 5945 + for (i = 0; i < from_shinfo->nr_frags; i++) 5946 + __skb_frag_ref(&from_shinfo->frags[i]); 5947 + } 5982 5948 5983 5949 to->truesize += delta; 5984 5950 to->len += len;
+1 -1
net/tls/tls_device_fallback.c
··· 278 278 for (i = 0; remaining > 0; i++) { 279 279 skb_frag_t *frag = &record->frags[i]; 280 280 281 - __skb_frag_ref(frag, false); 281 + __skb_frag_ref(frag); 282 282 sg_set_page(sg_in + i, skb_frag_page(frag), 283 283 skb_frag_size(frag), skb_frag_off(frag)); 284 284