Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

bpf: Use kmalloc_nolock() in bpf streams

BPF stream kfuncs need to be non-sleeping, as they can be called from
programs running in any context. This requires a way to allocate memory
from any context. Currently, this is done by a custom per-CPU NMI-safe
bump allocation mechanism, backed by the alloc_pages_nolock() and
free_pages_nolock() primitives.

As kmalloc_nolock() and kfree_nolock() primitives are available now, the
custom allocator can be removed in favor of these.

Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20251023161448.4263-1-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Puranjay Mohan and committed by
Alexei Starovoitov
5701d5ae d28c0e49

+8 -151
+8 -151
kernel/bpf/stream.c
··· 4 4 #include <linux/bpf.h> 5 5 #include <linux/filter.h> 6 6 #include <linux/bpf_mem_alloc.h> 7 - #include <linux/percpu.h> 8 - #include <linux/refcount.h> 9 7 #include <linux/gfp.h> 10 8 #include <linux/memory.h> 11 - #include <linux/local_lock.h> 12 9 #include <linux/mutex.h> 13 - 14 - /* 15 - * Simple per-CPU NMI-safe bump allocation mechanism, backed by the NMI-safe 16 - * try_alloc_pages()/free_pages_nolock() primitives. We allocate a page and 17 - * stash it in a local per-CPU variable, and bump allocate from the page 18 - * whenever items need to be printed to a stream. Each page holds a global 19 - * atomic refcount in its first 4 bytes, and then records of variable length 20 - * that describe the printed messages. Once the global refcount has dropped to 21 - * zero, it is a signal to free the page back to the kernel's page allocator, 22 - * given all the individual records in it have been consumed. 23 - * 24 - * It is possible the same page is used to serve allocations across different 25 - * programs, which may be consumed at different times individually, hence 26 - * maintaining a reference count per-page is critical for correct lifetime 27 - * tracking. 28 - * 29 - * The bpf_stream_page code will be replaced to use kmalloc_nolock() once it 30 - * lands. 31 - */ 32 - struct bpf_stream_page { 33 - refcount_t ref; 34 - u32 consumed; 35 - char buf[]; 36 - }; 37 - 38 - /* Available room to add data to a refcounted page. 
*/ 39 - #define BPF_STREAM_PAGE_SZ (PAGE_SIZE - offsetofend(struct bpf_stream_page, consumed)) 40 - 41 - static DEFINE_PER_CPU(local_trylock_t, stream_local_lock) = INIT_LOCAL_TRYLOCK(stream_local_lock); 42 - static DEFINE_PER_CPU(struct bpf_stream_page *, stream_pcpu_page); 43 - 44 - static bool bpf_stream_page_local_lock(unsigned long *flags) 45 - { 46 - return local_trylock_irqsave(&stream_local_lock, *flags); 47 - } 48 - 49 - static void bpf_stream_page_local_unlock(unsigned long *flags) 50 - { 51 - local_unlock_irqrestore(&stream_local_lock, *flags); 52 - } 53 - 54 - static void bpf_stream_page_free(struct bpf_stream_page *stream_page) 55 - { 56 - struct page *p; 57 - 58 - if (!stream_page) 59 - return; 60 - p = virt_to_page(stream_page); 61 - free_pages_nolock(p, 0); 62 - } 63 - 64 - static void bpf_stream_page_get(struct bpf_stream_page *stream_page) 65 - { 66 - refcount_inc(&stream_page->ref); 67 - } 68 - 69 - static void bpf_stream_page_put(struct bpf_stream_page *stream_page) 70 - { 71 - if (refcount_dec_and_test(&stream_page->ref)) 72 - bpf_stream_page_free(stream_page); 73 - } 74 - 75 - static void bpf_stream_page_init(struct bpf_stream_page *stream_page) 76 - { 77 - refcount_set(&stream_page->ref, 1); 78 - stream_page->consumed = 0; 79 - } 80 - 81 - static struct bpf_stream_page *bpf_stream_page_replace(void) 82 - { 83 - struct bpf_stream_page *stream_page, *old_stream_page; 84 - struct page *page; 85 - 86 - page = alloc_pages_nolock(/* Don't account */ 0, NUMA_NO_NODE, 0); 87 - if (!page) 88 - return NULL; 89 - stream_page = page_address(page); 90 - bpf_stream_page_init(stream_page); 91 - 92 - old_stream_page = this_cpu_read(stream_pcpu_page); 93 - if (old_stream_page) 94 - bpf_stream_page_put(old_stream_page); 95 - this_cpu_write(stream_pcpu_page, stream_page); 96 - return stream_page; 97 - } 98 - 99 - static int bpf_stream_page_check_room(struct bpf_stream_page *stream_page, int len) 100 - { 101 - int min = offsetof(struct bpf_stream_elem, str[0]); 
102 - int consumed = stream_page->consumed; 103 - int total = BPF_STREAM_PAGE_SZ; 104 - int rem = max(0, total - consumed - min); 105 - 106 - /* Let's give room of at least 8 bytes. */ 107 - WARN_ON_ONCE(rem % 8 != 0); 108 - rem = rem < 8 ? 0 : rem; 109 - return min(len, rem); 110 - } 111 10 112 11 static void bpf_stream_elem_init(struct bpf_stream_elem *elem, int len) 113 12 { ··· 15 116 elem->consumed_len = 0; 16 117 } 17 118 18 - static struct bpf_stream_page *bpf_stream_page_from_elem(struct bpf_stream_elem *elem) 19 - { 20 - unsigned long addr = (unsigned long)elem; 21 - 22 - return (struct bpf_stream_page *)PAGE_ALIGN_DOWN(addr); 23 - } 24 - 25 - static struct bpf_stream_elem *bpf_stream_page_push_elem(struct bpf_stream_page *stream_page, int len) 26 - { 27 - u32 consumed = stream_page->consumed; 28 - 29 - stream_page->consumed += round_up(offsetof(struct bpf_stream_elem, str[len]), 8); 30 - return (struct bpf_stream_elem *)&stream_page->buf[consumed]; 31 - } 32 - 33 - static struct bpf_stream_elem *bpf_stream_page_reserve_elem(int len) 34 - { 35 - struct bpf_stream_elem *elem = NULL; 36 - struct bpf_stream_page *page; 37 - int room = 0; 38 - 39 - page = this_cpu_read(stream_pcpu_page); 40 - if (!page) 41 - page = bpf_stream_page_replace(); 42 - if (!page) 43 - return NULL; 44 - 45 - room = bpf_stream_page_check_room(page, len); 46 - if (room != len) 47 - page = bpf_stream_page_replace(); 48 - if (!page) 49 - return NULL; 50 - bpf_stream_page_get(page); 51 - room = bpf_stream_page_check_room(page, len); 52 - WARN_ON_ONCE(room != len); 53 - 54 - elem = bpf_stream_page_push_elem(page, room); 55 - bpf_stream_elem_init(elem, room); 56 - return elem; 57 - } 58 - 59 119 static struct bpf_stream_elem *bpf_stream_elem_alloc(int len) 60 120 { 61 121 const int max_len = ARRAY_SIZE((struct bpf_bprintf_buffers){}.buf); 62 122 struct bpf_stream_elem *elem; 63 - unsigned long flags; 123 + size_t alloc_size; 64 124 65 - BUILD_BUG_ON(max_len > BPF_STREAM_PAGE_SZ); 66 125 /* 
67 126 * Length denotes the amount of data to be written as part of stream element, 68 127 * thus includes '\0' byte. We're capped by how much bpf_bprintf_buffers can ··· 29 172 if (len < 0 || len > max_len) 30 173 return NULL; 31 174 32 - if (!bpf_stream_page_local_lock(&flags)) 175 + alloc_size = offsetof(struct bpf_stream_elem, str[len]); 176 + elem = kmalloc_nolock(alloc_size, __GFP_ZERO, -1); 177 + if (!elem) 33 178 return NULL; 34 - elem = bpf_stream_page_reserve_elem(len); 35 - bpf_stream_page_local_unlock(&flags); 179 + 180 + bpf_stream_elem_init(elem, len); 181 + 36 182 return elem; 37 183 } 38 184 ··· 91 231 92 232 static void bpf_stream_free_elem(struct bpf_stream_elem *elem) 93 233 { 94 - struct bpf_stream_page *p; 95 - 96 - p = bpf_stream_page_from_elem(elem); 97 - bpf_stream_page_put(p); 234 + kfree_nolock(elem); 98 235 } 99 236 100 237 static void bpf_stream_free_list(struct llist_node *list)