Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm/alloc_tag: clear codetag for pages allocated before page_ext initialization

Due to initialization ordering, page_ext is allocated and initialized
relatively late during boot. Some pages have already been allocated and
freed before page_ext becomes available, leaving their codetag
uninitialized.

A clear example is in init_section_page_ext(): alloc_page_ext() calls
kmemleak_alloc(). If the slab cache has no free objects, it falls back to
the buddy allocator to allocate memory. However, at this point page_ext
is not yet fully initialized, so these newly allocated pages have no
codetag set. These pages may later be reclaimed by KASAN, which causes
the warning to trigger when they are freed because their codetag ref is
still empty.

Use a global array to track pages allocated before page_ext is fully
initialized. The array size is fixed at 8192 entries; a warning is
emitted if this limit is exceeded. When page_ext initialization
completes, set their codetag to empty to avoid warnings when they are
freed later.

This warning is only observed with CONFIG_MEM_ALLOC_PROFILING_DEBUG=Y and
mem_profiling_compressed disabled:

[ 9.582133] ------------[ cut here ]------------
[ 9.582137] alloc_tag was not set
[ 9.582139] WARNING: ./include/linux/alloc_tag.h:164 at __pgalloc_tag_sub+0x40f/0x550, CPU#5: systemd/1
[ 9.582190] CPU: 5 UID: 0 PID: 1 Comm: systemd Not tainted 7.0.0-rc4 #1 PREEMPT(lazy)
[ 9.582192] Hardware name: Red Hat KVM, BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 9.582194] RIP: 0010:__pgalloc_tag_sub+0x40f/0x550
[ 9.582196] Code: 00 00 4c 29 e5 48 8b 05 1f 88 56 05 48 8d 4c ad 00 48 8d 2c c8 e9 87 fd ff ff 0f 0b 0f 0b e9 f3 fe ff ff 48 8d 3d 61 2f ed 03 <67> 48 0f b9 3a e9 b3 fd ff ff 0f 0b eb e4 e8 5e cd 14 02 4c 89 c7
[ 9.582197] RSP: 0018:ffffc9000001f940 EFLAGS: 00010246
[ 9.582200] RAX: dffffc0000000000 RBX: 1ffff92000003f2b RCX: 1ffff110200d806c
[ 9.582201] RDX: ffff8881006c0360 RSI: 0000000000000004 RDI: ffffffff9bc7b460
[ 9.582202] RBP: 0000000000000000 R08: 0000000000000000 R09: fffffbfff3a62324
[ 9.582203] R10: ffffffff9d311923 R11: 0000000000000000 R12: ffffea0004001b00
[ 9.582204] R13: 0000000000002000 R14: ffffea0000000000 R15: ffff8881006c0360
[ 9.582206] FS: 00007ffbbcf2d940(0000) GS:ffff888450479000(0000) knlGS:0000000000000000
[ 9.582208] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 9.582210] CR2: 000055ee3aa260d0 CR3: 0000000148b67005 CR4: 0000000000770ef0
[ 9.582211] PKRU: 55555554
[ 9.582212] Call Trace:
[ 9.582213] <TASK>
[ 9.582214] ? __pfx___pgalloc_tag_sub+0x10/0x10
[ 9.582216] ? check_bytes_and_report+0x68/0x140
[ 9.582219] __free_frozen_pages+0x2e4/0x1150
[ 9.582221] ? __free_slab+0xc2/0x2b0
[ 9.582224] qlist_free_all+0x4c/0xf0
[ 9.582227] kasan_quarantine_reduce+0x15d/0x180
[ 9.582229] __kasan_slab_alloc+0x69/0x90
[ 9.582232] kmem_cache_alloc_noprof+0x14a/0x500
[ 9.582234] do_getname+0x96/0x310
[ 9.582237] do_readlinkat+0x91/0x2f0
[ 9.582239] ? __pfx_do_readlinkat+0x10/0x10
[ 9.582240] ? get_random_bytes_user+0x1df/0x2c0
[ 9.582244] __x64_sys_readlinkat+0x96/0x100
[ 9.582246] do_syscall_64+0xce/0x650
[ 9.582250] ? __x64_sys_getrandom+0x13a/0x1e0
[ 9.582252] ? __pfx___x64_sys_getrandom+0x10/0x10
[ 9.582254] ? do_syscall_64+0x114/0x650
[ 9.582255] ? ksys_read+0xfc/0x1d0
[ 9.582258] ? __pfx_ksys_read+0x10/0x10
[ 9.582260] ? do_syscall_64+0x114/0x650
[ 9.582262] ? do_syscall_64+0x114/0x650
[ 9.582264] ? __pfx_fput_close_sync+0x10/0x10
[ 9.582266] ? file_close_fd_locked+0x178/0x2a0
[ 9.582268] ? __x64_sys_faccessat2+0x96/0x100
[ 9.582269] ? __x64_sys_close+0x7d/0xd0
[ 9.582271] ? do_syscall_64+0x114/0x650
[ 9.582273] ? do_syscall_64+0x114/0x650
[ 9.582275] ? clear_bhb_loop+0x50/0xa0
[ 9.582277] ? clear_bhb_loop+0x50/0xa0
[ 9.582279] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 9.582280] RIP: 0033:0x7ffbbda345ee
[ 9.582282] Code: 0f 1f 40 00 48 8b 15 29 38 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff c3 0f 1f 40 00 f3 0f 1e fa 49 89 ca b8 0b 01 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d fa 37 0d 00 f7 d8 64 89 01 48
[ 9.582284] RSP: 002b:00007ffe2ad8de58 EFLAGS: 00000202 ORIG_RAX: 000000000000010b
[ 9.582286] RAX: ffffffffffffffda RBX: 000055ee3aa25570 RCX: 00007ffbbda345ee
[ 9.582287] RDX: 000055ee3aa25570 RSI: 00007ffe2ad8dee0 RDI: 00000000ffffff9c
[ 9.582288] RBP: 0000000000001000 R08: 0000000000000003 R09: 0000000000001001
[ 9.582289] R10: 0000000000001000 R11: 0000000000000202 R12: 0000000000000033
[ 9.582290] R13: 00007ffe2ad8dee0 R14: 00000000ffffff9c R15: 00007ffe2ad8deb0
[ 9.582292] </TASK>
[ 9.582293] ---[ end trace 0000000000000000 ]---

Link: https://lore.kernel.org/20260331081312.123719-1-hao.ge@linux.dev
Fixes: dcfe378c81f72 ("lib: introduce support for page allocation tagging")
Signed-off-by: Hao Ge <hao.ge@linux.dev>
Suggested-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Suren Baghdasaryan <surenb@google.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Hao Ge; committed by Andrew Morton.
Commit 6b184277 (parent d14514c6)

+121 -2
+2
include/linux/alloc_tag.h
 {
 	WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
 }
+void alloc_tag_add_early_pfn(unsigned long pfn);
 #else
 static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
 static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
+static inline void alloc_tag_add_early_pfn(unsigned long pfn) {}
 #endif
 
 /* Caller should verify both ref and tag to be valid */
+1 -1
include/linux/pgalloc_tag.h
 
 	if (get_page_tag_ref(page, &ref, &handle)) {
 		alloc_tag_sub_check(&ref);
-		if (ref.ct)
+		if (ref.ct && !is_codetag_empty(&ref))
 			tag = ct_to_alloc_tag(ref.ct);
 		put_page_tag_ref(handle);
 	}
+109
lib/alloc_tag.c
 #include <linux/kallsyms.h>
 #include <linux/module.h>
 #include <linux/page_ext.h>
+#include <linux/pgalloc_tag.h>
 #include <linux/proc_fs.h>
+#include <linux/rcupdate.h>
 #include <linux/seq_buf.h>
 #include <linux/seq_file.h>
 #include <linux/string_choices.h>
···
 	return mem_profiling_support;
 }
 
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+/*
+ * Track page allocations before page_ext is initialized.
+ * Some pages are allocated before page_ext becomes available, leaving
+ * their codetag uninitialized. Track these early PFNs so we can clear
+ * their codetag refs later to avoid warnings when they are freed.
+ *
+ * Early allocations include:
+ *  - Base allocations independent of CPU count
+ *  - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init,
+ *    such as trace ring buffers, scheduler per-cpu data)
+ *
+ * For simplicity, we fix the size to 8192.
+ * If insufficient, a warning will be triggered to alert the user.
+ *
+ * TODO: Replace fixed-size array with dynamic allocation using
+ * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion.
+ */
+#define EARLY_ALLOC_PFN_MAX 8192
+
+static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata;
+static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0);
+
+static void __init __alloc_tag_add_early_pfn(unsigned long pfn)
+{
+	int old_idx, new_idx;
+
+	do {
+		old_idx = atomic_read(&early_pfn_count);
+		if (old_idx >= EARLY_ALLOC_PFN_MAX) {
+			pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n",
+				     EARLY_ALLOC_PFN_MAX);
+			return;
+		}
+		new_idx = old_idx + 1;
+	} while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx));
+
+	early_pfns[old_idx] = pfn;
+}
+
+typedef void alloc_tag_add_func(unsigned long pfn);
+static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata =
+	RCU_INITIALIZER(__alloc_tag_add_early_pfn);
+
+void alloc_tag_add_early_pfn(unsigned long pfn)
+{
+	alloc_tag_add_func *alloc_tag_add;
+
+	if (static_key_enabled(&mem_profiling_compressed))
+		return;
+
+	rcu_read_lock();
+	alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr);
+	if (alloc_tag_add)
+		alloc_tag_add(pfn);
+	rcu_read_unlock();
+}
+
+static void __init clear_early_alloc_pfn_tag_refs(void)
+{
+	unsigned int i;
+
+	if (static_key_enabled(&mem_profiling_compressed))
+		return;
+
+	rcu_assign_pointer(alloc_tag_add_early_pfn_ptr, NULL);
+	/* Make sure we are not racing with __alloc_tag_add_early_pfn() */
+	synchronize_rcu();
+
+	for (i = 0; i < atomic_read(&early_pfn_count); i++) {
+		unsigned long pfn = early_pfns[i];
+
+		if (pfn_valid(pfn)) {
+			struct page *page = pfn_to_page(pfn);
+			union pgtag_ref_handle handle;
+			union codetag_ref ref;
+
+			if (get_page_tag_ref(page, &ref, &handle)) {
+				/*
+				 * An early-allocated page could be freed and reallocated
+				 * after its page_ext is initialized but before we clear it.
+				 * In that case, it already has a valid tag set.
+				 * We should not overwrite that valid tag with CODETAG_EMPTY.
+				 *
+				 * Note: there is still a small race window between checking
+				 * ref.ct and calling set_codetag_empty(). We accept this
+				 * race as it's unlikely and the extra complexity of atomic
+				 * cmpxchg is not worth it for this debug-only code path.
+				 */
+				if (ref.ct) {
+					put_page_tag_ref(handle);
+					continue;
+				}
+
+				set_codetag_empty(&ref);
+				update_page_tag_ref(handle, &ref);
+				put_page_tag_ref(handle);
+			}
+		}
+
+	}
+}
+#else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */
+static inline void __init clear_early_alloc_pfn_tag_refs(void) {}
+#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
+
 static __init void init_page_alloc_tagging(void)
 {
+	clear_early_alloc_pfn_tag_refs();
 }
 
 struct page_ext_operations page_alloc_tagging_ops = {
+9 -1
mm/page_alloc.c
 	union pgtag_ref_handle handle;
 	union codetag_ref ref;
 
-	if (get_page_tag_ref(page, &ref, &handle)) {
+	if (likely(get_page_tag_ref(page, &ref, &handle))) {
 		alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr);
 		update_page_tag_ref(handle, &ref);
 		put_page_tag_ref(handle);
+	} else {
+		/*
+		 * page_ext is not available yet, record the pfn so we can
+		 * clear the tag ref later when page_ext is initialized.
+		 */
+		alloc_tag_add_early_pfn(page_to_pfn(page));
+		if (task->alloc_tag)
+			alloc_tag_set_inaccurate(task->alloc_tag);
 	}
 }